Search options

Search operator:
Find:
At least one word (OR)
All words (AND)
Exact expression (Phrase)
Semantic search & fuzzy search
Also find:

Import status: Running file import (still 2 documents to extract)

Because text extraction and analysis tasks are still running or pending, not all results may have been found yet: at the time of this search, 2 file(s) could be searched, browsed and filtered by file name only, not yet by their content or content-based facets/filters.

You can prioritize the import of files that have not been processed yet by clicking "Prioritize import" in the list view.



2022-02-24T04:00:00Z
geosrc_creator.h
:

#pragma once

#include "address_constructor.h"
#include "config.h"
#include "toponym_relations_provider.h"

#include <maps/search/libs/geosrc/include/geosrc_paths.h>

#include <mapreduce/yt/interface/client.h>

#include <util/generic/size_literals.h>
#include <util/memory/blob.h>

namespace maps::search::geoindexer {

// Declares the geosrc build entry point (createGeosrc) together with the
// private helpers and state it relies on. Only declarations are visible in
// this header; the behaviour of each method lives in the implementation file.
//
// Lifetime note: recognitionMap_ and toponymRelationsProvider_ are borrowed by
// reference, so the objects passed to the constructor must outlive this
// instance. The string settings are owned copies and carry no such constraint.
class GeosrcCreator {
public:
    // Two-level string map: outer key -> (inner key -> value).
    // NOTE(review): presumably keyed by point of view (POV) — confirm the exact
    // meaning of both key levels against the implementation.
    using RecognitionPovsMap = std::unordered_map<TString, std::unordered_map<TString, TString>>;

    // config                   - indexer configuration.
    // recognitionMap           - stored by reference; must outlive this object.
    // toponymRelationsProvider - stored by reference; must outlive this object.
    GeosrcCreator(const Config& config,
                  const fbs::wrappers::RecognitionMap& recognitionMap,
                  const ToponymRelationsProvider& toponymRelationsProvider);

    // Public entry point of the class.
    // NOTE(review): presumably drives the private helpers below — confirm in the .cpp.
    void createGeosrc();

private:
    /*
        Maps a country to the set of recognition subjects for which a separate
        variant of that country is built.

        Example: if Russia (country) contains Crimea from Russia's point of
        view (001) but does not contain Crimea from Ukraine's point of view
        (UA), two variants of Russia are built, and the resulting map
        contains `Russia -> [001, UA]`.
    */
    THashMap<Region, SubjectSet> buildCountryToRecognitionSubjects() const;

    // Returns a pair: a set of strings and a RecognitionPovsMap, both derived
    // from the given country -> recognition subjects mapping.
    // NOTE(review): per the method name, the set is presumably all POVs — confirm.
    std::pair<std::unordered_set<TString>, RecognitionPovsMap> buildAllPovsAndRecognitionPovsMap(
        const THashMap<Region, SubjectSet>& countryToRecognitionSubjects) const;

    // tx                 - YT transaction handle.
    // recognizingPovPath - path argument; presumably the destination table path (confirm).
    // recognitionPovsMap - data for the table, see RecognitionPovsMap.
    void createPovsRecognitionTable(const NYT::ITransactionPtr& tx,
                                    const TString& recognizingPovPath,
                                    const RecognitionPovsMap& recognitionPovsMap) const;

    // Takes the mapping produced by buildCountryToRecognitionSubjects().
    void generateTables(const THashMap<Region, SubjectSet>& countryToRecognitionSubjects) const;

    // Owned copies rather than references: these strings stay valid no matter
    // how long the object they were initialized from lives. Declaration order
    // is kept as-is because it fixes the member initialization order.
    const std::string ytServer_;
    const std::string geostoragePath_;

    IndexStorageConfig indexStorage_;
    geosrc::GeosrcPaths geosrcPaths_;

    // Borrowed, not owned (see the lifetime note on the class).
    const fbs::wrappers::RecognitionMap& recognitionMap_;
    const ToponymRelationsProvider& toponymRelationsProvider_;

    // 4 GB, expressed with the size literal from util/generic/size_literals.h.
    // NOTE(review): presumably the per-job memory limit for YT operations — confirm usage.
    static constexpr uint64_t JOB_MEMORY_LIMIT = 4_GB;
};

} // namespace maps::search::geoindexer

Hashtags

  • #pragma
  • #include
  • #include
  • #include
  • #include
  • #include
  • #include
  • #include

Filename extension

h
Content-Encoding:
ISO-8859-1

Content-Length:
1892

file_modified_dt:
2022-02-24T04:00:00Z

id:
maps/search/geocoder/indexer/lib/build-geosrc-yt/geosrc_creator.h

path0:
maps

path1:
search

path2:
geocoder

path3:
indexer

path4:
lib

path5:
build-geosrc-yt

path_basename:
geosrc_creator.h

resourceName:
b'geosrc_creator.h'

etl_file_b:
1

etl_enhance_mapping_id_time_millis_i:
0

etl_enhance_mapping_id_b:
1

etl_filter_blacklist_time_millis_i:
0

etl_filter_blacklist_b:
1

etl_filter_file_not_modified_time_millis_i:
3

etl_filter_file_not_modified_b:
1

etl_enhance_extract_text_tika_server_ocr_enabled_b:
1

etl_count_images_yet_no_ocr_i:
0

etl_enhance_ocr_descew_b:
1

etl_enhance_pdf_ocr_b:
1

etl_enhance_extract_text_tika_server_time_millis_i:
85

etl_enhance_extract_text_tika_server_b:
1

etl_enhance_detect_language_tika_server_time_millis_i:
2

etl_enhance_detect_language_tika_server_b:
1

etl_enhance_contenttype_group_time_millis_i:
0

etl_enhance_contenttype_group_b:
1

etl_enhance_pst_time_millis_i:
0

etl_enhance_pst_b:
1

etl_enhance_csv_time_millis_i:
0

etl_enhance_csv_b:
1

etl_enhance_file_mtime_time_millis_i:
0

etl_enhance_file_mtime_b:
1

etl_enhance_path_time_millis_i:
0

etl_enhance_path_b:
1

etl_enhance_extract_hashtags_time_millis_i:
0

etl_enhance_extract_hashtags_b:
1

etl_enhance_warc_time_millis_i:
2

etl_enhance_warc_b:
1

etl_enhance_zip_time_millis_i:
0

etl_enhance_zip_b:
1

etl_clean_title_time_millis_i:
0

etl_clean_title_b:
1

etl_enhance_annotations_time_millis_i:
3

etl_enhance_annotations_b:
1

etl_enhance_multilingual_time_millis_i:
0

etl_enhance_multilingual_b:
1

etl_enhance_regex_time_millis_i:
1

etl_enhance_regex_b:
1

etl_time_millis_i:
101

X-TIKA_Parsed-By:
  • org.apache.tika.parser.CompositeParser
  • org.apache.tika.parser.DefaultParser
  • org.apache.tika.parser.csv.TextAndCSVParser


X-TIKA_Parsed-By-Full-Set:
  • org.apache.tika.parser.CompositeParser
  • org.apache.tika.parser.DefaultParser
  • org.apache.tika.parser.csv.TextAndCSVParser


X-TIKA_content_handler:
ToTextContentHandler

X-TIKA_parse_time_millis:
1

X-TIKA_embedded_depth:
0




Searching ...