1*8b6cd535SAndroid Build Coastguard Worker // Copyright (C) 2019 Google LLC 2*8b6cd535SAndroid Build Coastguard Worker // 3*8b6cd535SAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License"); 4*8b6cd535SAndroid Build Coastguard Worker // you may not use this file except in compliance with the License. 5*8b6cd535SAndroid Build Coastguard Worker // You may obtain a copy of the License at 6*8b6cd535SAndroid Build Coastguard Worker // 7*8b6cd535SAndroid Build Coastguard Worker // http://www.apache.org/licenses/LICENSE-2.0 8*8b6cd535SAndroid Build Coastguard Worker // 9*8b6cd535SAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software 10*8b6cd535SAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS, 11*8b6cd535SAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12*8b6cd535SAndroid Build Coastguard Worker // See the License for the specific language governing permissions and 13*8b6cd535SAndroid Build Coastguard Worker // limitations under the License. 14*8b6cd535SAndroid Build Coastguard Worker 15*8b6cd535SAndroid Build Coastguard Worker #ifndef ICING_INDEX_MAIN_MAIN_INDEX_H_ 16*8b6cd535SAndroid Build Coastguard Worker #define ICING_INDEX_MAIN_MAIN_INDEX_H_ 17*8b6cd535SAndroid Build Coastguard Worker 18*8b6cd535SAndroid Build Coastguard Worker #include <cstddef> 19*8b6cd535SAndroid Build Coastguard Worker #include <cstdint> 20*8b6cd535SAndroid Build Coastguard Worker #include <memory> 21*8b6cd535SAndroid Build Coastguard Worker #include <string> 22*8b6cd535SAndroid Build Coastguard Worker #include <unordered_map> 23*8b6cd535SAndroid Build Coastguard Worker #include <utility> 24*8b6cd535SAndroid Build Coastguard Worker #include <vector> 25*8b6cd535SAndroid Build Coastguard Worker 26*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/status.h" 27*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/statusor.h" 28*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/canonical_errors.h" 29*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/filesystem.h" 30*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/posting_list/flash-index-storage.h" 31*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/posting_list/posting-list-identifier.h" 32*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/lite/term-id-hit-pair.h" 33*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/main/posting-list-hit-accessor.h" 34*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/main/posting-list-hit-serializer.h" 35*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/term-id-codec.h" 36*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/term-metadata.h" 37*8b6cd535SAndroid Build Coastguard Worker #include "icing/legacy/index/icing-dynamic-trie.h" 38*8b6cd535SAndroid Build Coastguard Worker #include "icing/legacy/index/icing-filesystem.h" 39*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/debug.pb.h" 40*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/scoring.pb.h" 41*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/storage.pb.h" 42*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/term.pb.h" 43*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-id.h" 44*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/suggestion-result-checker.h" 45*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/crc32.h" 46*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/status-macros.h" 47*8b6cd535SAndroid Build Coastguard Worker 48*8b6cd535SAndroid Build Coastguard Worker namespace icing { 49*8b6cd535SAndroid Build Coastguard Worker namespace lib { 50*8b6cd535SAndroid Build Coastguard Worker 51*8b6cd535SAndroid Build Coastguard Worker class MainIndex { 52*8b6cd535SAndroid Build Coastguard Worker public: 53*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 54*8b6cd535SAndroid Build Coastguard Worker // - valid instance of MainIndex, on success. 55*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL error if unable to create the lexicon or flash storage. 56*8b6cd535SAndroid Build Coastguard Worker static libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> Create( 57*8b6cd535SAndroid Build Coastguard Worker const std::string& index_directory, const Filesystem* filesystem, 58*8b6cd535SAndroid Build Coastguard Worker const IcingFilesystem* icing_filesystem); 59*8b6cd535SAndroid Build Coastguard Worker 60*8b6cd535SAndroid Build Coastguard Worker // Reads magic from existing flash index storage file header. We need this 61*8b6cd535SAndroid Build Coastguard Worker // during Icing initialization phase to determine the version. 62*8b6cd535SAndroid Build Coastguard Worker // 63*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 64*8b6cd535SAndroid Build Coastguard Worker // - On success, a valid magic. 65*8b6cd535SAndroid Build Coastguard Worker // - NOT_FOUND if the flash index doesn't exist. 66*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL on I/O error. 67*8b6cd535SAndroid Build Coastguard Worker static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic( 68*8b6cd535SAndroid Build Coastguard Worker const Filesystem* filesystem, const std::string& index_directory); 69*8b6cd535SAndroid Build Coastguard Worker 70*8b6cd535SAndroid Build Coastguard Worker // Get a PostingListHitAccessor that holds the posting list chain for 'term'. 71*8b6cd535SAndroid Build Coastguard Worker // 72*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 73*8b6cd535SAndroid Build Coastguard Worker // - On success, a valid PostingListHitAccessor 74*8b6cd535SAndroid Build Coastguard Worker // - NOT_FOUND if term is not present in the main index. 75*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>> 76*8b6cd535SAndroid Build Coastguard Worker GetAccessorForExactTerm(const std::string& term); 77*8b6cd535SAndroid Build Coastguard Worker 78*8b6cd535SAndroid Build Coastguard Worker // Get a PostingListHitAccessor for 'prefix'. 79*8b6cd535SAndroid Build Coastguard Worker // 80*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 81*8b6cd535SAndroid Build Coastguard Worker // - On success, a result containing a valid PostingListHitAccessor. 82*8b6cd535SAndroid Build Coastguard Worker // - NOT_FOUND if neither 'prefix' nor any terms for which 'prefix' is a 83*8b6cd535SAndroid Build Coastguard Worker // prefix are present in the main index. 84*8b6cd535SAndroid Build Coastguard Worker struct GetPrefixAccessorResult { 85*8b6cd535SAndroid Build Coastguard Worker // A PostingListHitAccessor that holds the posting list chain for the term 86*8b6cd535SAndroid Build Coastguard Worker // that best represents 'prefix' in the main index. 87*8b6cd535SAndroid Build Coastguard Worker std::unique_ptr<PostingListHitAccessor> accessor; 88*8b6cd535SAndroid Build Coastguard Worker // True if the returned posting list chain is for 'prefix' or false if the 89*8b6cd535SAndroid Build Coastguard Worker // returned posting list chain is for a term for which 'prefix' is a prefix. 90*8b6cd535SAndroid Build Coastguard Worker bool exact; 91*8b6cd535SAndroid Build Coastguard Worker GetPrefixAccessorResultGetPrefixAccessorResult92*8b6cd535SAndroid Build Coastguard Worker explicit GetPrefixAccessorResult( 93*8b6cd535SAndroid Build Coastguard Worker std::unique_ptr<PostingListHitAccessor> accessor_in, bool exact_in) 94*8b6cd535SAndroid Build Coastguard Worker : accessor(std::move(accessor_in)), exact(exact_in) {} 95*8b6cd535SAndroid Build Coastguard Worker }; 96*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<GetPrefixAccessorResult> 97*8b6cd535SAndroid Build Coastguard Worker GetAccessorForPrefixTerm(const std::string& prefix); 98*8b6cd535SAndroid Build Coastguard Worker 99*8b6cd535SAndroid Build Coastguard Worker // Finds terms with the given prefix in the given result checker. The 100*8b6cd535SAndroid Build Coastguard Worker // input prefix must be normalized, otherwise inaccurate results may be 101*8b6cd535SAndroid Build Coastguard Worker // returned. If scoring_match_type is EXACT, only exact hit will be counted 102*8b6cd535SAndroid Build Coastguard Worker // and it is PREFIX, both prefix and exact hits will be counted. Results are 103*8b6cd535SAndroid Build Coastguard Worker // not sorted specifically and are in lexicographical order. Number of results 104*8b6cd535SAndroid Build Coastguard Worker // are no more than 'num_to_return'. 105*8b6cd535SAndroid Build Coastguard Worker // 106*8b6cd535SAndroid Build Coastguard Worker // Returns: 107*8b6cd535SAndroid Build Coastguard Worker // A list of TermMetadata on success 108*8b6cd535SAndroid Build Coastguard Worker // INTERNAL_ERROR if failed to access term data. 109*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix( 110*8b6cd535SAndroid Build Coastguard Worker const std::string& prefix, TermMatchType::Code scoring_match_type, 111*8b6cd535SAndroid Build Coastguard Worker SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by, 112*8b6cd535SAndroid Build Coastguard Worker const SuggestionResultChecker* suggestion_result_checker); 113*8b6cd535SAndroid Build Coastguard Worker 114*8b6cd535SAndroid Build Coastguard Worker struct LexiconMergeOutputs { 115*8b6cd535SAndroid Build Coastguard Worker // Maps from main_lexicon tvi for new branching point to the main_lexicon 116*8b6cd535SAndroid Build Coastguard Worker // tvi for posting list whose hits must be backfilled. 117*8b6cd535SAndroid Build Coastguard Worker std::unordered_map<uint32_t, uint32_t> backfill_map; 118*8b6cd535SAndroid Build Coastguard Worker 119*8b6cd535SAndroid Build Coastguard Worker // Maps from lexicon tvis to main_lexicon tvis. 120*8b6cd535SAndroid Build Coastguard Worker std::unordered_map<uint32_t, uint32_t> other_tvi_to_main_tvi; 121*8b6cd535SAndroid Build Coastguard Worker 122*8b6cd535SAndroid Build Coastguard Worker // Maps from main lexicon tvi to the block index. Tvis with no entry do not 123*8b6cd535SAndroid Build Coastguard Worker // have an allocated posting list. 124*8b6cd535SAndroid Build Coastguard Worker std::unordered_map<uint32_t, int> main_tvi_to_block_index; 125*8b6cd535SAndroid Build Coastguard Worker 126*8b6cd535SAndroid Build Coastguard Worker // Maps from the lexicon tvi to the beginning position in 127*8b6cd535SAndroid Build Coastguard Worker // prefix_tvis_buf and the length. 128*8b6cd535SAndroid Build Coastguard Worker std::unordered_map<uint32_t, std::pair<int, int>> 129*8b6cd535SAndroid Build Coastguard Worker other_tvi_to_prefix_main_tvis; 130*8b6cd535SAndroid Build Coastguard Worker 131*8b6cd535SAndroid Build Coastguard Worker // Stores tvis that are mapped to by other_tvi_to_prefix_tvis. 132*8b6cd535SAndroid Build Coastguard Worker std::vector<uint32_t> prefix_tvis_buf; 133*8b6cd535SAndroid Build Coastguard Worker }; 134*8b6cd535SAndroid Build Coastguard Worker 135*8b6cd535SAndroid Build Coastguard Worker // Merge the lexicon into the main lexicon and populate the data 136*8b6cd535SAndroid Build Coastguard Worker // structures necessary to translate lite tvis to main tvis, track backfilling 137*8b6cd535SAndroid Build Coastguard Worker // and expanding lite terms to prefix terms. 138*8b6cd535SAndroid Build Coastguard Worker // 139*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 140*8b6cd535SAndroid Build Coastguard Worker // - OK on success 141*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL on IO error while writing to the main lexicon. MergeLexicon(const IcingDynamicTrie & other_lexicon)142*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<LexiconMergeOutputs> MergeLexicon( 143*8b6cd535SAndroid Build Coastguard Worker const IcingDynamicTrie& other_lexicon) { 144*8b6cd535SAndroid Build Coastguard Worker // Backfill branch points need to be added first so that the backfill_map 145*8b6cd535SAndroid Build Coastguard Worker // can be correctly populated. 146*8b6cd535SAndroid Build Coastguard Worker ICING_ASSIGN_OR_RETURN(LexiconMergeOutputs outputs, 147*8b6cd535SAndroid Build Coastguard Worker AddBackfillBranchPoints(other_lexicon)); 148*8b6cd535SAndroid Build Coastguard Worker ICING_ASSIGN_OR_RETURN(outputs, 149*8b6cd535SAndroid Build Coastguard Worker AddTerms(other_lexicon, std::move(outputs))); 150*8b6cd535SAndroid Build Coastguard Worker // Non-backfill branch points need to be added last so that the mapping of 151*8b6cd535SAndroid Build Coastguard Worker // newly added terms to prefix terms can be correctly populated (prefix 152*8b6cd535SAndroid Build Coastguard Worker // terms might be branch points between two new terms or between a 153*8b6cd535SAndroid Build Coastguard Worker // pre-existing term and a new term). 154*8b6cd535SAndroid Build Coastguard Worker ICING_ASSIGN_OR_RETURN(outputs, 155*8b6cd535SAndroid Build Coastguard Worker AddBranchPoints(other_lexicon, std::move(outputs))); 156*8b6cd535SAndroid Build Coastguard Worker return outputs; 157*8b6cd535SAndroid Build Coastguard Worker } 158*8b6cd535SAndroid Build Coastguard Worker 159*8b6cd535SAndroid Build Coastguard Worker // Add hits to the main index and backfill from existing posting lists to new 160*8b6cd535SAndroid Build Coastguard Worker // backfill branch points. 161*8b6cd535SAndroid Build Coastguard Worker // 162*8b6cd535SAndroid Build Coastguard Worker // The backfill_map maps from main_lexicon tvi for a newly added branching 163*8b6cd535SAndroid Build Coastguard Worker // point to the main_lexicon tvi for the posting list whose hits must be 164*8b6cd535SAndroid Build Coastguard Worker // backfilled. backfill_map should be populated as part of LexiconMergeOutputs 165*8b6cd535SAndroid Build Coastguard Worker // in MergeLexicon and be blindly passed to this function. 166*8b6cd535SAndroid Build Coastguard Worker // 167*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 168*8b6cd535SAndroid Build Coastguard Worker // - OK on success 169*8b6cd535SAndroid Build Coastguard Worker // - INVALID_ARGUMENT if one of the elements in the lite index has a term_id 170*8b6cd535SAndroid Build Coastguard Worker // exceeds the max TermId, is not valid or is not less than pre-existing hits 171*8b6cd535SAndroid Build Coastguard Worker // in the main index. 172*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL_ERROR if unable to mmap necessary IndexBlocks 173*8b6cd535SAndroid Build Coastguard Worker // - RESOURCE_EXHAUSTED error if unable to grow the index 174*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status AddHits( 175*8b6cd535SAndroid Build Coastguard Worker const TermIdCodec& term_id_codec, 176*8b6cd535SAndroid Build Coastguard Worker std::unordered_map<uint32_t, uint32_t>&& backfill_map, 177*8b6cd535SAndroid Build Coastguard Worker std::vector<TermIdHitPair>&& hits, DocumentId last_added_document_id); 178*8b6cd535SAndroid Build Coastguard Worker PersistToDisk()179*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status PersistToDisk() { 180*8b6cd535SAndroid Build Coastguard Worker if (main_lexicon_->Sync() && flash_index_storage_->PersistToDisk()) { 181*8b6cd535SAndroid Build Coastguard Worker return libtextclassifier3::Status::OK; 182*8b6cd535SAndroid Build Coastguard Worker } 183*8b6cd535SAndroid Build Coastguard Worker return absl_ports::InternalError("Unable to sync main index components."); 184*8b6cd535SAndroid Build Coastguard Worker } 185*8b6cd535SAndroid Build Coastguard Worker 186*8b6cd535SAndroid Build Coastguard Worker // Updates and returns the checksums of the components in the MainIndex. UpdateChecksum()187*8b6cd535SAndroid Build Coastguard Worker Crc32 UpdateChecksum() { return main_lexicon_->UpdateCrc(); } 188*8b6cd535SAndroid Build Coastguard Worker 189*8b6cd535SAndroid Build Coastguard Worker // Calculates and returns the checksums of the components in the MainIndex. GetChecksum()190*8b6cd535SAndroid Build Coastguard Worker Crc32 GetChecksum() const { return main_lexicon_->GetCrc(); } 191*8b6cd535SAndroid Build Coastguard Worker last_added_document_id()192*8b6cd535SAndroid Build Coastguard Worker DocumentId last_added_document_id() const { 193*8b6cd535SAndroid Build Coastguard Worker return flash_index_storage_->get_last_indexed_docid(); 194*8b6cd535SAndroid Build Coastguard Worker } 195*8b6cd535SAndroid Build Coastguard Worker Reset()196*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status Reset() { 197*8b6cd535SAndroid Build Coastguard Worker ICING_RETURN_IF_ERROR(flash_index_storage_->Reset()); 198*8b6cd535SAndroid Build Coastguard Worker main_lexicon_->Clear(); 199*8b6cd535SAndroid Build Coastguard Worker return libtextclassifier3::Status::OK; 200*8b6cd535SAndroid Build Coastguard Worker } 201*8b6cd535SAndroid Build Coastguard Worker Warm()202*8b6cd535SAndroid Build Coastguard Worker void Warm() { main_lexicon_->Warm(); } 203*8b6cd535SAndroid Build Coastguard Worker 204*8b6cd535SAndroid Build Coastguard Worker // Returns: 205*8b6cd535SAndroid Build Coastguard Worker // - elements size of lexicon and index, on success 206*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL on IO error 207*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<int64_t> GetElementsSize() const; 208*8b6cd535SAndroid Build Coastguard Worker 209*8b6cd535SAndroid Build Coastguard Worker // Takes the provided storage_info, populates the fields related to the main 210*8b6cd535SAndroid Build Coastguard Worker // index and returns that storage_info. 211*8b6cd535SAndroid Build Coastguard Worker // 212*8b6cd535SAndroid Build Coastguard Worker // If an IO error occurs while trying to calculate the value for a field, then 213*8b6cd535SAndroid Build Coastguard Worker // that field will be set to -1. 214*8b6cd535SAndroid Build Coastguard Worker IndexStorageInfoProto GetStorageInfo( 215*8b6cd535SAndroid Build Coastguard Worker IndexStorageInfoProto storage_info) const; 216*8b6cd535SAndroid Build Coastguard Worker 217*8b6cd535SAndroid Build Coastguard Worker // Returns debug information for the main index in out. 218*8b6cd535SAndroid Build Coastguard Worker // verbosity = BASIC, simplest debug information - just the lexicon 219*8b6cd535SAndroid Build Coastguard Worker // verbosity = DETAILED, more detailed debug information including raw 220*8b6cd535SAndroid Build Coastguard Worker // postings lists. 221*8b6cd535SAndroid Build Coastguard Worker std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity) const; 222*8b6cd535SAndroid Build Coastguard Worker 223*8b6cd535SAndroid Build Coastguard Worker // Reduces internal file sizes by reclaiming space of deleted documents. 224*8b6cd535SAndroid Build Coastguard Worker // 225*8b6cd535SAndroid Build Coastguard Worker // This method will update the last_added_docid of the index to the largest 226*8b6cd535SAndroid Build Coastguard Worker // document id that still appears in the index after compaction. 227*8b6cd535SAndroid Build Coastguard Worker // 228*8b6cd535SAndroid Build Coastguard Worker // Returns: 229*8b6cd535SAndroid Build Coastguard Worker // OK on success 230*8b6cd535SAndroid Build Coastguard Worker // INTERNAL_ERROR on IO error, this indicates that the index may be in an 231*8b6cd535SAndroid Build Coastguard Worker // invalid state and should be cleared. 232*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status Optimize( 233*8b6cd535SAndroid Build Coastguard Worker const std::vector<DocumentId>& document_id_old_to_new); 234*8b6cd535SAndroid Build Coastguard Worker 235*8b6cd535SAndroid Build Coastguard Worker private: 236*8b6cd535SAndroid Build Coastguard Worker explicit MainIndex(const std::string& index_directory, 237*8b6cd535SAndroid Build Coastguard Worker const Filesystem* filesystem, 238*8b6cd535SAndroid Build Coastguard Worker const IcingFilesystem* icing_filesystem); 239*8b6cd535SAndroid Build Coastguard Worker 240*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status Init(); 241*8b6cd535SAndroid Build Coastguard Worker 242*8b6cd535SAndroid Build Coastguard Worker // Helpers for merging the lexicon 243*8b6cd535SAndroid Build Coastguard Worker // Add all 'backfill' branch points. Backfill branch points are prefix 244*8b6cd535SAndroid Build Coastguard Worker // branch points that are a prefix of terms that existed in the lexicon 245*8b6cd535SAndroid Build Coastguard Worker // to the merge. 246*8b6cd535SAndroid Build Coastguard Worker // 247*8b6cd535SAndroid Build Coastguard Worker // For example, if the main lexicon only contains "foot" and is then merged 248*8b6cd535SAndroid Build Coastguard Worker // with a lite lexicon containing only "fool", then a backfill branch point 249*8b6cd535SAndroid Build Coastguard Worker // for "foo" will be added to contain prefix hits from both the pre-existing 250*8b6cd535SAndroid Build Coastguard Worker // posting list for "foot" and the new posting list for "fool". 251*8b6cd535SAndroid Build Coastguard Worker // 252*8b6cd535SAndroid Build Coastguard Worker // Populates LexiconMergeOutputs.backfill_map 253*8b6cd535SAndroid Build Coastguard Worker // 254*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 255*8b6cd535SAndroid Build Coastguard Worker // - OK on success 256*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL on IO error while writing to the main lexicon. 257*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<LexiconMergeOutputs> AddBackfillBranchPoints( 258*8b6cd535SAndroid Build Coastguard Worker const IcingDynamicTrie& other_lexicon); 259*8b6cd535SAndroid Build Coastguard Worker 260*8b6cd535SAndroid Build Coastguard Worker // Add all terms from the lexicon. 261*8b6cd535SAndroid Build Coastguard Worker // 262*8b6cd535SAndroid Build Coastguard Worker // Populates LexiconMergeOutputs.other_tvi_to_main_tvi 263*8b6cd535SAndroid Build Coastguard Worker // 264*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 265*8b6cd535SAndroid Build Coastguard Worker // - OK on success 266*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL on IO error while writing to the main lexicon. 267*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<LexiconMergeOutputs> AddTerms( 268*8b6cd535SAndroid Build Coastguard Worker const IcingDynamicTrie& other_lexicon, LexiconMergeOutputs&& outputs); 269*8b6cd535SAndroid Build Coastguard Worker 270*8b6cd535SAndroid Build Coastguard Worker // Add all branch points for terms added from the lexicon. 271*8b6cd535SAndroid Build Coastguard Worker // For example, if the main lexicon is empty and is then merged with a 272*8b6cd535SAndroid Build Coastguard Worker // lexicon containing only "foot" and "fool", then a branch point for "foo" 273*8b6cd535SAndroid Build Coastguard Worker // will be added to contain prefix hits from both "foot" and "fool". 274*8b6cd535SAndroid Build Coastguard Worker // 275*8b6cd535SAndroid Build Coastguard Worker // Populates LexiconMergeOutputs.other_tvi_to_prefix_main_tvis and 276*8b6cd535SAndroid Build Coastguard Worker // LexiconMergeOutputs.prefix_tvis_buf; 277*8b6cd535SAndroid Build Coastguard Worker // 278*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 279*8b6cd535SAndroid Build Coastguard Worker // - OK on success 280*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL on IO error while writing to the main lexicon. 281*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<LexiconMergeOutputs> AddBranchPoints( 282*8b6cd535SAndroid Build Coastguard Worker const IcingDynamicTrie& other_lexicon, LexiconMergeOutputs&& outputs); 283*8b6cd535SAndroid Build Coastguard Worker 284*8b6cd535SAndroid Build Coastguard Worker // Copies all properties from old_tvi in the other lexicon to the new_tvi in 285*8b6cd535SAndroid Build Coastguard Worker // the main lexicon. 286*8b6cd535SAndroid Build Coastguard Worker // Returns true on success, false if an IO error is encountered. 287*8b6cd535SAndroid Build Coastguard Worker bool CopyProperties(const IcingDynamicTrie::PropertyReadersAll& prop_reader, 288*8b6cd535SAndroid Build Coastguard Worker const IcingDynamicTrie& other_lexicon, uint32_t other_tvi, 289*8b6cd535SAndroid Build Coastguard Worker uint32_t new_main_tvi); 290*8b6cd535SAndroid Build Coastguard Worker 291*8b6cd535SAndroid Build Coastguard Worker // Add all hits between [hit_elements, hit_elements + len) to main_index, 292*8b6cd535SAndroid Build Coastguard Worker // updating the entry in the main lexicon at trie_value_index to point to the 293*8b6cd535SAndroid Build Coastguard Worker // resulting posting list. Hits are sorted in descending document id order, so 294*8b6cd535SAndroid Build Coastguard Worker // they should be to posting lists in reverse (starting at hit_elements 295*8b6cd535SAndroid Build Coastguard Worker // + len - 1) and working backwards. Therefore, hit_elements must be in sorted 296*8b6cd535SAndroid Build Coastguard Worker // order. 297*8b6cd535SAndroid Build Coastguard Worker // 298*8b6cd535SAndroid Build Coastguard Worker // trie_value_index may point to a valid posting list id if there is a 299*8b6cd535SAndroid Build Coastguard Worker // pre-existing posting list to append to. 300*8b6cd535SAndroid Build Coastguard Worker // 301*8b6cd535SAndroid Build Coastguard Worker // If backfill_posting_list_id is valid, then the hits from the posting list 302*8b6cd535SAndroid Build Coastguard Worker // identified by backfill_posting_list_id should be added to the new posting 303*8b6cd535SAndroid Build Coastguard Worker // list before the hits in hit_elements. 304*8b6cd535SAndroid Build Coastguard Worker // 305*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 306*8b6cd535SAndroid Build Coastguard Worker // - OK on success 307*8b6cd535SAndroid Build Coastguard Worker // - INVALID_ARGUMENT if posting_list_id stored at trie_value_index is valid 308*8b6cd535SAndroid Build Coastguard Worker // but points out of bounds in the IndexBlock referred to by 309*8b6cd535SAndroid Build Coastguard Worker // id.block_index(), if one of the hits from [hit_elements,hit_elements+len) 310*8b6cd535SAndroid Build Coastguard Worker // is not valid, or if one of the hits from [hit_elements,hit_elements+len) 311*8b6cd535SAndroid Build Coastguard Worker // is not less than the previously added hits. 312*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL_ERROR if posting_list_id stored at trie_value_index is valid 313*8b6cd535SAndroid Build Coastguard Worker // but points to an invalid block index or if unable to mmap the IndexBlock. 314*8b6cd535SAndroid Build Coastguard Worker // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new 315*8b6cd535SAndroid Build Coastguard Worker // posting list. 316*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status AddHitsForTerm( 317*8b6cd535SAndroid Build Coastguard Worker uint32_t tvi, PostingListIdentifier backfill_posting_list_id, 318*8b6cd535SAndroid Build Coastguard Worker const TermIdHitPair* hit_elements, size_t len); 319*8b6cd535SAndroid Build Coastguard Worker 320*8b6cd535SAndroid Build Coastguard Worker // Adds all prefix hits or hits from prefix sections present on the posting 321*8b6cd535SAndroid Build Coastguard Worker // list identified by backfill_posting_list_id to hit_accum. 322*8b6cd535SAndroid Build Coastguard Worker // 323*8b6cd535SAndroid Build Coastguard Worker // RETURNS: 324*8b6cd535SAndroid Build Coastguard Worker // - OK, on success 325*8b6cd535SAndroid Build Coastguard Worker // - INVALID_ARGUMENT if backfill_posting_list_id points out of bounds in the 326*8b6cd535SAndroid Build Coastguard Worker // IndexBlock referred to by id.block_index() 327*8b6cd535SAndroid Build Coastguard Worker // - INTERNAL_ERROR if unable to mmap the block identified by 328*8b6cd535SAndroid Build Coastguard Worker // backfill_posting_list_id or if the posting list identified by 329*8b6cd535SAndroid Build Coastguard Worker // backfill_posting_list_id has been corrupted. 330*8b6cd535SAndroid Build Coastguard Worker // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new 331*8b6cd535SAndroid Build Coastguard Worker // posting list. 332*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status AddPrefixBackfillHits( 333*8b6cd535SAndroid Build Coastguard Worker PostingListIdentifier backfill_posting_list_id, 334*8b6cd535SAndroid Build Coastguard Worker PostingListHitAccessor* hit_accum); 335*8b6cd535SAndroid Build Coastguard Worker 336*8b6cd535SAndroid Build Coastguard Worker // Transfer hits from old_pl_accessor to new_index for term. 337*8b6cd535SAndroid Build Coastguard Worker // 338*8b6cd535SAndroid Build Coastguard Worker // Returns: 339*8b6cd535SAndroid Build Coastguard Worker // largest document id added to the translated posting list, on success 340*8b6cd535SAndroid Build Coastguard Worker // INTERNAL_ERROR on IO error 341*8b6cd535SAndroid Build Coastguard Worker static libtextclassifier3::StatusOr<DocumentId> TransferAndAddHits( 342*8b6cd535SAndroid Build Coastguard Worker const std::vector<DocumentId>& document_id_old_to_new, 343*8b6cd535SAndroid Build Coastguard Worker std::string_view term, PostingListHitAccessor& old_pl_accessor, 344*8b6cd535SAndroid Build Coastguard Worker MainIndex* new_index); 345*8b6cd535SAndroid Build Coastguard Worker 346*8b6cd535SAndroid Build Coastguard Worker // Transfer hits from the current main index to new_index. 347*8b6cd535SAndroid Build Coastguard Worker // 348*8b6cd535SAndroid Build Coastguard Worker // Returns: 349*8b6cd535SAndroid Build Coastguard Worker // OK on success 350*8b6cd535SAndroid Build Coastguard Worker // INTERNAL_ERROR on IO error 351*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status TransferIndex( 352*8b6cd535SAndroid Build Coastguard Worker const std::vector<DocumentId>& document_id_old_to_new, 353*8b6cd535SAndroid Build Coastguard Worker MainIndex* new_index); 354*8b6cd535SAndroid Build Coastguard Worker 355*8b6cd535SAndroid Build Coastguard Worker std::string base_dir_; 356*8b6cd535SAndroid Build Coastguard Worker const Filesystem* filesystem_; 357*8b6cd535SAndroid Build Coastguard Worker const IcingFilesystem* icing_filesystem_; 358*8b6cd535SAndroid Build Coastguard Worker std::unique_ptr<PostingListHitSerializer> posting_list_hit_serializer_; 359*8b6cd535SAndroid Build Coastguard Worker std::unique_ptr<FlashIndexStorage> flash_index_storage_; 360*8b6cd535SAndroid Build Coastguard Worker std::unique_ptr<IcingDynamicTrie> main_lexicon_; 361*8b6cd535SAndroid Build Coastguard Worker }; 362*8b6cd535SAndroid Build Coastguard Worker 363*8b6cd535SAndroid Build Coastguard Worker } // namespace lib 364*8b6cd535SAndroid Build Coastguard Worker } // namespace icing 365*8b6cd535SAndroid Build Coastguard Worker 366*8b6cd535SAndroid Build Coastguard Worker #endif // ICING_INDEX_MAIN_MAIN_INDEX_H_ 367