xref: /aosp_15_r20/external/icing/icing/index/main/main-index.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*8b6cd535SAndroid Build Coastguard Worker 
15*8b6cd535SAndroid Build Coastguard Worker #ifndef ICING_INDEX_MAIN_MAIN_INDEX_H_
16*8b6cd535SAndroid Build Coastguard Worker #define ICING_INDEX_MAIN_MAIN_INDEX_H_
17*8b6cd535SAndroid Build Coastguard Worker 
18*8b6cd535SAndroid Build Coastguard Worker #include <cstddef>
19*8b6cd535SAndroid Build Coastguard Worker #include <cstdint>
20*8b6cd535SAndroid Build Coastguard Worker #include <memory>
21*8b6cd535SAndroid Build Coastguard Worker #include <string>
22*8b6cd535SAndroid Build Coastguard Worker #include <unordered_map>
23*8b6cd535SAndroid Build Coastguard Worker #include <utility>
24*8b6cd535SAndroid Build Coastguard Worker #include <vector>
25*8b6cd535SAndroid Build Coastguard Worker 
26*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/status.h"
27*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/statusor.h"
28*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/canonical_errors.h"
29*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/filesystem.h"
30*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/posting_list/flash-index-storage.h"
31*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/posting_list/posting-list-identifier.h"
32*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/lite/term-id-hit-pair.h"
33*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/main/posting-list-hit-accessor.h"
34*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/main/posting-list-hit-serializer.h"
35*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/term-id-codec.h"
36*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/term-metadata.h"
37*8b6cd535SAndroid Build Coastguard Worker #include "icing/legacy/index/icing-dynamic-trie.h"
38*8b6cd535SAndroid Build Coastguard Worker #include "icing/legacy/index/icing-filesystem.h"
39*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/debug.pb.h"
40*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/scoring.pb.h"
41*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/storage.pb.h"
42*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/term.pb.h"
43*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-id.h"
44*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/suggestion-result-checker.h"
45*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/crc32.h"
46*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/status-macros.h"
47*8b6cd535SAndroid Build Coastguard Worker 
48*8b6cd535SAndroid Build Coastguard Worker namespace icing {
49*8b6cd535SAndroid Build Coastguard Worker namespace lib {
50*8b6cd535SAndroid Build Coastguard Worker 
51*8b6cd535SAndroid Build Coastguard Worker class MainIndex {
52*8b6cd535SAndroid Build Coastguard Worker  public:
53*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
54*8b6cd535SAndroid Build Coastguard Worker   //  - valid instance of MainIndex, on success.
55*8b6cd535SAndroid Build Coastguard Worker   //  - INTERNAL error if unable to create the lexicon or flash storage.
56*8b6cd535SAndroid Build Coastguard Worker   static libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> Create(
57*8b6cd535SAndroid Build Coastguard Worker       const std::string& index_directory, const Filesystem* filesystem,
58*8b6cd535SAndroid Build Coastguard Worker       const IcingFilesystem* icing_filesystem);
59*8b6cd535SAndroid Build Coastguard Worker 
60*8b6cd535SAndroid Build Coastguard Worker   // Reads magic from existing flash index storage file header. We need this
61*8b6cd535SAndroid Build Coastguard Worker   // during Icing initialization phase to determine the version.
62*8b6cd535SAndroid Build Coastguard Worker   //
63*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
64*8b6cd535SAndroid Build Coastguard Worker   //   - On success, a valid magic.
65*8b6cd535SAndroid Build Coastguard Worker   //   - NOT_FOUND if the flash index doesn't exist.
66*8b6cd535SAndroid Build Coastguard Worker   //   - INTERNAL on I/O error.
67*8b6cd535SAndroid Build Coastguard Worker   static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
68*8b6cd535SAndroid Build Coastguard Worker       const Filesystem* filesystem, const std::string& index_directory);
69*8b6cd535SAndroid Build Coastguard Worker 
70*8b6cd535SAndroid Build Coastguard Worker   // Get a PostingListHitAccessor that holds the posting list chain for 'term'.
71*8b6cd535SAndroid Build Coastguard Worker   //
72*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
73*8b6cd535SAndroid Build Coastguard Worker   //  - On success, a valid PostingListHitAccessor
74*8b6cd535SAndroid Build Coastguard Worker   //  - NOT_FOUND if term is not present in the main index.
75*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<std::unique_ptr<PostingListHitAccessor>>
76*8b6cd535SAndroid Build Coastguard Worker   GetAccessorForExactTerm(const std::string& term);
77*8b6cd535SAndroid Build Coastguard Worker 
78*8b6cd535SAndroid Build Coastguard Worker   // Get a PostingListHitAccessor for 'prefix'.
79*8b6cd535SAndroid Build Coastguard Worker   //
80*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
81*8b6cd535SAndroid Build Coastguard Worker   //  - On success, a result containing a valid PostingListHitAccessor.
82*8b6cd535SAndroid Build Coastguard Worker   //  - NOT_FOUND if neither 'prefix' nor any terms for which 'prefix' is a
83*8b6cd535SAndroid Build Coastguard Worker   //    prefix are present in the main index.
84*8b6cd535SAndroid Build Coastguard Worker   struct GetPrefixAccessorResult {
85*8b6cd535SAndroid Build Coastguard Worker     // A PostingListHitAccessor that holds the posting list chain for the term
86*8b6cd535SAndroid Build Coastguard Worker     // that best represents 'prefix' in the main index.
87*8b6cd535SAndroid Build Coastguard Worker     std::unique_ptr<PostingListHitAccessor> accessor;
88*8b6cd535SAndroid Build Coastguard Worker     // True if the returned posting list chain is for 'prefix' or false if the
89*8b6cd535SAndroid Build Coastguard Worker     // returned posting list chain is for a term for which 'prefix' is a prefix.
90*8b6cd535SAndroid Build Coastguard Worker     bool exact;
91*8b6cd535SAndroid Build Coastguard Worker 
GetPrefixAccessorResultGetPrefixAccessorResult92*8b6cd535SAndroid Build Coastguard Worker     explicit GetPrefixAccessorResult(
93*8b6cd535SAndroid Build Coastguard Worker         std::unique_ptr<PostingListHitAccessor> accessor_in, bool exact_in)
94*8b6cd535SAndroid Build Coastguard Worker         : accessor(std::move(accessor_in)), exact(exact_in) {}
95*8b6cd535SAndroid Build Coastguard Worker   };
96*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<GetPrefixAccessorResult>
97*8b6cd535SAndroid Build Coastguard Worker   GetAccessorForPrefixTerm(const std::string& prefix);
98*8b6cd535SAndroid Build Coastguard Worker 
99*8b6cd535SAndroid Build Coastguard Worker   // Finds terms with the given prefix in the given result checker. The
100*8b6cd535SAndroid Build Coastguard Worker   // input prefix must be normalized, otherwise inaccurate results may be
101*8b6cd535SAndroid Build Coastguard Worker   // returned. If scoring_match_type is EXACT, only exact hit will be counted
102*8b6cd535SAndroid Build Coastguard Worker   // and it is PREFIX, both prefix and exact hits will be counted. Results are
103*8b6cd535SAndroid Build Coastguard Worker   // not sorted specifically and are in lexicographical order. Number of results
104*8b6cd535SAndroid Build Coastguard Worker   // are no more than 'num_to_return'.
105*8b6cd535SAndroid Build Coastguard Worker   //
106*8b6cd535SAndroid Build Coastguard Worker   // Returns:
107*8b6cd535SAndroid Build Coastguard Worker   //   A list of TermMetadata on success
108*8b6cd535SAndroid Build Coastguard Worker   //   INTERNAL_ERROR if failed to access term data.
109*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix(
110*8b6cd535SAndroid Build Coastguard Worker       const std::string& prefix, TermMatchType::Code scoring_match_type,
111*8b6cd535SAndroid Build Coastguard Worker       SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
112*8b6cd535SAndroid Build Coastguard Worker       const SuggestionResultChecker* suggestion_result_checker);
113*8b6cd535SAndroid Build Coastguard Worker 
114*8b6cd535SAndroid Build Coastguard Worker   struct LexiconMergeOutputs {
115*8b6cd535SAndroid Build Coastguard Worker     // Maps from main_lexicon tvi for new branching point to the main_lexicon
116*8b6cd535SAndroid Build Coastguard Worker     // tvi for posting list whose hits must be backfilled.
117*8b6cd535SAndroid Build Coastguard Worker     std::unordered_map<uint32_t, uint32_t> backfill_map;
118*8b6cd535SAndroid Build Coastguard Worker 
119*8b6cd535SAndroid Build Coastguard Worker     // Maps from lexicon tvis to main_lexicon tvis.
120*8b6cd535SAndroid Build Coastguard Worker     std::unordered_map<uint32_t, uint32_t> other_tvi_to_main_tvi;
121*8b6cd535SAndroid Build Coastguard Worker 
122*8b6cd535SAndroid Build Coastguard Worker     // Maps from main lexicon tvi to the block index. Tvis with no entry do not
123*8b6cd535SAndroid Build Coastguard Worker     // have an allocated posting list.
124*8b6cd535SAndroid Build Coastguard Worker     std::unordered_map<uint32_t, int> main_tvi_to_block_index;
125*8b6cd535SAndroid Build Coastguard Worker 
126*8b6cd535SAndroid Build Coastguard Worker     // Maps from the lexicon tvi to the beginning position in
127*8b6cd535SAndroid Build Coastguard Worker     // prefix_tvis_buf and the length.
128*8b6cd535SAndroid Build Coastguard Worker     std::unordered_map<uint32_t, std::pair<int, int>>
129*8b6cd535SAndroid Build Coastguard Worker         other_tvi_to_prefix_main_tvis;
130*8b6cd535SAndroid Build Coastguard Worker 
131*8b6cd535SAndroid Build Coastguard Worker     // Stores tvis that are mapped to by other_tvi_to_prefix_tvis.
132*8b6cd535SAndroid Build Coastguard Worker     std::vector<uint32_t> prefix_tvis_buf;
133*8b6cd535SAndroid Build Coastguard Worker   };
134*8b6cd535SAndroid Build Coastguard Worker 
135*8b6cd535SAndroid Build Coastguard Worker   // Merge the lexicon into the main lexicon and populate the data
136*8b6cd535SAndroid Build Coastguard Worker   // structures necessary to translate lite tvis to main tvis, track backfilling
137*8b6cd535SAndroid Build Coastguard Worker   // and expanding lite terms to prefix terms.
138*8b6cd535SAndroid Build Coastguard Worker   //
139*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
140*8b6cd535SAndroid Build Coastguard Worker   //   - OK on success
141*8b6cd535SAndroid Build Coastguard Worker   //   - INTERNAL on IO error while writing to the main lexicon.
MergeLexicon(const IcingDynamicTrie & other_lexicon)142*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<LexiconMergeOutputs> MergeLexicon(
143*8b6cd535SAndroid Build Coastguard Worker       const IcingDynamicTrie& other_lexicon) {
144*8b6cd535SAndroid Build Coastguard Worker     // Backfill branch points need to be added first so that the backfill_map
145*8b6cd535SAndroid Build Coastguard Worker     // can be correctly populated.
146*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(LexiconMergeOutputs outputs,
147*8b6cd535SAndroid Build Coastguard Worker                            AddBackfillBranchPoints(other_lexicon));
148*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(outputs,
149*8b6cd535SAndroid Build Coastguard Worker                            AddTerms(other_lexicon, std::move(outputs)));
150*8b6cd535SAndroid Build Coastguard Worker     // Non-backfill branch points need to be added last so that the mapping of
151*8b6cd535SAndroid Build Coastguard Worker     // newly added terms to prefix terms can be correctly populated (prefix
152*8b6cd535SAndroid Build Coastguard Worker     // terms might be branch points between two new terms or between a
153*8b6cd535SAndroid Build Coastguard Worker     // pre-existing term and a new term).
154*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(outputs,
155*8b6cd535SAndroid Build Coastguard Worker                            AddBranchPoints(other_lexicon, std::move(outputs)));
156*8b6cd535SAndroid Build Coastguard Worker     return outputs;
157*8b6cd535SAndroid Build Coastguard Worker   }
158*8b6cd535SAndroid Build Coastguard Worker 
159*8b6cd535SAndroid Build Coastguard Worker   // Add hits to the main index and backfill from existing posting lists to new
160*8b6cd535SAndroid Build Coastguard Worker   // backfill branch points.
161*8b6cd535SAndroid Build Coastguard Worker   //
162*8b6cd535SAndroid Build Coastguard Worker   // The backfill_map maps from main_lexicon tvi for a newly added branching
163*8b6cd535SAndroid Build Coastguard Worker   // point to the main_lexicon tvi for the posting list whose hits must be
164*8b6cd535SAndroid Build Coastguard Worker   // backfilled. backfill_map should be populated as part of LexiconMergeOutputs
165*8b6cd535SAndroid Build Coastguard Worker   // in MergeLexicon and be blindly passed to this function.
166*8b6cd535SAndroid Build Coastguard Worker   //
167*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
168*8b6cd535SAndroid Build Coastguard Worker   //  - OK on success
169*8b6cd535SAndroid Build Coastguard Worker   //  - INVALID_ARGUMENT if one of the elements in the lite index has a term_id
170*8b6cd535SAndroid Build Coastguard Worker   //  exceeds the max TermId, is not valid or is not less than pre-existing hits
171*8b6cd535SAndroid Build Coastguard Worker   //  in the main index.
172*8b6cd535SAndroid Build Coastguard Worker   //  - INTERNAL_ERROR if unable to mmap necessary IndexBlocks
173*8b6cd535SAndroid Build Coastguard Worker   //  - RESOURCE_EXHAUSTED error if unable to grow the index
174*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status AddHits(
175*8b6cd535SAndroid Build Coastguard Worker       const TermIdCodec& term_id_codec,
176*8b6cd535SAndroid Build Coastguard Worker       std::unordered_map<uint32_t, uint32_t>&& backfill_map,
177*8b6cd535SAndroid Build Coastguard Worker       std::vector<TermIdHitPair>&& hits, DocumentId last_added_document_id);
178*8b6cd535SAndroid Build Coastguard Worker 
PersistToDisk()179*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status PersistToDisk() {
180*8b6cd535SAndroid Build Coastguard Worker     if (main_lexicon_->Sync() && flash_index_storage_->PersistToDisk()) {
181*8b6cd535SAndroid Build Coastguard Worker       return libtextclassifier3::Status::OK;
182*8b6cd535SAndroid Build Coastguard Worker     }
183*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError("Unable to sync main index components.");
184*8b6cd535SAndroid Build Coastguard Worker   }
185*8b6cd535SAndroid Build Coastguard Worker 
186*8b6cd535SAndroid Build Coastguard Worker   // Updates and returns the checksums of the components in the MainIndex.
UpdateChecksum()187*8b6cd535SAndroid Build Coastguard Worker   Crc32 UpdateChecksum() { return main_lexicon_->UpdateCrc(); }
188*8b6cd535SAndroid Build Coastguard Worker 
189*8b6cd535SAndroid Build Coastguard Worker   // Calculates and returns the checksums of the components in the MainIndex.
GetChecksum()190*8b6cd535SAndroid Build Coastguard Worker   Crc32 GetChecksum() const { return main_lexicon_->GetCrc(); }
191*8b6cd535SAndroid Build Coastguard Worker 
last_added_document_id()192*8b6cd535SAndroid Build Coastguard Worker   DocumentId last_added_document_id() const {
193*8b6cd535SAndroid Build Coastguard Worker     return flash_index_storage_->get_last_indexed_docid();
194*8b6cd535SAndroid Build Coastguard Worker   }
195*8b6cd535SAndroid Build Coastguard Worker 
Reset()196*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status Reset() {
197*8b6cd535SAndroid Build Coastguard Worker     ICING_RETURN_IF_ERROR(flash_index_storage_->Reset());
198*8b6cd535SAndroid Build Coastguard Worker     main_lexicon_->Clear();
199*8b6cd535SAndroid Build Coastguard Worker     return libtextclassifier3::Status::OK;
200*8b6cd535SAndroid Build Coastguard Worker   }
201*8b6cd535SAndroid Build Coastguard Worker 
Warm()202*8b6cd535SAndroid Build Coastguard Worker   void Warm() { main_lexicon_->Warm(); }
203*8b6cd535SAndroid Build Coastguard Worker 
204*8b6cd535SAndroid Build Coastguard Worker   // Returns:
205*8b6cd535SAndroid Build Coastguard Worker   //  - elements size of lexicon and index, on success
206*8b6cd535SAndroid Build Coastguard Worker   //  - INTERNAL on IO error
207*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
208*8b6cd535SAndroid Build Coastguard Worker 
209*8b6cd535SAndroid Build Coastguard Worker   // Takes the provided storage_info, populates the fields related to the main
210*8b6cd535SAndroid Build Coastguard Worker   // index and returns that storage_info.
211*8b6cd535SAndroid Build Coastguard Worker   //
212*8b6cd535SAndroid Build Coastguard Worker   // If an IO error occurs while trying to calculate the value for a field, then
213*8b6cd535SAndroid Build Coastguard Worker   // that field will be set to -1.
214*8b6cd535SAndroid Build Coastguard Worker   IndexStorageInfoProto GetStorageInfo(
215*8b6cd535SAndroid Build Coastguard Worker       IndexStorageInfoProto storage_info) const;
216*8b6cd535SAndroid Build Coastguard Worker 
217*8b6cd535SAndroid Build Coastguard Worker   // Returns debug information for the main index in out.
218*8b6cd535SAndroid Build Coastguard Worker   // verbosity = BASIC, simplest debug information - just the lexicon
219*8b6cd535SAndroid Build Coastguard Worker   // verbosity = DETAILED, more detailed debug information including raw
220*8b6cd535SAndroid Build Coastguard Worker   // postings lists.
221*8b6cd535SAndroid Build Coastguard Worker   std::string GetDebugInfo(DebugInfoVerbosity::Code verbosity) const;
222*8b6cd535SAndroid Build Coastguard Worker 
223*8b6cd535SAndroid Build Coastguard Worker   // Reduces internal file sizes by reclaiming space of deleted documents.
224*8b6cd535SAndroid Build Coastguard Worker   //
225*8b6cd535SAndroid Build Coastguard Worker   // This method will update the last_added_docid of the index to the largest
226*8b6cd535SAndroid Build Coastguard Worker   // document id that still appears in the index after compaction.
227*8b6cd535SAndroid Build Coastguard Worker   //
228*8b6cd535SAndroid Build Coastguard Worker   // Returns:
229*8b6cd535SAndroid Build Coastguard Worker   //   OK on success
230*8b6cd535SAndroid Build Coastguard Worker   //   INTERNAL_ERROR on IO error, this indicates that the index may be in an
231*8b6cd535SAndroid Build Coastguard Worker   //                               invalid state and should be cleared.
232*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status Optimize(
233*8b6cd535SAndroid Build Coastguard Worker       const std::vector<DocumentId>& document_id_old_to_new);
234*8b6cd535SAndroid Build Coastguard Worker 
235*8b6cd535SAndroid Build Coastguard Worker  private:
236*8b6cd535SAndroid Build Coastguard Worker   explicit MainIndex(const std::string& index_directory,
237*8b6cd535SAndroid Build Coastguard Worker                      const Filesystem* filesystem,
238*8b6cd535SAndroid Build Coastguard Worker                      const IcingFilesystem* icing_filesystem);
239*8b6cd535SAndroid Build Coastguard Worker 
240*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status Init();
241*8b6cd535SAndroid Build Coastguard Worker 
242*8b6cd535SAndroid Build Coastguard Worker   // Helpers for merging the lexicon
243*8b6cd535SAndroid Build Coastguard Worker   // Add all 'backfill' branch points. Backfill branch points are prefix
244*8b6cd535SAndroid Build Coastguard Worker   // branch points that are a prefix of terms that existed in the lexicon
245*8b6cd535SAndroid Build Coastguard Worker   // to the merge.
246*8b6cd535SAndroid Build Coastguard Worker   //
247*8b6cd535SAndroid Build Coastguard Worker   // For example, if the main lexicon only contains "foot" and is then merged
248*8b6cd535SAndroid Build Coastguard Worker   // with a lite lexicon containing only "fool", then a backfill branch point
249*8b6cd535SAndroid Build Coastguard Worker   // for "foo" will be added to contain prefix hits from both the pre-existing
250*8b6cd535SAndroid Build Coastguard Worker   // posting list for "foot" and the new posting list for "fool".
251*8b6cd535SAndroid Build Coastguard Worker   //
252*8b6cd535SAndroid Build Coastguard Worker   // Populates LexiconMergeOutputs.backfill_map
253*8b6cd535SAndroid Build Coastguard Worker   //
254*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
255*8b6cd535SAndroid Build Coastguard Worker   //   - OK on success
256*8b6cd535SAndroid Build Coastguard Worker   //   - INTERNAL on IO error while writing to the main lexicon.
257*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<LexiconMergeOutputs> AddBackfillBranchPoints(
258*8b6cd535SAndroid Build Coastguard Worker       const IcingDynamicTrie& other_lexicon);
259*8b6cd535SAndroid Build Coastguard Worker 
260*8b6cd535SAndroid Build Coastguard Worker   // Add all terms from the lexicon.
261*8b6cd535SAndroid Build Coastguard Worker   //
262*8b6cd535SAndroid Build Coastguard Worker   // Populates LexiconMergeOutputs.other_tvi_to_main_tvi
263*8b6cd535SAndroid Build Coastguard Worker   //
264*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
265*8b6cd535SAndroid Build Coastguard Worker   //   - OK on success
266*8b6cd535SAndroid Build Coastguard Worker   //   - INTERNAL on IO error while writing to the main lexicon.
267*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<LexiconMergeOutputs> AddTerms(
268*8b6cd535SAndroid Build Coastguard Worker       const IcingDynamicTrie& other_lexicon, LexiconMergeOutputs&& outputs);
269*8b6cd535SAndroid Build Coastguard Worker 
270*8b6cd535SAndroid Build Coastguard Worker   // Add all branch points for terms added from the lexicon.
271*8b6cd535SAndroid Build Coastguard Worker   // For example, if the main lexicon is empty and is then merged with a
272*8b6cd535SAndroid Build Coastguard Worker   // lexicon containing only "foot" and "fool", then a branch point for "foo"
273*8b6cd535SAndroid Build Coastguard Worker   // will be added to contain prefix hits from both "foot" and "fool".
274*8b6cd535SAndroid Build Coastguard Worker   //
275*8b6cd535SAndroid Build Coastguard Worker   // Populates LexiconMergeOutputs.other_tvi_to_prefix_main_tvis and
276*8b6cd535SAndroid Build Coastguard Worker   // LexiconMergeOutputs.prefix_tvis_buf;
277*8b6cd535SAndroid Build Coastguard Worker   //
278*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
279*8b6cd535SAndroid Build Coastguard Worker   //   - OK on success
280*8b6cd535SAndroid Build Coastguard Worker   //   - INTERNAL on IO error while writing to the main lexicon.
281*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<LexiconMergeOutputs> AddBranchPoints(
282*8b6cd535SAndroid Build Coastguard Worker       const IcingDynamicTrie& other_lexicon, LexiconMergeOutputs&& outputs);
283*8b6cd535SAndroid Build Coastguard Worker 
284*8b6cd535SAndroid Build Coastguard Worker   // Copies all properties from old_tvi in the other lexicon to the new_tvi in
285*8b6cd535SAndroid Build Coastguard Worker   // the main lexicon.
286*8b6cd535SAndroid Build Coastguard Worker   // Returns true on success, false if an IO error is encountered.
287*8b6cd535SAndroid Build Coastguard Worker   bool CopyProperties(const IcingDynamicTrie::PropertyReadersAll& prop_reader,
288*8b6cd535SAndroid Build Coastguard Worker                       const IcingDynamicTrie& other_lexicon, uint32_t other_tvi,
289*8b6cd535SAndroid Build Coastguard Worker                       uint32_t new_main_tvi);
290*8b6cd535SAndroid Build Coastguard Worker 
291*8b6cd535SAndroid Build Coastguard Worker   // Add all hits between [hit_elements, hit_elements + len) to main_index,
292*8b6cd535SAndroid Build Coastguard Worker   // updating the entry in the main lexicon at trie_value_index to point to the
293*8b6cd535SAndroid Build Coastguard Worker   // resulting posting list. Hits are sorted in descending document id order, so
294*8b6cd535SAndroid Build Coastguard Worker   // they should be to posting lists in reverse (starting at hit_elements
295*8b6cd535SAndroid Build Coastguard Worker   // + len - 1) and working backwards. Therefore, hit_elements must be in sorted
296*8b6cd535SAndroid Build Coastguard Worker   // order.
297*8b6cd535SAndroid Build Coastguard Worker   //
298*8b6cd535SAndroid Build Coastguard Worker   // trie_value_index may point to a valid posting list id if there is a
299*8b6cd535SAndroid Build Coastguard Worker   // pre-existing posting list to append to.
300*8b6cd535SAndroid Build Coastguard Worker   //
301*8b6cd535SAndroid Build Coastguard Worker   // If backfill_posting_list_id is valid, then the hits from the posting list
302*8b6cd535SAndroid Build Coastguard Worker   // identified by backfill_posting_list_id should be added to the new posting
303*8b6cd535SAndroid Build Coastguard Worker   // list before the hits in hit_elements.
304*8b6cd535SAndroid Build Coastguard Worker   //
305*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
306*8b6cd535SAndroid Build Coastguard Worker   //  - OK on success
307*8b6cd535SAndroid Build Coastguard Worker   //  - INVALID_ARGUMENT if posting_list_id stored at trie_value_index is valid
308*8b6cd535SAndroid Build Coastguard Worker   //  but points out of bounds in the IndexBlock referred to by
309*8b6cd535SAndroid Build Coastguard Worker   //  id.block_index(), if one of the hits from [hit_elements,hit_elements+len)
310*8b6cd535SAndroid Build Coastguard Worker   //  is not valid, or if one of the hits from [hit_elements,hit_elements+len)
311*8b6cd535SAndroid Build Coastguard Worker   //  is not less than the previously added hits.
312*8b6cd535SAndroid Build Coastguard Worker   //  - INTERNAL_ERROR if posting_list_id stored at trie_value_index is valid
313*8b6cd535SAndroid Build Coastguard Worker   //  but points to an invalid block index or if unable to mmap the IndexBlock.
314*8b6cd535SAndroid Build Coastguard Worker   //  - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
315*8b6cd535SAndroid Build Coastguard Worker   //  posting list.
316*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status AddHitsForTerm(
317*8b6cd535SAndroid Build Coastguard Worker       uint32_t tvi, PostingListIdentifier backfill_posting_list_id,
318*8b6cd535SAndroid Build Coastguard Worker       const TermIdHitPair* hit_elements, size_t len);
319*8b6cd535SAndroid Build Coastguard Worker 
320*8b6cd535SAndroid Build Coastguard Worker   // Adds all prefix hits or hits from prefix sections present on the posting
321*8b6cd535SAndroid Build Coastguard Worker   // list identified by backfill_posting_list_id to hit_accum.
322*8b6cd535SAndroid Build Coastguard Worker   //
323*8b6cd535SAndroid Build Coastguard Worker   // RETURNS:
324*8b6cd535SAndroid Build Coastguard Worker   //  - OK, on success
325*8b6cd535SAndroid Build Coastguard Worker   //  - INVALID_ARGUMENT if backfill_posting_list_id points out of bounds in the
326*8b6cd535SAndroid Build Coastguard Worker   //  IndexBlock referred to by id.block_index()
327*8b6cd535SAndroid Build Coastguard Worker   //  - INTERNAL_ERROR if unable to mmap the block identified by
328*8b6cd535SAndroid Build Coastguard Worker   //  backfill_posting_list_id or if the posting list identified by
329*8b6cd535SAndroid Build Coastguard Worker   //  backfill_posting_list_id has been corrupted.
330*8b6cd535SAndroid Build Coastguard Worker   //  - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
331*8b6cd535SAndroid Build Coastguard Worker   //  posting list.
332*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status AddPrefixBackfillHits(
333*8b6cd535SAndroid Build Coastguard Worker       PostingListIdentifier backfill_posting_list_id,
334*8b6cd535SAndroid Build Coastguard Worker       PostingListHitAccessor* hit_accum);
335*8b6cd535SAndroid Build Coastguard Worker 
336*8b6cd535SAndroid Build Coastguard Worker   // Transfer hits from old_pl_accessor to new_index for term.
337*8b6cd535SAndroid Build Coastguard Worker   //
338*8b6cd535SAndroid Build Coastguard Worker   // Returns:
339*8b6cd535SAndroid Build Coastguard Worker   //   largest document id added to the translated posting list, on success
340*8b6cd535SAndroid Build Coastguard Worker   //   INTERNAL_ERROR on IO error
341*8b6cd535SAndroid Build Coastguard Worker   static libtextclassifier3::StatusOr<DocumentId> TransferAndAddHits(
342*8b6cd535SAndroid Build Coastguard Worker       const std::vector<DocumentId>& document_id_old_to_new,
343*8b6cd535SAndroid Build Coastguard Worker       std::string_view term, PostingListHitAccessor& old_pl_accessor,
344*8b6cd535SAndroid Build Coastguard Worker       MainIndex* new_index);
345*8b6cd535SAndroid Build Coastguard Worker 
346*8b6cd535SAndroid Build Coastguard Worker   // Transfer hits from the current main index to new_index.
347*8b6cd535SAndroid Build Coastguard Worker   //
348*8b6cd535SAndroid Build Coastguard Worker   // Returns:
349*8b6cd535SAndroid Build Coastguard Worker   //   OK on success
350*8b6cd535SAndroid Build Coastguard Worker   //   INTERNAL_ERROR on IO error
351*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status TransferIndex(
352*8b6cd535SAndroid Build Coastguard Worker       const std::vector<DocumentId>& document_id_old_to_new,
353*8b6cd535SAndroid Build Coastguard Worker       MainIndex* new_index);
354*8b6cd535SAndroid Build Coastguard Worker 
355*8b6cd535SAndroid Build Coastguard Worker   std::string base_dir_;
356*8b6cd535SAndroid Build Coastguard Worker   const Filesystem* filesystem_;
357*8b6cd535SAndroid Build Coastguard Worker   const IcingFilesystem* icing_filesystem_;
358*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<PostingListHitSerializer> posting_list_hit_serializer_;
359*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<FlashIndexStorage> flash_index_storage_;
360*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<IcingDynamicTrie> main_lexicon_;
361*8b6cd535SAndroid Build Coastguard Worker };
362*8b6cd535SAndroid Build Coastguard Worker 
363*8b6cd535SAndroid Build Coastguard Worker }  // namespace lib
364*8b6cd535SAndroid Build Coastguard Worker }  // namespace icing
365*8b6cd535SAndroid Build Coastguard Worker 
366*8b6cd535SAndroid Build Coastguard Worker #endif  // ICING_INDEX_MAIN_MAIN_INDEX_H_
367