xref: /aosp_15_r20/external/icing/icing/index/numeric/integer-index.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/numeric/integer-index.h"
16 
17 #include <algorithm>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 #include <utility>
23 #include <vector>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/file/destructible-directory.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/file/memory-mapped-file.h"
32 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
33 #include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
34 #include "icing/index/numeric/integer-index-storage.h"
35 #include "icing/index/numeric/posting-list-integer-index-serializer.h"
36 #include "icing/store/document-id.h"
37 #include "icing/util/crc32.h"
38 #include "icing/util/status-macros.h"
39 
40 namespace icing {
41 namespace lib {
42 
43 namespace {
44 
45 // Helper function to get the file name of metadata.
GetMetadataFileName()46 std::string GetMetadataFileName() {
47   return absl_ports::StrCat(IntegerIndex::kFilePrefix, ".m");
48 }
49 
50 // Helper function to get the file path of metadata according to the given
51 // working directory.
GetMetadataFilePath(std::string_view working_path)52 std::string GetMetadataFilePath(std::string_view working_path) {
53   return absl_ports::StrCat(working_path, "/", GetMetadataFileName());
54 }
55 
// Name of the sub directory (under the working path) that holds the shared
// IntegerIndexStorage used for wildcard properties.
constexpr std::string_view kWildcardPropertyIndexFileName{
    "wildcard_property_index"};

// Name of the file (under the working path) that stores the list of property
// paths assigned to the wildcard storage.
constexpr std::string_view kWildcardPropertyStorageFileName{
    "wildcard_property_storage"};
61 
GetWildcardPropertyStorageFilePath(std::string_view working_path)62 std::string GetWildcardPropertyStorageFilePath(std::string_view working_path) {
63   return absl_ports::StrCat(working_path, "/",
64                             kWildcardPropertyStorageFileName);
65 }
66 
67 // Helper function to get the sub working (directory) path of
68 // IntegerIndexStorage according to the given working directory and property
69 // path.
GetPropertyIndexStoragePath(std::string_view working_path,std::string_view property_path)70 std::string GetPropertyIndexStoragePath(std::string_view working_path,
71                                         std::string_view property_path) {
72   return absl_ports::StrCat(working_path, "/", property_path);
73 }
74 
75 // Helper function to get all existing property paths by listing all
76 // directories.
77 libtextclassifier3::StatusOr<std::vector<std::string>>
GetAllExistingPropertyPaths(const Filesystem & filesystem,const std::string & working_path)78 GetAllExistingPropertyPaths(const Filesystem& filesystem,
79                             const std::string& working_path) {
80   std::vector<std::string> property_paths;
81   std::unordered_set<std::string> excludes = {
82       GetMetadataFileName(), std::string(kWildcardPropertyStorageFileName)};
83   if (!filesystem.ListDirectory(working_path.c_str(), excludes,
84                                 /*recursive=*/false, &property_paths)) {
85     return absl_ports::InternalError("Failed to list directory");
86   }
87   return property_paths;
88 }
89 
90 libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType>
GetPropertyIntegerIndexStorageMap(const Filesystem & filesystem,const std::string & working_path,PostingListIntegerIndexSerializer * posting_list_serializer,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)91 GetPropertyIntegerIndexStorageMap(
92     const Filesystem& filesystem, const std::string& working_path,
93     PostingListIntegerIndexSerializer* posting_list_serializer,
94     int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
95   ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths,
96                          GetAllExistingPropertyPaths(filesystem, working_path));
97 
98   IntegerIndex::PropertyToStorageMapType property_to_storage_map;
99   for (const std::string& property_path : property_paths) {
100     if (property_path == kWildcardPropertyIndexFileName) {
101       continue;
102     }
103     std::string storage_working_path =
104         GetPropertyIndexStoragePath(working_path, property_path);
105     ICING_ASSIGN_OR_RETURN(
106         std::unique_ptr<IntegerIndexStorage> storage,
107         IntegerIndexStorage::Create(
108             filesystem, storage_working_path,
109             IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
110                                          pre_mapping_fbv),
111             posting_list_serializer));
112     property_to_storage_map.insert(
113         std::make_pair(property_path, std::move(storage)));
114   }
115 
116   return property_to_storage_map;
117 }
118 
119 // RETURNS:
120 //   - On success, an unordered_set representing the list of property paths
121 //     stored in the WildcardPropertyStorage managed by property_storage
122 //   - INTERNAL_ERROR on any failure to successfully read the underlying proto.
CreatePropertySet(const FileBackedProto<WildcardPropertyStorage> & property_storage)123 libtextclassifier3::StatusOr<std::unordered_set<std::string>> CreatePropertySet(
124     const FileBackedProto<WildcardPropertyStorage>& property_storage) {
125   std::unordered_set<std::string> wildcard_properties_set;
126   auto wildcard_properties_or = property_storage.Read();
127   if (!wildcard_properties_or.ok()) {
128     if (absl_ports::IsNotFound(wildcard_properties_or.status())) {
129       return wildcard_properties_set;
130     }
131     return wildcard_properties_or.status();
132   }
133 
134   const WildcardPropertyStorage* wildcard_properties =
135       wildcard_properties_or.ValueOrDie();
136   wildcard_properties_set.reserve(wildcard_properties->property_entries_size());
137   for (const std::string& property : wildcard_properties->property_entries()) {
138     wildcard_properties_set.insert(property);
139   }
140   return wildcard_properties_set;
141 }
142 
143 }  // namespace
144 
IndexAllBufferedKeys()145 libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
146   integer_index_.SetDirty();
147 
148   auto iter = integer_index_.property_to_storage_map_.find(property_path_);
149   IntegerIndexStorage* target_storage = nullptr;
150   // 1. Check if this property already has its own individual index.
151   if (iter != integer_index_.property_to_storage_map_.end()) {
152     target_storage = iter->second.get();
153     // 2. Check if this property was added to wildcard storage.
154   } else if (integer_index_.wildcard_properties_set_.find(property_path_) !=
155              integer_index_.wildcard_properties_set_.end()) {
156     target_storage = integer_index_.wildcard_index_storage_.get();
157     // 3. Check if we've reach the limit of individual property storages.
158   } else if (integer_index_.property_to_storage_map_.size() >=
159              kMaxPropertyStorages) {
160     // 3a. Create the wildcard storage if it doesn't exist.
161     if (integer_index_.wildcard_index_storage_ == nullptr) {
162       ICING_ASSIGN_OR_RETURN(
163           integer_index_.wildcard_index_storage_,
164           IntegerIndexStorage::Create(
165               integer_index_.filesystem_,
166               GetPropertyIndexStoragePath(integer_index_.working_path_,
167                                           kWildcardPropertyIndexFileName),
168               IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
169                                            pre_mapping_fbv_),
170               integer_index_.posting_list_serializer_.get()));
171     }
172     ICING_RETURN_IF_ERROR(
173         integer_index_.AddPropertyToWildcardStorage(property_path_));
174     target_storage = integer_index_.wildcard_index_storage_.get();
175     // 4. Create a new individual storage for this new property.
176   } else {
177     ICING_ASSIGN_OR_RETURN(
178         std::unique_ptr<IntegerIndexStorage> new_storage,
179         IntegerIndexStorage::Create(
180             integer_index_.filesystem_,
181             GetPropertyIndexStoragePath(integer_index_.working_path_,
182                                         property_path_),
183             IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
184                                          pre_mapping_fbv_),
185             integer_index_.posting_list_serializer_.get()));
186     target_storage = new_storage.get();
187     integer_index_.property_to_storage_map_.insert(
188         std::make_pair(property_path_, std::move(new_storage)));
189   }
190 
191   return target_storage->AddKeys(document_id_, section_id_,
192                                  std::move(seen_keys_));
193 }
194 
195 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
Create(const Filesystem & filesystem,std::string working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)196 IntegerIndex::Create(const Filesystem& filesystem, std::string working_path,
197                      int32_t num_data_threshold_for_bucket_split,
198                      bool pre_mapping_fbv) {
199   if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
200     // Discard working_path if metadata file is missing, and reinitialize.
201     if (filesystem.DirectoryExists(working_path.c_str())) {
202       ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
203     }
204     return InitializeNewFiles(filesystem, std::move(working_path),
205                               num_data_threshold_for_bucket_split,
206                               pre_mapping_fbv);
207   }
208   return InitializeExistingFiles(filesystem, std::move(working_path),
209                                  num_data_threshold_for_bucket_split,
210                                  pre_mapping_fbv);
211 }
212 
~IntegerIndex()213 IntegerIndex::~IntegerIndex() {
214   if (!PersistToDisk().ok()) {
215     ICING_LOG(WARNING)
216         << "Failed to persist integer index to disk while destructing "
217         << working_path_;
218   }
219 }
220 
221 libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
GetIterator(std::string_view property_path,int64_t key_lower,int64_t key_upper,const DocumentStore & document_store,const SchemaStore & schema_store,int64_t current_time_ms) const222 IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
223                           int64_t key_upper,
224                           const DocumentStore& document_store,
225                           const SchemaStore& schema_store,
226                           int64_t current_time_ms) const {
227   std::string property_path_str(property_path);
228   auto iter = property_to_storage_map_.find(property_path_str);
229   if (iter != property_to_storage_map_.end()) {
230     return iter->second->GetIterator(key_lower, key_upper);
231   }
232 
233   if (wildcard_properties_set_.find(property_path_str) !=
234       wildcard_properties_set_.end()) {
235     ICING_ASSIGN_OR_RETURN(
236         std::unique_ptr<DocHitInfoIterator> delegate,
237         wildcard_index_storage_->GetIterator(key_lower, key_upper));
238     std::set<std::string> property_paths = {std::move(property_path_str)};
239     return DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
240         std::move(delegate), &document_store, &schema_store,
241         std::move(property_paths), current_time_ms);
242   }
243 
244   // Return an empty iterator.
245   return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
246       /*numeric_index_iter=*/nullptr);
247 }
248 
AddPropertyToWildcardStorage(const std::string & property_path)249 libtextclassifier3::Status IntegerIndex::AddPropertyToWildcardStorage(
250     const std::string& property_path) {
251   SetDirty();
252 
253   WildcardPropertyStorage wildcard_properties;
254   wildcard_properties.mutable_property_entries()->Reserve(
255       wildcard_properties_set_.size());
256   for (const std::string& property_path : wildcard_properties_set_) {
257     wildcard_properties.add_property_entries(property_path);
258   }
259   ICING_RETURN_IF_ERROR(wildcard_property_storage_->Write(
260       std::make_unique<WildcardPropertyStorage>(
261           std::move(wildcard_properties))));
262 
263   wildcard_properties_set_.insert(property_path);
264   return libtextclassifier3::Status::OK;
265 }
266 
Optimize(const std::vector<DocumentId> & document_id_old_to_new,DocumentId new_last_added_document_id)267 libtextclassifier3::Status IntegerIndex::Optimize(
268     const std::vector<DocumentId>& document_id_old_to_new,
269     DocumentId new_last_added_document_id) {
270   std::string temp_working_path = working_path_ + "_temp";
271   ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
272 
273   DestructibleDirectory temp_working_path_ddir(&filesystem_,
274                                                std::move(temp_working_path));
275   if (!temp_working_path_ddir.is_valid()) {
276     return absl_ports::InternalError(
277         "Unable to create temp directory to build new integer index");
278   }
279 
280   {
281     // Transfer all indexed data from current integer index to new integer
282     // index. Also PersistToDisk and destruct the instance after finishing, so
283     // we can safely swap directories later.
284     ICING_ASSIGN_OR_RETURN(
285         std::unique_ptr<IntegerIndex> new_integer_index,
286         Create(filesystem_, temp_working_path_ddir.dir(),
287                num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
288     ICING_RETURN_IF_ERROR(
289         TransferIndex(document_id_old_to_new, new_integer_index.get()));
290     new_integer_index->set_last_added_document_id(new_last_added_document_id);
291     ICING_RETURN_IF_ERROR(new_integer_index->PersistToDisk());
292   }
293 
294   // Destruct current storage instances to safely swap directories.
295   metadata_mmapped_file_.reset();
296   property_to_storage_map_.clear();
297   wildcard_index_storage_.reset();
298   wildcard_property_storage_.reset();
299   if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
300                              working_path_.c_str())) {
301     return absl_ports::InternalError(
302         "Unable to apply new integer index due to failed swap");
303   }
304 
305   // Reinitialize the integer index.
306   std::string metadata_file_path = GetMetadataFilePath(working_path_);
307   ICING_ASSIGN_OR_RETURN(
308       MemoryMappedFile metadata_mmapped_file,
309       MemoryMappedFile::Create(filesystem_, metadata_file_path,
310                                MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
311                                /*max_file_size=*/kMetadataFileSize,
312                                /*pre_mapping_file_offset=*/0,
313                                /*pre_mapping_mmap_size=*/kMetadataFileSize));
314   if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
315     return absl_ports::InternalError(
316         "Invalid metadata file size after Optimize");
317   }
318   metadata_mmapped_file_ =
319       std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
320 
321   // Recreate all of the data structures tracking the wildcard storage.
322   std::string wildcard_property_path =
323       GetWildcardPropertyStorageFilePath(working_path_);
324   wildcard_property_storage_ =
325       std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
326           filesystem_, wildcard_property_path);
327 
328   ICING_ASSIGN_OR_RETURN(wildcard_properties_set_,
329                          CreatePropertySet(*wildcard_property_storage_));
330   if (!wildcard_properties_set_.empty()) {
331     ICING_ASSIGN_OR_RETURN(
332         wildcard_index_storage_,
333         IntegerIndexStorage::Create(
334             filesystem_,
335             GetPropertyIndexStoragePath(working_path_,
336                                         kWildcardPropertyIndexFileName),
337             IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
338                                          pre_mapping_fbv_),
339             posting_list_serializer_.get()));
340   }
341 
342   // Initialize all existing integer index storages.
343   ICING_ASSIGN_OR_RETURN(
344       property_to_storage_map_,
345       GetPropertyIntegerIndexStorageMap(
346           filesystem_, working_path_, posting_list_serializer_.get(),
347           num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
348 
349   return libtextclassifier3::Status::OK;
350 }
351 
Clear()352 libtextclassifier3::Status IntegerIndex::Clear() {
353   SetDirty();
354 
355   // Step 1: clear property_to_storage_map_.
356   property_to_storage_map_.clear();
357   wildcard_index_storage_.reset();
358 
359   // Step 2: delete all IntegerIndexStorages. It is safe because there is no
360   //         active IntegerIndexStorage after clearing the map.
361   ICING_ASSIGN_OR_RETURN(
362       std::vector<std::string> property_paths,
363       GetAllExistingPropertyPaths(filesystem_, working_path_));
364   for (const std::string& property_path : property_paths) {
365     ICING_RETURN_IF_ERROR(IntegerIndexStorage::Discard(
366         filesystem_,
367         GetPropertyIndexStoragePath(working_path_, property_path)));
368   }
369 
370   // Step 3: Delete the wildcard property storage
371   std::string wildcard_property_path =
372       GetWildcardPropertyStorageFilePath(working_path_);
373   if (filesystem_.FileExists(wildcard_property_path.c_str()) ||
374       !filesystem_.DeleteFile(wildcard_property_path.c_str())) {
375     return absl_ports::InternalError(absl_ports::StrCat(
376         "Unable to delete file at path ", wildcard_property_path));
377   }
378 
379   info().last_added_document_id = kInvalidDocumentId;
380   return libtextclassifier3::Status::OK;
381 }
382 
383 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeNewFiles(const Filesystem & filesystem,std::string && working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)384 IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
385                                  std::string&& working_path,
386                                  int32_t num_data_threshold_for_bucket_split,
387                                  bool pre_mapping_fbv) {
388   // Create working directory.
389   if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
390     return absl_ports::InternalError(
391         absl_ports::StrCat("Failed to create directory: ", working_path));
392   }
393 
394   // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
395   // call GrowAndRemapIfNecessary to grow the underlying file.
396   ICING_ASSIGN_OR_RETURN(
397       MemoryMappedFile metadata_mmapped_file,
398       MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
399                                MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
400                                /*max_file_size=*/kMetadataFileSize,
401                                /*pre_mapping_file_offset=*/0,
402                                /*pre_mapping_mmap_size=*/kMetadataFileSize));
403   ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
404       /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
405 
406   std::string wildcard_property_path =
407       GetWildcardPropertyStorageFilePath(working_path);
408   auto wildcard_property_storage =
409       std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
410           filesystem, wildcard_property_path);
411 
412   // Create instance.
413   auto new_integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
414       filesystem, std::move(working_path),
415       std::make_unique<PostingListIntegerIndexSerializer>(),
416       std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
417       /*property_to_storage_map=*/{}, std::move(wildcard_property_storage),
418       /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr,
419       num_data_threshold_for_bucket_split, pre_mapping_fbv));
420 
421   // Initialize info content by writing mapped memory directly.
422   Info& info_ref = new_integer_index->info();
423   info_ref.magic = Info::kMagic;
424   info_ref.last_added_document_id = kInvalidDocumentId;
425   info_ref.num_data_threshold_for_bucket_split =
426       num_data_threshold_for_bucket_split;
427 
428   // Initialize new PersistentStorage. The initial checksums will be computed
429   // and set via InitializeNewStorage.
430   ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
431 
432   return new_integer_index;
433 }
434 
435 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeExistingFiles(const Filesystem & filesystem,std::string && working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)436 IntegerIndex::InitializeExistingFiles(
437     const Filesystem& filesystem, std::string&& working_path,
438     int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
439   // Mmap the content of the crcs and info.
440   ICING_ASSIGN_OR_RETURN(
441       MemoryMappedFile metadata_mmapped_file,
442       MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
443                                MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
444                                /*max_file_size=*/kMetadataFileSize,
445                                /*pre_mapping_file_offset=*/0,
446                                /*pre_mapping_mmap_size=*/kMetadataFileSize));
447   if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
448     return absl_ports::FailedPreconditionError("Incorrect metadata file size");
449   }
450 
451   auto posting_list_serializer =
452       std::make_unique<PostingListIntegerIndexSerializer>();
453 
454   // Initialize all existing integer index storages.
455   ICING_ASSIGN_OR_RETURN(
456       PropertyToStorageMapType property_to_storage_map,
457       GetPropertyIntegerIndexStorageMap(
458           filesystem, working_path, posting_list_serializer.get(),
459           num_data_threshold_for_bucket_split, pre_mapping_fbv));
460 
461   std::string wildcard_property_path =
462       GetWildcardPropertyStorageFilePath(working_path);
463   auto wildcard_property_storage =
464       std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
465           filesystem, wildcard_property_path);
466 
467   ICING_ASSIGN_OR_RETURN(
468       std::unordered_set<std::string> wildcard_properties_set,
469       CreatePropertySet(*wildcard_property_storage));
470 
471   std::unique_ptr<IntegerIndexStorage> wildcard_index_storage;
472   if (!wildcard_properties_set.empty()) {
473     ICING_ASSIGN_OR_RETURN(
474         wildcard_index_storage,
475         IntegerIndexStorage::Create(
476             filesystem,
477             GetPropertyIndexStoragePath(working_path,
478                                         kWildcardPropertyIndexFileName),
479             IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
480                                          pre_mapping_fbv),
481             posting_list_serializer.get()));
482   }
483 
484   // Create instance.
485   auto integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
486       filesystem, std::move(working_path), std::move(posting_list_serializer),
487       std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
488       std::move(property_to_storage_map), std::move(wildcard_property_storage),
489       std::move(wildcard_properties_set), std::move(wildcard_index_storage),
490       num_data_threshold_for_bucket_split, pre_mapping_fbv));
491 
492   // Initialize existing PersistentStorage. Checksums will be validated.
493   ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
494 
495   // Validate magic.
496   if (integer_index->info().magic != Info::kMagic) {
497     return absl_ports::FailedPreconditionError("Incorrect magic value");
498   }
499 
500   // If num_data_threshold_for_bucket_split mismatches, then return error to let
501   // caller rebuild.
502   if (integer_index->info().num_data_threshold_for_bucket_split !=
503       num_data_threshold_for_bucket_split) {
504     return absl_ports::FailedPreconditionError(
505         "Mismatch num_data_threshold_for_bucket_split");
506   }
507 
508   return integer_index;
509 }
510 
511 libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
TransferIntegerIndexStorage(const std::vector<DocumentId> & document_id_old_to_new,const IntegerIndexStorage * old_storage,const std::string & property_path,IntegerIndex * new_integer_index) const512 IntegerIndex::TransferIntegerIndexStorage(
513     const std::vector<DocumentId>& document_id_old_to_new,
514     const IntegerIndexStorage* old_storage, const std::string& property_path,
515     IntegerIndex* new_integer_index) const {
516   std::string new_storage_working_path = GetPropertyIndexStoragePath(
517       new_integer_index->working_path_, property_path);
518   ICING_ASSIGN_OR_RETURN(
519       std::unique_ptr<IntegerIndexStorage> new_storage,
520       IntegerIndexStorage::Create(
521           new_integer_index->filesystem_, new_storage_working_path,
522           IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
523                                        pre_mapping_fbv_),
524           new_integer_index->posting_list_serializer_.get()));
525 
526   ICING_RETURN_IF_ERROR(
527       old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
528 
529   if (new_storage->num_data() == 0) {
530     new_storage.reset();
531     ICING_RETURN_IF_ERROR(
532         IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
533   }
534   return new_storage;
535 }
536 
TransferWildcardStorage(IntegerIndex * new_integer_index) const537 libtextclassifier3::Status IntegerIndex::TransferWildcardStorage(
538     IntegerIndex* new_integer_index) const {
539   auto property_storage = std::make_unique<WildcardPropertyStorage>();
540   property_storage->mutable_property_entries()->Reserve(
541       wildcard_properties_set_.size());
542   for (const std::string& property : wildcard_properties_set_) {
543     property_storage->add_property_entries(property);
544   }
545 
546   ICING_RETURN_IF_ERROR(new_integer_index->wildcard_property_storage_->Write(
547       std::move(property_storage)));
548   new_integer_index->wildcard_properties_set_ = wildcard_properties_set_;
549   return libtextclassifier3::Status::OK;
550 }
551 
TransferIndex(const std::vector<DocumentId> & document_id_old_to_new,IntegerIndex * new_integer_index) const552 libtextclassifier3::Status IntegerIndex::TransferIndex(
553     const std::vector<DocumentId>& document_id_old_to_new,
554     IntegerIndex* new_integer_index) const {
555   // Transfer over the integer index storages
556   std::unique_ptr<IntegerIndexStorage> new_storage;
557   for (const auto& [property_path, old_storage] : property_to_storage_map_) {
558     ICING_ASSIGN_OR_RETURN(
559         new_storage,
560         TransferIntegerIndexStorage(document_id_old_to_new, old_storage.get(),
561                                     property_path, new_integer_index));
562     if (new_storage != nullptr) {
563       new_integer_index->property_to_storage_map_.insert(
564           {property_path, std::move(new_storage)});
565     }
566   }
567   if (wildcard_index_storage_ != nullptr) {
568     ICING_ASSIGN_OR_RETURN(
569         new_storage,
570         TransferIntegerIndexStorage(
571             document_id_old_to_new, wildcard_index_storage_.get(),
572             std::string(kWildcardPropertyIndexFileName), new_integer_index));
573     if (new_storage != nullptr) {
574       new_integer_index->wildcard_index_storage_ = std::move(new_storage);
575 
576       // The only time we need to copy over the list of properties using
577       // wildcard storage is if wildcard_index_storage and new_storage are both
578       // non-null. Otherwise, the new wildcard index storage won't have any
579       // data.
580       ICING_RETURN_IF_ERROR(TransferWildcardStorage(new_integer_index));
581     }
582   }
583 
584   return libtextclassifier3::Status::OK;
585 }
586 
PersistStoragesToDisk()587 libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk() {
588   if (is_initialized_ && !is_storage_dirty()) {
589     return libtextclassifier3::Status::OK;
590   }
591 
592   for (auto& [_, storage] : property_to_storage_map_) {
593     ICING_RETURN_IF_ERROR(storage->PersistToDisk());
594   }
595   // No need to persist wildcard_properties_storage_. All calls to
596   // FileBackedProto::Write are fully written through at the time of the call.
597   if (wildcard_index_storage_) {
598     ICING_RETURN_IF_ERROR(wildcard_index_storage_->PersistToDisk());
599   }
600   is_storage_dirty_ = false;
601   return libtextclassifier3::Status::OK;
602 }
603 
PersistMetadataToDisk()604 libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk() {
605   if (is_initialized_ && !is_info_dirty() && !is_storage_dirty()) {
606     return libtextclassifier3::Status::OK;
607   }
608 
609   // Changes should have been applied to the underlying file when using
610   // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
611   // extra safety step to ensure they are written out.
612   ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
613   is_info_dirty_ = false;
614   return libtextclassifier3::Status::OK;
615 }
616 
UpdateStoragesChecksum()617 libtextclassifier3::StatusOr<Crc32> IntegerIndex::UpdateStoragesChecksum() {
618   if (is_initialized_ && !is_storage_dirty()) {
619     return Crc32(crcs().component_crcs.storages_crc);
620   }
621 
622   // XOR all crcs of all storages. Since XOR is commutative and associative,
623   // the order doesn't matter.
624   uint32_t storages_checksum = 0;
625   for (auto& [property_path, storage] : property_to_storage_map_) {
626     ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums());
627     storage_crc.Append(property_path);
628 
629     storages_checksum ^= storage_crc.Get();
630   }
631 
632   if (wildcard_index_storage_ != nullptr) {
633     ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
634                            wildcard_index_storage_->UpdateChecksums());
635     storages_checksum ^= storage_crc.Get();
636   }
637 
638   // FileBackedProto always keeps its checksum up to date. So we just need to
639   // retrieve the checksum.
640   ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
641                          wildcard_property_storage_->GetChecksum());
642   storages_checksum ^= wildcard_properties_crc.Get();
643 
644   return Crc32(storages_checksum);
645 }
646 
GetInfoChecksum() const647 libtextclassifier3::StatusOr<Crc32> IntegerIndex::GetInfoChecksum() const {
648   if (is_initialized_ && !is_info_dirty()) {
649     return Crc32(crcs().component_crcs.info_crc);
650   }
651   return info().GetChecksum();
652 }
653 
GetStoragesChecksum() const654 libtextclassifier3::StatusOr<Crc32> IntegerIndex::GetStoragesChecksum() const {
655   if (is_initialized_ && !is_storage_dirty()) {
656     return Crc32(crcs().component_crcs.storages_crc);
657   }
658 
659   // XOR all crcs of all storages. Since XOR is commutative and associative,
660   // the order doesn't matter.
661   uint32_t storages_checksum = 0;
662   for (auto& [property_path, storage] : property_to_storage_map_) {
663     ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->GetChecksum());
664     storage_crc.Append(property_path);
665 
666     storages_checksum ^= storage_crc.Get();
667   }
668 
669   if (wildcard_index_storage_ != nullptr) {
670     ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
671                            wildcard_index_storage_->GetChecksum());
672     storages_checksum ^= storage_crc.Get();
673   }
674 
675   ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
676                          wildcard_property_storage_->GetChecksum());
677   storages_checksum ^= wildcard_properties_crc.Get();
678 
679   return Crc32(storages_checksum);
680 }
681 
682 }  // namespace lib
683 }  // namespace icing
684