1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/numeric/integer-index.h"
16
17 #include <algorithm>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 #include <utility>
23 #include <vector>
24
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/file/destructible-directory.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/file/memory-mapped-file.h"
32 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
33 #include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
34 #include "icing/index/numeric/integer-index-storage.h"
35 #include "icing/index/numeric/posting-list-integer-index-serializer.h"
36 #include "icing/store/document-id.h"
37 #include "icing/util/crc32.h"
38 #include "icing/util/status-macros.h"
39
40 namespace icing {
41 namespace lib {
42
43 namespace {
44
45 // Helper function to get the file name of metadata.
GetMetadataFileName()46 std::string GetMetadataFileName() {
47 return absl_ports::StrCat(IntegerIndex::kFilePrefix, ".m");
48 }
49
50 // Helper function to get the file path of metadata according to the given
51 // working directory.
GetMetadataFilePath(std::string_view working_path)52 std::string GetMetadataFilePath(std::string_view working_path) {
53 return absl_ports::StrCat(working_path, "/", GetMetadataFileName());
54 }
55
// Name of the entry (under the working path) that holds the shared
// IntegerIndexStorage used for properties stored in wildcard storage.
constexpr std::string_view kWildcardPropertyIndexFileName{
    "wildcard_property_index"};

// Name of the file (under the working path) backing the
// FileBackedProto<WildcardPropertyStorage> that lists the property paths
// stored in wildcard storage.
constexpr std::string_view kWildcardPropertyStorageFileName{
    "wildcard_property_storage"};
61
GetWildcardPropertyStorageFilePath(std::string_view working_path)62 std::string GetWildcardPropertyStorageFilePath(std::string_view working_path) {
63 return absl_ports::StrCat(working_path, "/",
64 kWildcardPropertyStorageFileName);
65 }
66
67 // Helper function to get the sub working (directory) path of
68 // IntegerIndexStorage according to the given working directory and property
69 // path.
GetPropertyIndexStoragePath(std::string_view working_path,std::string_view property_path)70 std::string GetPropertyIndexStoragePath(std::string_view working_path,
71 std::string_view property_path) {
72 return absl_ports::StrCat(working_path, "/", property_path);
73 }
74
75 // Helper function to get all existing property paths by listing all
76 // directories.
77 libtextclassifier3::StatusOr<std::vector<std::string>>
GetAllExistingPropertyPaths(const Filesystem & filesystem,const std::string & working_path)78 GetAllExistingPropertyPaths(const Filesystem& filesystem,
79 const std::string& working_path) {
80 std::vector<std::string> property_paths;
81 std::unordered_set<std::string> excludes = {
82 GetMetadataFileName(), std::string(kWildcardPropertyStorageFileName)};
83 if (!filesystem.ListDirectory(working_path.c_str(), excludes,
84 /*recursive=*/false, &property_paths)) {
85 return absl_ports::InternalError("Failed to list directory");
86 }
87 return property_paths;
88 }
89
90 libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType>
GetPropertyIntegerIndexStorageMap(const Filesystem & filesystem,const std::string & working_path,PostingListIntegerIndexSerializer * posting_list_serializer,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)91 GetPropertyIntegerIndexStorageMap(
92 const Filesystem& filesystem, const std::string& working_path,
93 PostingListIntegerIndexSerializer* posting_list_serializer,
94 int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
95 ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths,
96 GetAllExistingPropertyPaths(filesystem, working_path));
97
98 IntegerIndex::PropertyToStorageMapType property_to_storage_map;
99 for (const std::string& property_path : property_paths) {
100 if (property_path == kWildcardPropertyIndexFileName) {
101 continue;
102 }
103 std::string storage_working_path =
104 GetPropertyIndexStoragePath(working_path, property_path);
105 ICING_ASSIGN_OR_RETURN(
106 std::unique_ptr<IntegerIndexStorage> storage,
107 IntegerIndexStorage::Create(
108 filesystem, storage_working_path,
109 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
110 pre_mapping_fbv),
111 posting_list_serializer));
112 property_to_storage_map.insert(
113 std::make_pair(property_path, std::move(storage)));
114 }
115
116 return property_to_storage_map;
117 }
118
119 // RETURNS:
120 // - On success, an unordered_set representing the list of property paths
121 // stored in the WildcardPropertyStorage managed by property_storage
122 // - INTERNAL_ERROR on any failure to successfully read the underlying proto.
CreatePropertySet(const FileBackedProto<WildcardPropertyStorage> & property_storage)123 libtextclassifier3::StatusOr<std::unordered_set<std::string>> CreatePropertySet(
124 const FileBackedProto<WildcardPropertyStorage>& property_storage) {
125 std::unordered_set<std::string> wildcard_properties_set;
126 auto wildcard_properties_or = property_storage.Read();
127 if (!wildcard_properties_or.ok()) {
128 if (absl_ports::IsNotFound(wildcard_properties_or.status())) {
129 return wildcard_properties_set;
130 }
131 return wildcard_properties_or.status();
132 }
133
134 const WildcardPropertyStorage* wildcard_properties =
135 wildcard_properties_or.ValueOrDie();
136 wildcard_properties_set.reserve(wildcard_properties->property_entries_size());
137 for (const std::string& property : wildcard_properties->property_entries()) {
138 wildcard_properties_set.insert(property);
139 }
140 return wildcard_properties_set;
141 }
142
143 } // namespace
144
IndexAllBufferedKeys()145 libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
146 integer_index_.SetDirty();
147
148 auto iter = integer_index_.property_to_storage_map_.find(property_path_);
149 IntegerIndexStorage* target_storage = nullptr;
150 // 1. Check if this property already has its own individual index.
151 if (iter != integer_index_.property_to_storage_map_.end()) {
152 target_storage = iter->second.get();
153 // 2. Check if this property was added to wildcard storage.
154 } else if (integer_index_.wildcard_properties_set_.find(property_path_) !=
155 integer_index_.wildcard_properties_set_.end()) {
156 target_storage = integer_index_.wildcard_index_storage_.get();
157 // 3. Check if we've reach the limit of individual property storages.
158 } else if (integer_index_.property_to_storage_map_.size() >=
159 kMaxPropertyStorages) {
160 // 3a. Create the wildcard storage if it doesn't exist.
161 if (integer_index_.wildcard_index_storage_ == nullptr) {
162 ICING_ASSIGN_OR_RETURN(
163 integer_index_.wildcard_index_storage_,
164 IntegerIndexStorage::Create(
165 integer_index_.filesystem_,
166 GetPropertyIndexStoragePath(integer_index_.working_path_,
167 kWildcardPropertyIndexFileName),
168 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
169 pre_mapping_fbv_),
170 integer_index_.posting_list_serializer_.get()));
171 }
172 ICING_RETURN_IF_ERROR(
173 integer_index_.AddPropertyToWildcardStorage(property_path_));
174 target_storage = integer_index_.wildcard_index_storage_.get();
175 // 4. Create a new individual storage for this new property.
176 } else {
177 ICING_ASSIGN_OR_RETURN(
178 std::unique_ptr<IntegerIndexStorage> new_storage,
179 IntegerIndexStorage::Create(
180 integer_index_.filesystem_,
181 GetPropertyIndexStoragePath(integer_index_.working_path_,
182 property_path_),
183 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
184 pre_mapping_fbv_),
185 integer_index_.posting_list_serializer_.get()));
186 target_storage = new_storage.get();
187 integer_index_.property_to_storage_map_.insert(
188 std::make_pair(property_path_, std::move(new_storage)));
189 }
190
191 return target_storage->AddKeys(document_id_, section_id_,
192 std::move(seen_keys_));
193 }
194
195 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
Create(const Filesystem & filesystem,std::string working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)196 IntegerIndex::Create(const Filesystem& filesystem, std::string working_path,
197 int32_t num_data_threshold_for_bucket_split,
198 bool pre_mapping_fbv) {
199 if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
200 // Discard working_path if metadata file is missing, and reinitialize.
201 if (filesystem.DirectoryExists(working_path.c_str())) {
202 ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
203 }
204 return InitializeNewFiles(filesystem, std::move(working_path),
205 num_data_threshold_for_bucket_split,
206 pre_mapping_fbv);
207 }
208 return InitializeExistingFiles(filesystem, std::move(working_path),
209 num_data_threshold_for_bucket_split,
210 pre_mapping_fbv);
211 }
212
~IntegerIndex()213 IntegerIndex::~IntegerIndex() {
214 if (!PersistToDisk().ok()) {
215 ICING_LOG(WARNING)
216 << "Failed to persist integer index to disk while destructing "
217 << working_path_;
218 }
219 }
220
221 libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
GetIterator(std::string_view property_path,int64_t key_lower,int64_t key_upper,const DocumentStore & document_store,const SchemaStore & schema_store,int64_t current_time_ms) const222 IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
223 int64_t key_upper,
224 const DocumentStore& document_store,
225 const SchemaStore& schema_store,
226 int64_t current_time_ms) const {
227 std::string property_path_str(property_path);
228 auto iter = property_to_storage_map_.find(property_path_str);
229 if (iter != property_to_storage_map_.end()) {
230 return iter->second->GetIterator(key_lower, key_upper);
231 }
232
233 if (wildcard_properties_set_.find(property_path_str) !=
234 wildcard_properties_set_.end()) {
235 ICING_ASSIGN_OR_RETURN(
236 std::unique_ptr<DocHitInfoIterator> delegate,
237 wildcard_index_storage_->GetIterator(key_lower, key_upper));
238 std::set<std::string> property_paths = {std::move(property_path_str)};
239 return DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
240 std::move(delegate), &document_store, &schema_store,
241 std::move(property_paths), current_time_ms);
242 }
243
244 // Return an empty iterator.
245 return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
246 /*numeric_index_iter=*/nullptr);
247 }
248
AddPropertyToWildcardStorage(const std::string & property_path)249 libtextclassifier3::Status IntegerIndex::AddPropertyToWildcardStorage(
250 const std::string& property_path) {
251 SetDirty();
252
253 WildcardPropertyStorage wildcard_properties;
254 wildcard_properties.mutable_property_entries()->Reserve(
255 wildcard_properties_set_.size());
256 for (const std::string& property_path : wildcard_properties_set_) {
257 wildcard_properties.add_property_entries(property_path);
258 }
259 ICING_RETURN_IF_ERROR(wildcard_property_storage_->Write(
260 std::make_unique<WildcardPropertyStorage>(
261 std::move(wildcard_properties))));
262
263 wildcard_properties_set_.insert(property_path);
264 return libtextclassifier3::Status::OK;
265 }
266
Optimize(const std::vector<DocumentId> & document_id_old_to_new,DocumentId new_last_added_document_id)267 libtextclassifier3::Status IntegerIndex::Optimize(
268 const std::vector<DocumentId>& document_id_old_to_new,
269 DocumentId new_last_added_document_id) {
270 std::string temp_working_path = working_path_ + "_temp";
271 ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
272
273 DestructibleDirectory temp_working_path_ddir(&filesystem_,
274 std::move(temp_working_path));
275 if (!temp_working_path_ddir.is_valid()) {
276 return absl_ports::InternalError(
277 "Unable to create temp directory to build new integer index");
278 }
279
280 {
281 // Transfer all indexed data from current integer index to new integer
282 // index. Also PersistToDisk and destruct the instance after finishing, so
283 // we can safely swap directories later.
284 ICING_ASSIGN_OR_RETURN(
285 std::unique_ptr<IntegerIndex> new_integer_index,
286 Create(filesystem_, temp_working_path_ddir.dir(),
287 num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
288 ICING_RETURN_IF_ERROR(
289 TransferIndex(document_id_old_to_new, new_integer_index.get()));
290 new_integer_index->set_last_added_document_id(new_last_added_document_id);
291 ICING_RETURN_IF_ERROR(new_integer_index->PersistToDisk());
292 }
293
294 // Destruct current storage instances to safely swap directories.
295 metadata_mmapped_file_.reset();
296 property_to_storage_map_.clear();
297 wildcard_index_storage_.reset();
298 wildcard_property_storage_.reset();
299 if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
300 working_path_.c_str())) {
301 return absl_ports::InternalError(
302 "Unable to apply new integer index due to failed swap");
303 }
304
305 // Reinitialize the integer index.
306 std::string metadata_file_path = GetMetadataFilePath(working_path_);
307 ICING_ASSIGN_OR_RETURN(
308 MemoryMappedFile metadata_mmapped_file,
309 MemoryMappedFile::Create(filesystem_, metadata_file_path,
310 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
311 /*max_file_size=*/kMetadataFileSize,
312 /*pre_mapping_file_offset=*/0,
313 /*pre_mapping_mmap_size=*/kMetadataFileSize));
314 if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
315 return absl_ports::InternalError(
316 "Invalid metadata file size after Optimize");
317 }
318 metadata_mmapped_file_ =
319 std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
320
321 // Recreate all of the data structures tracking the wildcard storage.
322 std::string wildcard_property_path =
323 GetWildcardPropertyStorageFilePath(working_path_);
324 wildcard_property_storage_ =
325 std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
326 filesystem_, wildcard_property_path);
327
328 ICING_ASSIGN_OR_RETURN(wildcard_properties_set_,
329 CreatePropertySet(*wildcard_property_storage_));
330 if (!wildcard_properties_set_.empty()) {
331 ICING_ASSIGN_OR_RETURN(
332 wildcard_index_storage_,
333 IntegerIndexStorage::Create(
334 filesystem_,
335 GetPropertyIndexStoragePath(working_path_,
336 kWildcardPropertyIndexFileName),
337 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
338 pre_mapping_fbv_),
339 posting_list_serializer_.get()));
340 }
341
342 // Initialize all existing integer index storages.
343 ICING_ASSIGN_OR_RETURN(
344 property_to_storage_map_,
345 GetPropertyIntegerIndexStorageMap(
346 filesystem_, working_path_, posting_list_serializer_.get(),
347 num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
348
349 return libtextclassifier3::Status::OK;
350 }
351
Clear()352 libtextclassifier3::Status IntegerIndex::Clear() {
353 SetDirty();
354
355 // Step 1: clear property_to_storage_map_.
356 property_to_storage_map_.clear();
357 wildcard_index_storage_.reset();
358
359 // Step 2: delete all IntegerIndexStorages. It is safe because there is no
360 // active IntegerIndexStorage after clearing the map.
361 ICING_ASSIGN_OR_RETURN(
362 std::vector<std::string> property_paths,
363 GetAllExistingPropertyPaths(filesystem_, working_path_));
364 for (const std::string& property_path : property_paths) {
365 ICING_RETURN_IF_ERROR(IntegerIndexStorage::Discard(
366 filesystem_,
367 GetPropertyIndexStoragePath(working_path_, property_path)));
368 }
369
370 // Step 3: Delete the wildcard property storage
371 std::string wildcard_property_path =
372 GetWildcardPropertyStorageFilePath(working_path_);
373 if (filesystem_.FileExists(wildcard_property_path.c_str()) ||
374 !filesystem_.DeleteFile(wildcard_property_path.c_str())) {
375 return absl_ports::InternalError(absl_ports::StrCat(
376 "Unable to delete file at path ", wildcard_property_path));
377 }
378
379 info().last_added_document_id = kInvalidDocumentId;
380 return libtextclassifier3::Status::OK;
381 }
382
383 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeNewFiles(const Filesystem & filesystem,std::string && working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)384 IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
385 std::string&& working_path,
386 int32_t num_data_threshold_for_bucket_split,
387 bool pre_mapping_fbv) {
388 // Create working directory.
389 if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
390 return absl_ports::InternalError(
391 absl_ports::StrCat("Failed to create directory: ", working_path));
392 }
393
394 // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
395 // call GrowAndRemapIfNecessary to grow the underlying file.
396 ICING_ASSIGN_OR_RETURN(
397 MemoryMappedFile metadata_mmapped_file,
398 MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
399 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
400 /*max_file_size=*/kMetadataFileSize,
401 /*pre_mapping_file_offset=*/0,
402 /*pre_mapping_mmap_size=*/kMetadataFileSize));
403 ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
404 /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
405
406 std::string wildcard_property_path =
407 GetWildcardPropertyStorageFilePath(working_path);
408 auto wildcard_property_storage =
409 std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
410 filesystem, wildcard_property_path);
411
412 // Create instance.
413 auto new_integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
414 filesystem, std::move(working_path),
415 std::make_unique<PostingListIntegerIndexSerializer>(),
416 std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
417 /*property_to_storage_map=*/{}, std::move(wildcard_property_storage),
418 /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr,
419 num_data_threshold_for_bucket_split, pre_mapping_fbv));
420
421 // Initialize info content by writing mapped memory directly.
422 Info& info_ref = new_integer_index->info();
423 info_ref.magic = Info::kMagic;
424 info_ref.last_added_document_id = kInvalidDocumentId;
425 info_ref.num_data_threshold_for_bucket_split =
426 num_data_threshold_for_bucket_split;
427
428 // Initialize new PersistentStorage. The initial checksums will be computed
429 // and set via InitializeNewStorage.
430 ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
431
432 return new_integer_index;
433 }
434
435 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeExistingFiles(const Filesystem & filesystem,std::string && working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)436 IntegerIndex::InitializeExistingFiles(
437 const Filesystem& filesystem, std::string&& working_path,
438 int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
439 // Mmap the content of the crcs and info.
440 ICING_ASSIGN_OR_RETURN(
441 MemoryMappedFile metadata_mmapped_file,
442 MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
443 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
444 /*max_file_size=*/kMetadataFileSize,
445 /*pre_mapping_file_offset=*/0,
446 /*pre_mapping_mmap_size=*/kMetadataFileSize));
447 if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
448 return absl_ports::FailedPreconditionError("Incorrect metadata file size");
449 }
450
451 auto posting_list_serializer =
452 std::make_unique<PostingListIntegerIndexSerializer>();
453
454 // Initialize all existing integer index storages.
455 ICING_ASSIGN_OR_RETURN(
456 PropertyToStorageMapType property_to_storage_map,
457 GetPropertyIntegerIndexStorageMap(
458 filesystem, working_path, posting_list_serializer.get(),
459 num_data_threshold_for_bucket_split, pre_mapping_fbv));
460
461 std::string wildcard_property_path =
462 GetWildcardPropertyStorageFilePath(working_path);
463 auto wildcard_property_storage =
464 std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
465 filesystem, wildcard_property_path);
466
467 ICING_ASSIGN_OR_RETURN(
468 std::unordered_set<std::string> wildcard_properties_set,
469 CreatePropertySet(*wildcard_property_storage));
470
471 std::unique_ptr<IntegerIndexStorage> wildcard_index_storage;
472 if (!wildcard_properties_set.empty()) {
473 ICING_ASSIGN_OR_RETURN(
474 wildcard_index_storage,
475 IntegerIndexStorage::Create(
476 filesystem,
477 GetPropertyIndexStoragePath(working_path,
478 kWildcardPropertyIndexFileName),
479 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
480 pre_mapping_fbv),
481 posting_list_serializer.get()));
482 }
483
484 // Create instance.
485 auto integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
486 filesystem, std::move(working_path), std::move(posting_list_serializer),
487 std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
488 std::move(property_to_storage_map), std::move(wildcard_property_storage),
489 std::move(wildcard_properties_set), std::move(wildcard_index_storage),
490 num_data_threshold_for_bucket_split, pre_mapping_fbv));
491
492 // Initialize existing PersistentStorage. Checksums will be validated.
493 ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
494
495 // Validate magic.
496 if (integer_index->info().magic != Info::kMagic) {
497 return absl_ports::FailedPreconditionError("Incorrect magic value");
498 }
499
500 // If num_data_threshold_for_bucket_split mismatches, then return error to let
501 // caller rebuild.
502 if (integer_index->info().num_data_threshold_for_bucket_split !=
503 num_data_threshold_for_bucket_split) {
504 return absl_ports::FailedPreconditionError(
505 "Mismatch num_data_threshold_for_bucket_split");
506 }
507
508 return integer_index;
509 }
510
511 libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
TransferIntegerIndexStorage(const std::vector<DocumentId> & document_id_old_to_new,const IntegerIndexStorage * old_storage,const std::string & property_path,IntegerIndex * new_integer_index) const512 IntegerIndex::TransferIntegerIndexStorage(
513 const std::vector<DocumentId>& document_id_old_to_new,
514 const IntegerIndexStorage* old_storage, const std::string& property_path,
515 IntegerIndex* new_integer_index) const {
516 std::string new_storage_working_path = GetPropertyIndexStoragePath(
517 new_integer_index->working_path_, property_path);
518 ICING_ASSIGN_OR_RETURN(
519 std::unique_ptr<IntegerIndexStorage> new_storage,
520 IntegerIndexStorage::Create(
521 new_integer_index->filesystem_, new_storage_working_path,
522 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
523 pre_mapping_fbv_),
524 new_integer_index->posting_list_serializer_.get()));
525
526 ICING_RETURN_IF_ERROR(
527 old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
528
529 if (new_storage->num_data() == 0) {
530 new_storage.reset();
531 ICING_RETURN_IF_ERROR(
532 IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
533 }
534 return new_storage;
535 }
536
TransferWildcardStorage(IntegerIndex * new_integer_index) const537 libtextclassifier3::Status IntegerIndex::TransferWildcardStorage(
538 IntegerIndex* new_integer_index) const {
539 auto property_storage = std::make_unique<WildcardPropertyStorage>();
540 property_storage->mutable_property_entries()->Reserve(
541 wildcard_properties_set_.size());
542 for (const std::string& property : wildcard_properties_set_) {
543 property_storage->add_property_entries(property);
544 }
545
546 ICING_RETURN_IF_ERROR(new_integer_index->wildcard_property_storage_->Write(
547 std::move(property_storage)));
548 new_integer_index->wildcard_properties_set_ = wildcard_properties_set_;
549 return libtextclassifier3::Status::OK;
550 }
551
TransferIndex(const std::vector<DocumentId> & document_id_old_to_new,IntegerIndex * new_integer_index) const552 libtextclassifier3::Status IntegerIndex::TransferIndex(
553 const std::vector<DocumentId>& document_id_old_to_new,
554 IntegerIndex* new_integer_index) const {
555 // Transfer over the integer index storages
556 std::unique_ptr<IntegerIndexStorage> new_storage;
557 for (const auto& [property_path, old_storage] : property_to_storage_map_) {
558 ICING_ASSIGN_OR_RETURN(
559 new_storage,
560 TransferIntegerIndexStorage(document_id_old_to_new, old_storage.get(),
561 property_path, new_integer_index));
562 if (new_storage != nullptr) {
563 new_integer_index->property_to_storage_map_.insert(
564 {property_path, std::move(new_storage)});
565 }
566 }
567 if (wildcard_index_storage_ != nullptr) {
568 ICING_ASSIGN_OR_RETURN(
569 new_storage,
570 TransferIntegerIndexStorage(
571 document_id_old_to_new, wildcard_index_storage_.get(),
572 std::string(kWildcardPropertyIndexFileName), new_integer_index));
573 if (new_storage != nullptr) {
574 new_integer_index->wildcard_index_storage_ = std::move(new_storage);
575
576 // The only time we need to copy over the list of properties using
577 // wildcard storage is if wildcard_index_storage and new_storage are both
578 // non-null. Otherwise, the new wildcard index storage won't have any
579 // data.
580 ICING_RETURN_IF_ERROR(TransferWildcardStorage(new_integer_index));
581 }
582 }
583
584 return libtextclassifier3::Status::OK;
585 }
586
PersistStoragesToDisk()587 libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk() {
588 if (is_initialized_ && !is_storage_dirty()) {
589 return libtextclassifier3::Status::OK;
590 }
591
592 for (auto& [_, storage] : property_to_storage_map_) {
593 ICING_RETURN_IF_ERROR(storage->PersistToDisk());
594 }
595 // No need to persist wildcard_properties_storage_. All calls to
596 // FileBackedProto::Write are fully written through at the time of the call.
597 if (wildcard_index_storage_) {
598 ICING_RETURN_IF_ERROR(wildcard_index_storage_->PersistToDisk());
599 }
600 is_storage_dirty_ = false;
601 return libtextclassifier3::Status::OK;
602 }
603
PersistMetadataToDisk()604 libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk() {
605 if (is_initialized_ && !is_info_dirty() && !is_storage_dirty()) {
606 return libtextclassifier3::Status::OK;
607 }
608
609 // Changes should have been applied to the underlying file when using
610 // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
611 // extra safety step to ensure they are written out.
612 ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
613 is_info_dirty_ = false;
614 return libtextclassifier3::Status::OK;
615 }
616
UpdateStoragesChecksum()617 libtextclassifier3::StatusOr<Crc32> IntegerIndex::UpdateStoragesChecksum() {
618 if (is_initialized_ && !is_storage_dirty()) {
619 return Crc32(crcs().component_crcs.storages_crc);
620 }
621
622 // XOR all crcs of all storages. Since XOR is commutative and associative,
623 // the order doesn't matter.
624 uint32_t storages_checksum = 0;
625 for (auto& [property_path, storage] : property_to_storage_map_) {
626 ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums());
627 storage_crc.Append(property_path);
628
629 storages_checksum ^= storage_crc.Get();
630 }
631
632 if (wildcard_index_storage_ != nullptr) {
633 ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
634 wildcard_index_storage_->UpdateChecksums());
635 storages_checksum ^= storage_crc.Get();
636 }
637
638 // FileBackedProto always keeps its checksum up to date. So we just need to
639 // retrieve the checksum.
640 ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
641 wildcard_property_storage_->GetChecksum());
642 storages_checksum ^= wildcard_properties_crc.Get();
643
644 return Crc32(storages_checksum);
645 }
646
GetInfoChecksum() const647 libtextclassifier3::StatusOr<Crc32> IntegerIndex::GetInfoChecksum() const {
648 if (is_initialized_ && !is_info_dirty()) {
649 return Crc32(crcs().component_crcs.info_crc);
650 }
651 return info().GetChecksum();
652 }
653
GetStoragesChecksum() const654 libtextclassifier3::StatusOr<Crc32> IntegerIndex::GetStoragesChecksum() const {
655 if (is_initialized_ && !is_storage_dirty()) {
656 return Crc32(crcs().component_crcs.storages_crc);
657 }
658
659 // XOR all crcs of all storages. Since XOR is commutative and associative,
660 // the order doesn't matter.
661 uint32_t storages_checksum = 0;
662 for (auto& [property_path, storage] : property_to_storage_map_) {
663 ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->GetChecksum());
664 storage_crc.Append(property_path);
665
666 storages_checksum ^= storage_crc.Get();
667 }
668
669 if (wildcard_index_storage_ != nullptr) {
670 ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
671 wildcard_index_storage_->GetChecksum());
672 storages_checksum ^= storage_crc.Get();
673 }
674
675 ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
676 wildcard_property_storage_->GetChecksum());
677 storages_checksum ^= wildcard_properties_crc.Get();
678
679 return Crc32(storages_checksum);
680 }
681
682 } // namespace lib
683 } // namespace icing
684