xref: /aosp_15_r20/external/icing/icing/store/document-store.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1*8b6cd535SAndroid Build Coastguard Worker // Copyright (C) 2019 Google LLC
2*8b6cd535SAndroid Build Coastguard Worker //
3*8b6cd535SAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*8b6cd535SAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*8b6cd535SAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*8b6cd535SAndroid Build Coastguard Worker //
7*8b6cd535SAndroid Build Coastguard Worker //      http://www.apache.org/licenses/LICENSE-2.0
8*8b6cd535SAndroid Build Coastguard Worker //
9*8b6cd535SAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*8b6cd535SAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*8b6cd535SAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*8b6cd535SAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*8b6cd535SAndroid Build Coastguard Worker // limitations under the License.
14*8b6cd535SAndroid Build Coastguard Worker 
15*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-store.h"
16*8b6cd535SAndroid Build Coastguard Worker 
17*8b6cd535SAndroid Build Coastguard Worker #include <cstdint>
18*8b6cd535SAndroid Build Coastguard Worker #include <limits>
19*8b6cd535SAndroid Build Coastguard Worker #include <memory>
20*8b6cd535SAndroid Build Coastguard Worker #include <optional>
21*8b6cd535SAndroid Build Coastguard Worker #include <string>
22*8b6cd535SAndroid Build Coastguard Worker #include <string_view>
23*8b6cd535SAndroid Build Coastguard Worker #include <unordered_map>
24*8b6cd535SAndroid Build Coastguard Worker #include <unordered_set>
25*8b6cd535SAndroid Build Coastguard Worker #include <utility>
26*8b6cd535SAndroid Build Coastguard Worker #include <vector>
27*8b6cd535SAndroid Build Coastguard Worker 
28*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/status.h"
29*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/statusor.h"
30*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/annotate.h"
31*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/canonical_errors.h"
32*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/str_cat.h"
33*8b6cd535SAndroid Build Coastguard Worker #include "icing/feature-flags.h"
34*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/file-backed-proto-log.h"
35*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/file-backed-vector.h"
36*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/filesystem.h"
37*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/memory-mapped-file-backed-proto-log.h"
38*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/memory-mapped-file.h"
39*8b6cd535SAndroid Build Coastguard Worker #include "icing/file/portable-file-backed-proto-log.h"
40*8b6cd535SAndroid Build Coastguard Worker #include "icing/legacy/core/icing-string-util.h"
41*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/debug.pb.h"
42*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/document.pb.h"
43*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/document_wrapper.pb.h"
44*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/internal/scorable_property_set.pb.h"
45*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/logging.pb.h"
46*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/optimize.pb.h"
47*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/persist.pb.h"
48*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/schema.pb.h"
49*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/storage.pb.h"
50*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/usage.pb.h"
51*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/property-util.h"
52*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/schema-store.h"
53*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/scorable_property_manager.h"
54*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/blob-store.h"
55*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/corpus-associated-scoring-data.h"
56*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/corpus-id.h"
57*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-associated-score-data.h"
58*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-filter-data.h"
59*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-id.h"
60*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-log-creator.h"
61*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/dynamic-trie-key-mapper.h"
62*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/key-mapper.h"
63*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/namespace-id-fingerprint.h"
64*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/namespace-id.h"
65*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/persistent-hash-map-key-mapper.h"
66*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/usage-store.h"
67*8b6cd535SAndroid Build Coastguard Worker #include "icing/tokenization/language-segmenter.h"
68*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/clock.h"
69*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/crc32.h"
70*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/data-loss.h"
71*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/fingerprint-util.h"
72*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/logging.h"
73*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/scorable_property_set.h"
74*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/status-macros.h"
75*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/tokenized-document.h"
76*8b6cd535SAndroid Build Coastguard Worker 
77*8b6cd535SAndroid Build Coastguard Worker namespace icing {
78*8b6cd535SAndroid Build Coastguard Worker namespace lib {
79*8b6cd535SAndroid Build Coastguard Worker 
80*8b6cd535SAndroid Build Coastguard Worker namespace {
81*8b6cd535SAndroid Build Coastguard Worker 
82*8b6cd535SAndroid Build Coastguard Worker // Used in DocumentId mapper to mark a document as deleted
83*8b6cd535SAndroid Build Coastguard Worker constexpr int64_t kDocDeletedFlag = -1;
84*8b6cd535SAndroid Build Coastguard Worker constexpr int32_t kInvalidScorablePropertyCacheIndex = -1;
85*8b6cd535SAndroid Build Coastguard Worker constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
86*8b6cd535SAndroid Build Coastguard Worker constexpr char kUriHashMapperWorkingPath[] = "uri_mapper";
87*8b6cd535SAndroid Build Coastguard Worker constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
88*8b6cd535SAndroid Build Coastguard Worker constexpr char kScoreCacheFilename[] = "score_cache";
89*8b6cd535SAndroid Build Coastguard Worker constexpr char kScorablePropertyCacheFilename[] = "scorable_property_cache";
90*8b6cd535SAndroid Build Coastguard Worker constexpr char kCorpusScoreCache[] = "corpus_score_cache";
91*8b6cd535SAndroid Build Coastguard Worker constexpr char kFilterCacheFilename[] = "filter_cache";
92*8b6cd535SAndroid Build Coastguard Worker constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
93*8b6cd535SAndroid Build Coastguard Worker constexpr char kUsageStoreDirectoryName[] = "usage_store";
94*8b6cd535SAndroid Build Coastguard Worker constexpr char kCorpusIdMapperFilename[] = "corpus_mapper";
95*8b6cd535SAndroid Build Coastguard Worker 
96*8b6cd535SAndroid Build Coastguard Worker // Determined through manual testing to allow for 4 million uris. 4 million
97*8b6cd535SAndroid Build Coastguard Worker // because we allow up to 4 million DocumentIds.
98*8b6cd535SAndroid Build Coastguard Worker constexpr int32_t kUriDynamicTrieKeyMapperMaxSize =
99*8b6cd535SAndroid Build Coastguard Worker     144 * 1024 * 1024;  // 144 MiB
100*8b6cd535SAndroid Build Coastguard Worker 
101*8b6cd535SAndroid Build Coastguard Worker constexpr int32_t kUriHashKeyMapperMaxNumEntries =
102*8b6cd535SAndroid Build Coastguard Worker     kMaxDocumentId + 1;  // 1 << 22, 4M
103*8b6cd535SAndroid Build Coastguard Worker // - Key: namespace_id_str (3 bytes) + fingerprinted_uri (10 bytes) + '\0' (1
104*8b6cd535SAndroid Build Coastguard Worker //        byte)
105*8b6cd535SAndroid Build Coastguard Worker // - Value: DocumentId (4 bytes)
106*8b6cd535SAndroid Build Coastguard Worker constexpr int32_t kUriHashKeyMapperKVByteSize = 13 + 1 + sizeof(DocumentId);
107*8b6cd535SAndroid Build Coastguard Worker 
108*8b6cd535SAndroid Build Coastguard Worker // 384 KiB for a DynamicTrieKeyMapper would allow each internal array to have a
109*8b6cd535SAndroid Build Coastguard Worker // max of 128 KiB for storage.
110*8b6cd535SAndroid Build Coastguard Worker constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024;  // 384 KiB
111*8b6cd535SAndroid Build Coastguard Worker constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024;     // 384 KiB
112*8b6cd535SAndroid Build Coastguard Worker 
CreateDocumentWrapper(DocumentProto && document)113*8b6cd535SAndroid Build Coastguard Worker DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) {
114*8b6cd535SAndroid Build Coastguard Worker   DocumentWrapper document_wrapper;
115*8b6cd535SAndroid Build Coastguard Worker   *document_wrapper.mutable_document() = std::move(document);
116*8b6cd535SAndroid Build Coastguard Worker   return document_wrapper;
117*8b6cd535SAndroid Build Coastguard Worker }
118*8b6cd535SAndroid Build Coastguard Worker 
MakeHeaderFilename(const std::string & base_dir)119*8b6cd535SAndroid Build Coastguard Worker std::string MakeHeaderFilename(const std::string& base_dir) {
120*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kDocumentStoreHeaderFilename);
121*8b6cd535SAndroid Build Coastguard Worker }
122*8b6cd535SAndroid Build Coastguard Worker 
MakeUriHashMapperWorkingPath(const std::string & base_dir)123*8b6cd535SAndroid Build Coastguard Worker std::string MakeUriHashMapperWorkingPath(const std::string& base_dir) {
124*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kUriHashMapperWorkingPath);
125*8b6cd535SAndroid Build Coastguard Worker }
126*8b6cd535SAndroid Build Coastguard Worker 
MakeDocumentIdMapperFilename(const std::string & base_dir)127*8b6cd535SAndroid Build Coastguard Worker std::string MakeDocumentIdMapperFilename(const std::string& base_dir) {
128*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
129*8b6cd535SAndroid Build Coastguard Worker }
130*8b6cd535SAndroid Build Coastguard Worker 
MakeScoreCacheFilename(const std::string & base_dir)131*8b6cd535SAndroid Build Coastguard Worker std::string MakeScoreCacheFilename(const std::string& base_dir) {
132*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename);
133*8b6cd535SAndroid Build Coastguard Worker }
134*8b6cd535SAndroid Build Coastguard Worker 
MakeScorablePropertyCacheFilename(const std::string & base_dir)135*8b6cd535SAndroid Build Coastguard Worker std::string MakeScorablePropertyCacheFilename(const std::string& base_dir) {
136*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kScorablePropertyCacheFilename);
137*8b6cd535SAndroid Build Coastguard Worker }
138*8b6cd535SAndroid Build Coastguard Worker 
MakeCorpusScoreCache(const std::string & base_dir)139*8b6cd535SAndroid Build Coastguard Worker std::string MakeCorpusScoreCache(const std::string& base_dir) {
140*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kCorpusScoreCache);
141*8b6cd535SAndroid Build Coastguard Worker }
142*8b6cd535SAndroid Build Coastguard Worker 
MakeFilterCacheFilename(const std::string & base_dir)143*8b6cd535SAndroid Build Coastguard Worker std::string MakeFilterCacheFilename(const std::string& base_dir) {
144*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kFilterCacheFilename);
145*8b6cd535SAndroid Build Coastguard Worker }
146*8b6cd535SAndroid Build Coastguard Worker 
MakeNamespaceMapperFilename(const std::string & base_dir)147*8b6cd535SAndroid Build Coastguard Worker std::string MakeNamespaceMapperFilename(const std::string& base_dir) {
148*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kNamespaceMapperFilename);
149*8b6cd535SAndroid Build Coastguard Worker }
150*8b6cd535SAndroid Build Coastguard Worker 
MakeUsageStoreDirectoryName(const std::string & base_dir)151*8b6cd535SAndroid Build Coastguard Worker std::string MakeUsageStoreDirectoryName(const std::string& base_dir) {
152*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kUsageStoreDirectoryName);
153*8b6cd535SAndroid Build Coastguard Worker }
154*8b6cd535SAndroid Build Coastguard Worker 
MakeCorpusMapperFilename(const std::string & base_dir)155*8b6cd535SAndroid Build Coastguard Worker std::string MakeCorpusMapperFilename(const std::string& base_dir) {
156*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename);
157*8b6cd535SAndroid Build Coastguard Worker }
158*8b6cd535SAndroid Build Coastguard Worker 
// Computes the absolute expiration time of a document.
//
// Parameters:
//   creation_timestamp_ms: creation time, in milliseconds.
//   ttl_ms: time-to-live, in milliseconds. 0 means "never expires".
//
// Returns:
//   creation_timestamp_ms + ttl_ms, or int64_t max when ttl_ms is 0 or when
//   the addition does not fit in an int64_t.
int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
                                       int64_t ttl_ms) {
  // int64_t max, read as milliseconds since epoch, is roughly 292 million
  // years away, so it is safe to use as "this point is never reached".
  constexpr int64_t kNeverExpires = std::numeric_limits<int64_t>::max();

  if (ttl_ms == 0) {
    return kNeverExpires;
  }

  int64_t sum_ms = 0;
  const bool overflowed =
      __builtin_add_overflow(creation_timestamp_ms, ttl_ms, &sum_ms);
  // An addition that cannot be represented is treated like "never expires".
  return overflowed ? kNeverExpires : sum_ms;
}
179*8b6cd535SAndroid Build Coastguard Worker 
GetRecoveryCause(const DocumentLogCreator::CreateResult & create_result,bool force_recovery_and_revalidate_documents)180*8b6cd535SAndroid Build Coastguard Worker InitializeStatsProto::RecoveryCause GetRecoveryCause(
181*8b6cd535SAndroid Build Coastguard Worker     const DocumentLogCreator::CreateResult& create_result,
182*8b6cd535SAndroid Build Coastguard Worker     bool force_recovery_and_revalidate_documents) {
183*8b6cd535SAndroid Build Coastguard Worker   if (force_recovery_and_revalidate_documents) {
184*8b6cd535SAndroid Build Coastguard Worker     return InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC;
185*8b6cd535SAndroid Build Coastguard Worker   } else if (create_result.log_create_result.has_data_loss()) {
186*8b6cd535SAndroid Build Coastguard Worker     return InitializeStatsProto::DATA_LOSS;
187*8b6cd535SAndroid Build Coastguard Worker   } else if (create_result.preexisting_file_version !=
188*8b6cd535SAndroid Build Coastguard Worker              DocumentLogCreator::kCurrentVersion) {
189*8b6cd535SAndroid Build Coastguard Worker     return InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT;
190*8b6cd535SAndroid Build Coastguard Worker   }
191*8b6cd535SAndroid Build Coastguard Worker   return InitializeStatsProto::NONE;
192*8b6cd535SAndroid Build Coastguard Worker }
193*8b6cd535SAndroid Build Coastguard Worker 
GetDataStatus(DataLoss data_loss)194*8b6cd535SAndroid Build Coastguard Worker InitializeStatsProto::DocumentStoreDataStatus GetDataStatus(
195*8b6cd535SAndroid Build Coastguard Worker     DataLoss data_loss) {
196*8b6cd535SAndroid Build Coastguard Worker   switch (data_loss) {
197*8b6cd535SAndroid Build Coastguard Worker     case DataLoss::PARTIAL:
198*8b6cd535SAndroid Build Coastguard Worker       return InitializeStatsProto::PARTIAL_LOSS;
199*8b6cd535SAndroid Build Coastguard Worker     case DataLoss::COMPLETE:
200*8b6cd535SAndroid Build Coastguard Worker       return InitializeStatsProto::COMPLETE_LOSS;
201*8b6cd535SAndroid Build Coastguard Worker     case DataLoss::NONE:
202*8b6cd535SAndroid Build Coastguard Worker       return InitializeStatsProto::NO_DATA_LOSS;
203*8b6cd535SAndroid Build Coastguard Worker   }
204*8b6cd535SAndroid Build Coastguard Worker }
205*8b6cd535SAndroid Build Coastguard Worker 
GetNamespaceIdsToNamespaces(const KeyMapper<NamespaceId> * key_mapper)206*8b6cd535SAndroid Build Coastguard Worker std::unordered_map<NamespaceId, std::string> GetNamespaceIdsToNamespaces(
207*8b6cd535SAndroid Build Coastguard Worker     const KeyMapper<NamespaceId>* key_mapper) {
208*8b6cd535SAndroid Build Coastguard Worker   std::unordered_map<NamespaceId, std::string> namespace_ids_to_namespaces;
209*8b6cd535SAndroid Build Coastguard Worker 
210*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<typename KeyMapper<NamespaceId>::Iterator> itr =
211*8b6cd535SAndroid Build Coastguard Worker       key_mapper->GetIterator();
212*8b6cd535SAndroid Build Coastguard Worker   while (itr->Advance()) {
213*8b6cd535SAndroid Build Coastguard Worker     namespace_ids_to_namespaces.insert(
214*8b6cd535SAndroid Build Coastguard Worker         {itr->GetValue(), std::string(itr->GetKey())});
215*8b6cd535SAndroid Build Coastguard Worker   }
216*8b6cd535SAndroid Build Coastguard Worker   return namespace_ids_to_namespaces;
217*8b6cd535SAndroid Build Coastguard Worker }
218*8b6cd535SAndroid Build Coastguard Worker 
219*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<std::unique_ptr<
220*8b6cd535SAndroid Build Coastguard Worker     KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>>>
CreateUriMapper(const Filesystem & filesystem,const std::string & base_dir,bool use_persistent_hash_map)221*8b6cd535SAndroid Build Coastguard Worker CreateUriMapper(const Filesystem& filesystem, const std::string& base_dir,
222*8b6cd535SAndroid Build Coastguard Worker                 bool use_persistent_hash_map) {
223*8b6cd535SAndroid Build Coastguard Worker   std::string uri_hash_mapper_working_path =
224*8b6cd535SAndroid Build Coastguard Worker       MakeUriHashMapperWorkingPath(base_dir);
225*8b6cd535SAndroid Build Coastguard Worker   // Due to historic issue, we use document store's base_dir directly as
226*8b6cd535SAndroid Build Coastguard Worker   // DynamicTrieKeyMapper's working directory for uri mapper.
227*8b6cd535SAndroid Build Coastguard Worker   // DynamicTrieKeyMapper also creates a subdirectory "key_mapper_dir", so the
228*8b6cd535SAndroid Build Coastguard Worker   // actual files will be put under "<base_dir>/key_mapper_dir/".
229*8b6cd535SAndroid Build Coastguard Worker   bool dynamic_trie_key_mapper_dir_exists = filesystem.DirectoryExists(
230*8b6cd535SAndroid Build Coastguard Worker       absl_ports::StrCat(base_dir, "/key_mapper_dir").c_str());
231*8b6cd535SAndroid Build Coastguard Worker   bool persistent_hash_map_dir_exists =
232*8b6cd535SAndroid Build Coastguard Worker       filesystem.DirectoryExists(uri_hash_mapper_working_path.c_str());
233*8b6cd535SAndroid Build Coastguard Worker   if ((use_persistent_hash_map && dynamic_trie_key_mapper_dir_exists) ||
234*8b6cd535SAndroid Build Coastguard Worker       (!use_persistent_hash_map && persistent_hash_map_dir_exists)) {
235*8b6cd535SAndroid Build Coastguard Worker     // Return a failure here so that the caller can properly delete and rebuild
236*8b6cd535SAndroid Build Coastguard Worker     // this component.
237*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::FailedPreconditionError("Key mapper type mismatch");
238*8b6cd535SAndroid Build Coastguard Worker   }
239*8b6cd535SAndroid Build Coastguard Worker 
240*8b6cd535SAndroid Build Coastguard Worker   if (use_persistent_hash_map) {
241*8b6cd535SAndroid Build Coastguard Worker     return PersistentHashMapKeyMapper<
242*8b6cd535SAndroid Build Coastguard Worker         DocumentId, fingerprint_util::FingerprintStringFormatter>::
243*8b6cd535SAndroid Build Coastguard Worker         Create(filesystem, std::move(uri_hash_mapper_working_path),
244*8b6cd535SAndroid Build Coastguard Worker                /*pre_mapping_fbv=*/false,
245*8b6cd535SAndroid Build Coastguard Worker                /*max_num_entries=*/kUriHashKeyMapperMaxNumEntries,
246*8b6cd535SAndroid Build Coastguard Worker                /*average_kv_byte_size=*/kUriHashKeyMapperKVByteSize);
247*8b6cd535SAndroid Build Coastguard Worker   } else {
248*8b6cd535SAndroid Build Coastguard Worker     return DynamicTrieKeyMapper<DocumentId,
249*8b6cd535SAndroid Build Coastguard Worker                                 fingerprint_util::FingerprintStringFormatter>::
250*8b6cd535SAndroid Build Coastguard Worker         Create(filesystem, base_dir, kUriDynamicTrieKeyMapperMaxSize);
251*8b6cd535SAndroid Build Coastguard Worker   }
252*8b6cd535SAndroid Build Coastguard Worker }
253*8b6cd535SAndroid Build Coastguard Worker 
254*8b6cd535SAndroid Build Coastguard Worker // Find the existing blob handles in the given document and remove them from the
255*8b6cd535SAndroid Build Coastguard Worker // dead_blob_handles set. Those are the blob handles that are still in use.
256*8b6cd535SAndroid Build Coastguard Worker //
257*8b6cd535SAndroid Build Coastguard Worker // This method is flag-guarded by the flag enable_blob_store. If the flag is
258*8b6cd535SAndroid Build Coastguard Worker // disabled, the dead_blob_handles must be empty and this method will be a
259*8b6cd535SAndroid Build Coastguard Worker // no-op.
260*8b6cd535SAndroid Build Coastguard Worker //
261*8b6cd535SAndroid Build Coastguard Worker // The type_blob_map is a map from schema type to a set of blob property names.
RemoveAliveBlobHandles(const DocumentProto & document,const std::unordered_map<std::string,std::vector<std::string>> & type_blob_property_map,std::unordered_set<std::string> & dead_blob_handles)262*8b6cd535SAndroid Build Coastguard Worker void RemoveAliveBlobHandles(
263*8b6cd535SAndroid Build Coastguard Worker     const DocumentProto& document,
264*8b6cd535SAndroid Build Coastguard Worker     const std::unordered_map<std::string, std::vector<std::string>>&
265*8b6cd535SAndroid Build Coastguard Worker         type_blob_property_map,
266*8b6cd535SAndroid Build Coastguard Worker     std::unordered_set<std::string>& dead_blob_handles) {
267*8b6cd535SAndroid Build Coastguard Worker   if (dead_blob_handles.empty() ||
268*8b6cd535SAndroid Build Coastguard Worker       type_blob_property_map.find(document.schema()) ==
269*8b6cd535SAndroid Build Coastguard Worker           type_blob_property_map.end()) {
270*8b6cd535SAndroid Build Coastguard Worker     // This document does not have any blob properties.
271*8b6cd535SAndroid Build Coastguard Worker     return;
272*8b6cd535SAndroid Build Coastguard Worker   }
273*8b6cd535SAndroid Build Coastguard Worker   const std::vector<std::string>& blob_property_paths =
274*8b6cd535SAndroid Build Coastguard Worker       type_blob_property_map.at(document.schema());
275*8b6cd535SAndroid Build Coastguard Worker 
276*8b6cd535SAndroid Build Coastguard Worker   for (const std::string& blob_property_path : blob_property_paths) {
277*8b6cd535SAndroid Build Coastguard Worker     auto content_or = property_util::ExtractPropertyValuesFromDocument<
278*8b6cd535SAndroid Build Coastguard Worker         PropertyProto::BlobHandleProto>(document, blob_property_path);
279*8b6cd535SAndroid Build Coastguard Worker     if (content_or.ok()) {
280*8b6cd535SAndroid Build Coastguard Worker       for (const PropertyProto::BlobHandleProto& blob_handle :
281*8b6cd535SAndroid Build Coastguard Worker            content_or.ValueOrDie()) {
282*8b6cd535SAndroid Build Coastguard Worker         dead_blob_handles.erase(BlobStore::BuildBlobHandleStr(blob_handle));
283*8b6cd535SAndroid Build Coastguard Worker       }
284*8b6cd535SAndroid Build Coastguard Worker     }
285*8b6cd535SAndroid Build Coastguard Worker   }
286*8b6cd535SAndroid Build Coastguard Worker }
287*8b6cd535SAndroid Build Coastguard Worker 
288*8b6cd535SAndroid Build Coastguard Worker }  // namespace
289*8b6cd535SAndroid Build Coastguard Worker 
DocumentStore(const Filesystem * filesystem,const std::string_view base_dir,const Clock * clock,const SchemaStore * schema_store,const FeatureFlags * feature_flags,bool pre_mapping_fbv,bool use_persistent_hash_map,int32_t compression_level)290*8b6cd535SAndroid Build Coastguard Worker DocumentStore::DocumentStore(const Filesystem* filesystem,
291*8b6cd535SAndroid Build Coastguard Worker                              const std::string_view base_dir,
292*8b6cd535SAndroid Build Coastguard Worker                              const Clock* clock,
293*8b6cd535SAndroid Build Coastguard Worker                              const SchemaStore* schema_store,
294*8b6cd535SAndroid Build Coastguard Worker                              const FeatureFlags* feature_flags,
295*8b6cd535SAndroid Build Coastguard Worker                              bool pre_mapping_fbv, bool use_persistent_hash_map,
296*8b6cd535SAndroid Build Coastguard Worker                              int32_t compression_level)
297*8b6cd535SAndroid Build Coastguard Worker     : filesystem_(filesystem),
298*8b6cd535SAndroid Build Coastguard Worker       base_dir_(base_dir),
299*8b6cd535SAndroid Build Coastguard Worker       clock_(*clock),
300*8b6cd535SAndroid Build Coastguard Worker       feature_flags_(*feature_flags),
301*8b6cd535SAndroid Build Coastguard Worker       schema_store_(schema_store),
302*8b6cd535SAndroid Build Coastguard Worker       document_validator_(schema_store),
303*8b6cd535SAndroid Build Coastguard Worker       pre_mapping_fbv_(pre_mapping_fbv),
304*8b6cd535SAndroid Build Coastguard Worker       use_persistent_hash_map_(use_persistent_hash_map),
305*8b6cd535SAndroid Build Coastguard Worker       compression_level_(compression_level) {}
306*8b6cd535SAndroid Build Coastguard Worker 
Put(const DocumentProto & document,int32_t num_tokens,PutDocumentStatsProto * put_document_stats)307*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentStore::PutResult> DocumentStore::Put(
308*8b6cd535SAndroid Build Coastguard Worker     const DocumentProto& document, int32_t num_tokens,
309*8b6cd535SAndroid Build Coastguard Worker     PutDocumentStatsProto* put_document_stats) {
310*8b6cd535SAndroid Build Coastguard Worker   return Put(DocumentProto(document), num_tokens, put_document_stats);
311*8b6cd535SAndroid Build Coastguard Worker }
312*8b6cd535SAndroid Build Coastguard Worker 
Put(DocumentProto && document,int32_t num_tokens,PutDocumentStatsProto * put_document_stats)313*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentStore::PutResult> DocumentStore::Put(
314*8b6cd535SAndroid Build Coastguard Worker     DocumentProto&& document, int32_t num_tokens,
315*8b6cd535SAndroid Build Coastguard Worker     PutDocumentStatsProto* put_document_stats) {
316*8b6cd535SAndroid Build Coastguard Worker   document.mutable_internal_fields()->set_length_in_tokens(num_tokens);
317*8b6cd535SAndroid Build Coastguard Worker   return InternalPut(std::move(document), put_document_stats);
318*8b6cd535SAndroid Build Coastguard Worker }
319*8b6cd535SAndroid Build Coastguard Worker 
~DocumentStore()320*8b6cd535SAndroid Build Coastguard Worker DocumentStore::~DocumentStore() {
321*8b6cd535SAndroid Build Coastguard Worker   if (initialized_) {
322*8b6cd535SAndroid Build Coastguard Worker     if (!PersistToDisk(PersistType::FULL).ok()) {
323*8b6cd535SAndroid Build Coastguard Worker       ICING_LOG(ERROR)
324*8b6cd535SAndroid Build Coastguard Worker           << "Error persisting to disk in DocumentStore destructor";
325*8b6cd535SAndroid Build Coastguard Worker     }
326*8b6cd535SAndroid Build Coastguard Worker   }
327*8b6cd535SAndroid Build Coastguard Worker }
328*8b6cd535SAndroid Build Coastguard Worker 
Create(const Filesystem * filesystem,const std::string & base_dir,const Clock * clock,const SchemaStore * schema_store,const FeatureFlags * feature_flags,bool force_recovery_and_revalidate_documents,bool pre_mapping_fbv,bool use_persistent_hash_map,int32_t compression_level,InitializeStatsProto * initialize_stats)329*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
330*8b6cd535SAndroid Build Coastguard Worker     const Filesystem* filesystem, const std::string& base_dir,
331*8b6cd535SAndroid Build Coastguard Worker     const Clock* clock, const SchemaStore* schema_store,
332*8b6cd535SAndroid Build Coastguard Worker     const FeatureFlags* feature_flags,
333*8b6cd535SAndroid Build Coastguard Worker     bool force_recovery_and_revalidate_documents, bool pre_mapping_fbv,
334*8b6cd535SAndroid Build Coastguard Worker     bool use_persistent_hash_map, int32_t compression_level,
335*8b6cd535SAndroid Build Coastguard Worker     InitializeStatsProto* initialize_stats) {
336*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_ERROR_IF_NULL(filesystem);
337*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_ERROR_IF_NULL(clock);
338*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_ERROR_IF_NULL(schema_store);
339*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_ERROR_IF_NULL(feature_flags);
340*8b6cd535SAndroid Build Coastguard Worker 
341*8b6cd535SAndroid Build Coastguard Worker   auto document_store = std::unique_ptr<DocumentStore>(new DocumentStore(
342*8b6cd535SAndroid Build Coastguard Worker       filesystem, base_dir, clock, schema_store, feature_flags, pre_mapping_fbv,
343*8b6cd535SAndroid Build Coastguard Worker       use_persistent_hash_map, compression_level));
344*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
345*8b6cd535SAndroid Build Coastguard Worker       InitializeResult initialize_result,
346*8b6cd535SAndroid Build Coastguard Worker       document_store->Initialize(force_recovery_and_revalidate_documents,
347*8b6cd535SAndroid Build Coastguard Worker                                  initialize_stats));
348*8b6cd535SAndroid Build Coastguard Worker 
349*8b6cd535SAndroid Build Coastguard Worker   CreateResult create_result;
350*8b6cd535SAndroid Build Coastguard Worker   create_result.document_store = std::move(document_store);
351*8b6cd535SAndroid Build Coastguard Worker   create_result.data_loss = initialize_result.data_loss;
352*8b6cd535SAndroid Build Coastguard Worker   create_result.derived_files_regenerated =
353*8b6cd535SAndroid Build Coastguard Worker       initialize_result.derived_files_regenerated;
354*8b6cd535SAndroid Build Coastguard Worker   return create_result;
355*8b6cd535SAndroid Build Coastguard Worker }
356*8b6cd535SAndroid Build Coastguard Worker 
DiscardDerivedFiles(const Filesystem * filesystem,const std::string & base_dir)357*8b6cd535SAndroid Build Coastguard Worker /* static */ libtextclassifier3::Status DocumentStore::DiscardDerivedFiles(
358*8b6cd535SAndroid Build Coastguard Worker     const Filesystem* filesystem, const std::string& base_dir) {
359*8b6cd535SAndroid Build Coastguard Worker   // Header
360*8b6cd535SAndroid Build Coastguard Worker   const std::string header_filename = MakeHeaderFilename(base_dir);
361*8b6cd535SAndroid Build Coastguard Worker   if (!filesystem->DeleteFile(MakeHeaderFilename(base_dir).c_str())) {
362*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError("Couldn't delete header file");
363*8b6cd535SAndroid Build Coastguard Worker   }
364*8b6cd535SAndroid Build Coastguard Worker 
365*8b6cd535SAndroid Build Coastguard Worker   // Document key mapper. Doesn't hurt to delete both dynamic trie and
366*8b6cd535SAndroid Build Coastguard Worker   // persistent hash map without checking.
367*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(
368*8b6cd535SAndroid Build Coastguard Worker       DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem, base_dir));
369*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<DocumentId>::Delete(
370*8b6cd535SAndroid Build Coastguard Worker       *filesystem, MakeUriHashMapperWorkingPath(base_dir)));
371*8b6cd535SAndroid Build Coastguard Worker 
372*8b6cd535SAndroid Build Coastguard Worker   // Document id mapper
373*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(FileBackedVector<int64_t>::Delete(
374*8b6cd535SAndroid Build Coastguard Worker       *filesystem, MakeDocumentIdMapperFilename(base_dir)));
375*8b6cd535SAndroid Build Coastguard Worker 
376*8b6cd535SAndroid Build Coastguard Worker   // Document associated score cache
377*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(FileBackedVector<DocumentAssociatedScoreData>::Delete(
378*8b6cd535SAndroid Build Coastguard Worker       *filesystem, MakeScoreCacheFilename(base_dir)));
379*8b6cd535SAndroid Build Coastguard Worker 
380*8b6cd535SAndroid Build Coastguard Worker   // Filter cache
381*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(FileBackedVector<DocumentFilterData>::Delete(
382*8b6cd535SAndroid Build Coastguard Worker       *filesystem, MakeFilterCacheFilename(base_dir)));
383*8b6cd535SAndroid Build Coastguard Worker 
384*8b6cd535SAndroid Build Coastguard Worker   // Namespace mapper
385*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<NamespaceId>::Delete(
386*8b6cd535SAndroid Build Coastguard Worker       *filesystem, MakeNamespaceMapperFilename(base_dir)));
387*8b6cd535SAndroid Build Coastguard Worker 
388*8b6cd535SAndroid Build Coastguard Worker   // Corpus mapper
389*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<CorpusId>::Delete(
390*8b6cd535SAndroid Build Coastguard Worker       *filesystem, MakeCorpusMapperFilename(base_dir)));
391*8b6cd535SAndroid Build Coastguard Worker 
392*8b6cd535SAndroid Build Coastguard Worker   // Corpus associated score cache
393*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(FileBackedVector<CorpusAssociatedScoreData>::Delete(
394*8b6cd535SAndroid Build Coastguard Worker       *filesystem, MakeCorpusScoreCache(base_dir)));
395*8b6cd535SAndroid Build Coastguard Worker 
396*8b6cd535SAndroid Build Coastguard Worker   // Scorable Property Cache
397*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(
398*8b6cd535SAndroid Build Coastguard Worker       MemoryMappedFileBackedProtoLog<ScorablePropertySetProto>::Delete(
399*8b6cd535SAndroid Build Coastguard Worker           *filesystem, MakeScorablePropertyCacheFilename(base_dir)));
400*8b6cd535SAndroid Build Coastguard Worker 
401*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
402*8b6cd535SAndroid Build Coastguard Worker }
403*8b6cd535SAndroid Build Coastguard Worker 
404*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentStore::InitializeResult>
Initialize(bool force_recovery_and_revalidate_documents,InitializeStatsProto * initialize_stats)405*8b6cd535SAndroid Build Coastguard Worker DocumentStore::Initialize(bool force_recovery_and_revalidate_documents,
406*8b6cd535SAndroid Build Coastguard Worker                           InitializeStatsProto* initialize_stats) {
407*8b6cd535SAndroid Build Coastguard Worker   auto create_result_or =
408*8b6cd535SAndroid Build Coastguard Worker       DocumentLogCreator::Create(filesystem_, base_dir_, compression_level_);
409*8b6cd535SAndroid Build Coastguard Worker 
410*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
411*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
412*8b6cd535SAndroid Build Coastguard Worker   if (!create_result_or.ok()) {
413*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << create_result_or.status().error_message()
414*8b6cd535SAndroid Build Coastguard Worker                      << "\nFailed to initialize DocumentLog.";
415*8b6cd535SAndroid Build Coastguard Worker     return create_result_or.status();
416*8b6cd535SAndroid Build Coastguard Worker   }
417*8b6cd535SAndroid Build Coastguard Worker   DocumentLogCreator::CreateResult create_result =
418*8b6cd535SAndroid Build Coastguard Worker       std::move(create_result_or).ValueOrDie();
419*8b6cd535SAndroid Build Coastguard Worker 
420*8b6cd535SAndroid Build Coastguard Worker   document_log_ = std::move(create_result.log_create_result.proto_log);
421*8b6cd535SAndroid Build Coastguard Worker   InitializeStatsProto::RecoveryCause recovery_cause =
422*8b6cd535SAndroid Build Coastguard Worker       GetRecoveryCause(create_result, force_recovery_and_revalidate_documents);
423*8b6cd535SAndroid Build Coastguard Worker 
424*8b6cd535SAndroid Build Coastguard Worker   bool derived_files_regenerated = false;
425*8b6cd535SAndroid Build Coastguard Worker   if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) {
426*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(INFO) << "Starting Document Store Recovery with cause="
427*8b6cd535SAndroid Build Coastguard Worker                     << recovery_cause << ", and create result { new_file="
428*8b6cd535SAndroid Build Coastguard Worker                     << create_result.new_file << ", preeisting_file_version="
429*8b6cd535SAndroid Build Coastguard Worker                     << create_result.preexisting_file_version << ", data_loss="
430*8b6cd535SAndroid Build Coastguard Worker                     << create_result.log_create_result.data_loss
431*8b6cd535SAndroid Build Coastguard Worker                     << "} and kCurrentVersion="
432*8b6cd535SAndroid Build Coastguard Worker                     << DocumentLogCreator::kCurrentVersion;
433*8b6cd535SAndroid Build Coastguard Worker     // We can't rely on any existing derived files. Recreate them from scratch.
434*8b6cd535SAndroid Build Coastguard Worker     // Currently happens if:
435*8b6cd535SAndroid Build Coastguard Worker     //   1) This is a new log and we don't have derived files yet
436*8b6cd535SAndroid Build Coastguard Worker     //   2) Client wanted us to force a regeneration.
437*8b6cd535SAndroid Build Coastguard Worker     //   3) Log has some data loss, can't rely on existing derived data.
438*8b6cd535SAndroid Build Coastguard Worker     std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
439*8b6cd535SAndroid Build Coastguard Worker     libtextclassifier3::Status status =
440*8b6cd535SAndroid Build Coastguard Worker         RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
441*8b6cd535SAndroid Build Coastguard Worker     if (recovery_cause != InitializeStatsProto::NONE) {
442*8b6cd535SAndroid Build Coastguard Worker       // Only consider it a recovery if the client forced a recovery or there
443*8b6cd535SAndroid Build Coastguard Worker       // was data loss. Otherwise, this could just be the first time we're
444*8b6cd535SAndroid Build Coastguard Worker       // initializing and generating derived files.
445*8b6cd535SAndroid Build Coastguard Worker       derived_files_regenerated = true;
446*8b6cd535SAndroid Build Coastguard Worker       if (initialize_stats != nullptr) {
447*8b6cd535SAndroid Build Coastguard Worker         initialize_stats->set_document_store_recovery_latency_ms(
448*8b6cd535SAndroid Build Coastguard Worker             document_recovery_timer->GetElapsedMilliseconds());
449*8b6cd535SAndroid Build Coastguard Worker         initialize_stats->set_document_store_recovery_cause(recovery_cause);
450*8b6cd535SAndroid Build Coastguard Worker         initialize_stats->set_document_store_data_status(
451*8b6cd535SAndroid Build Coastguard Worker             GetDataStatus(create_result.log_create_result.data_loss));
452*8b6cd535SAndroid Build Coastguard Worker       }
453*8b6cd535SAndroid Build Coastguard Worker     }
454*8b6cd535SAndroid Build Coastguard Worker     if (!status.ok()) {
455*8b6cd535SAndroid Build Coastguard Worker       ICING_LOG(ERROR)
456*8b6cd535SAndroid Build Coastguard Worker           << "Failed to regenerate derived files for DocumentStore";
457*8b6cd535SAndroid Build Coastguard Worker       return status;
458*8b6cd535SAndroid Build Coastguard Worker     }
459*8b6cd535SAndroid Build Coastguard Worker   } else {
460*8b6cd535SAndroid Build Coastguard Worker     if (!InitializeExistingDerivedFiles().ok()) {
461*8b6cd535SAndroid Build Coastguard Worker       ICING_LOG(WARNING)
462*8b6cd535SAndroid Build Coastguard Worker           << "Couldn't find derived files or failed to initialize them, "
463*8b6cd535SAndroid Build Coastguard Worker              "regenerating derived files for DocumentStore.";
464*8b6cd535SAndroid Build Coastguard Worker       std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
465*8b6cd535SAndroid Build Coastguard Worker       derived_files_regenerated = true;
466*8b6cd535SAndroid Build Coastguard Worker       libtextclassifier3::Status status = RegenerateDerivedFiles(
467*8b6cd535SAndroid Build Coastguard Worker           /*force_recovery_and_revalidate_documents=*/false);
468*8b6cd535SAndroid Build Coastguard Worker       if (initialize_stats != nullptr) {
469*8b6cd535SAndroid Build Coastguard Worker         initialize_stats->set_document_store_recovery_cause(
470*8b6cd535SAndroid Build Coastguard Worker             InitializeStatsProto::IO_ERROR);
471*8b6cd535SAndroid Build Coastguard Worker         initialize_stats->set_document_store_recovery_latency_ms(
472*8b6cd535SAndroid Build Coastguard Worker             document_recovery_timer->GetElapsedMilliseconds());
473*8b6cd535SAndroid Build Coastguard Worker       }
474*8b6cd535SAndroid Build Coastguard Worker       if (!status.ok()) {
475*8b6cd535SAndroid Build Coastguard Worker         ICING_LOG(ERROR)
476*8b6cd535SAndroid Build Coastguard Worker             << "Failed to regenerate derived files for DocumentStore";
477*8b6cd535SAndroid Build Coastguard Worker         return status;
478*8b6cd535SAndroid Build Coastguard Worker       }
479*8b6cd535SAndroid Build Coastguard Worker     }
480*8b6cd535SAndroid Build Coastguard Worker   }
481*8b6cd535SAndroid Build Coastguard Worker 
482*8b6cd535SAndroid Build Coastguard Worker   initialized_ = true;
483*8b6cd535SAndroid Build Coastguard Worker   if (initialize_stats != nullptr) {
484*8b6cd535SAndroid Build Coastguard Worker     initialize_stats->set_num_documents(document_id_mapper_->num_elements());
485*8b6cd535SAndroid Build Coastguard Worker   }
486*8b6cd535SAndroid Build Coastguard Worker 
487*8b6cd535SAndroid Build Coastguard Worker   InitializeResult initialize_result = {
488*8b6cd535SAndroid Build Coastguard Worker       .data_loss = create_result.log_create_result.data_loss,
489*8b6cd535SAndroid Build Coastguard Worker       .derived_files_regenerated = derived_files_regenerated};
490*8b6cd535SAndroid Build Coastguard Worker   return initialize_result;
491*8b6cd535SAndroid Build Coastguard Worker }
492*8b6cd535SAndroid Build Coastguard Worker 
InitializeExistingDerivedFiles()493*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
494*8b6cd535SAndroid Build Coastguard Worker   if (!HeaderExists()) {
495*8b6cd535SAndroid Build Coastguard Worker     // Without a header, we don't know if things are consistent between each
496*8b6cd535SAndroid Build Coastguard Worker     // other so the caller should just regenerate everything from ground
497*8b6cd535SAndroid Build Coastguard Worker     // truth.
498*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError("DocumentStore header doesn't exist");
499*8b6cd535SAndroid Build Coastguard Worker   }
500*8b6cd535SAndroid Build Coastguard Worker 
501*8b6cd535SAndroid Build Coastguard Worker   DocumentStore::Header header;
502*8b6cd535SAndroid Build Coastguard Worker   if (!filesystem_->Read(MakeHeaderFilename(base_dir_).c_str(), &header,
503*8b6cd535SAndroid Build Coastguard Worker                          sizeof(header))) {
504*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError(
505*8b6cd535SAndroid Build Coastguard Worker         absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
506*8b6cd535SAndroid Build Coastguard Worker   }
507*8b6cd535SAndroid Build Coastguard Worker 
508*8b6cd535SAndroid Build Coastguard Worker   if (header.magic != DocumentStore::Header::kMagic) {
509*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError(absl_ports::StrCat(
510*8b6cd535SAndroid Build Coastguard Worker         "Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
511*8b6cd535SAndroid Build Coastguard Worker   }
512*8b6cd535SAndroid Build Coastguard Worker 
513*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
514*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
515*8b6cd535SAndroid Build Coastguard Worker   auto document_key_mapper_or =
516*8b6cd535SAndroid Build Coastguard Worker       CreateUriMapper(*filesystem_, base_dir_, use_persistent_hash_map_);
517*8b6cd535SAndroid Build Coastguard Worker   if (!document_key_mapper_or.ok()) {
518*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
519*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to initialize KeyMapper";
520*8b6cd535SAndroid Build Coastguard Worker     return document_key_mapper_or.status();
521*8b6cd535SAndroid Build Coastguard Worker   }
522*8b6cd535SAndroid Build Coastguard Worker   document_key_mapper_ = std::move(document_key_mapper_or).ValueOrDie();
523*8b6cd535SAndroid Build Coastguard Worker 
524*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
525*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
526*8b6cd535SAndroid Build Coastguard Worker   auto document_id_mapper_or = FileBackedVector<int64_t>::Create(
527*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeDocumentIdMapperFilename(base_dir_),
528*8b6cd535SAndroid Build Coastguard Worker       MemoryMappedFile::READ_WRITE_AUTO_SYNC);
529*8b6cd535SAndroid Build Coastguard Worker   if (!document_id_mapper_or.ok()) {
530*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << document_id_mapper_or.status().error_message()
531*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to initialize DocumentIdMapper";
532*8b6cd535SAndroid Build Coastguard Worker     return document_id_mapper_or.status();
533*8b6cd535SAndroid Build Coastguard Worker   }
534*8b6cd535SAndroid Build Coastguard Worker   document_id_mapper_ = std::move(document_id_mapper_or).ValueOrDie();
535*8b6cd535SAndroid Build Coastguard Worker 
536*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(score_cache_,
537*8b6cd535SAndroid Build Coastguard Worker                          FileBackedVector<DocumentAssociatedScoreData>::Create(
538*8b6cd535SAndroid Build Coastguard Worker                              *filesystem_, MakeScoreCacheFilename(base_dir_),
539*8b6cd535SAndroid Build Coastguard Worker                              MemoryMappedFile::READ_WRITE_AUTO_SYNC));
540*8b6cd535SAndroid Build Coastguard Worker 
541*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
542*8b6cd535SAndroid Build Coastguard Worker       scorable_property_cache_,
543*8b6cd535SAndroid Build Coastguard Worker       MemoryMappedFileBackedProtoLog<ScorablePropertySetProto>::Create(
544*8b6cd535SAndroid Build Coastguard Worker           *filesystem_, MakeScorablePropertyCacheFilename(base_dir_)));
545*8b6cd535SAndroid Build Coastguard Worker 
546*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(filter_cache_,
547*8b6cd535SAndroid Build Coastguard Worker                          FileBackedVector<DocumentFilterData>::Create(
548*8b6cd535SAndroid Build Coastguard Worker                              *filesystem_, MakeFilterCacheFilename(base_dir_),
549*8b6cd535SAndroid Build Coastguard Worker                              MemoryMappedFile::READ_WRITE_AUTO_SYNC));
550*8b6cd535SAndroid Build Coastguard Worker 
551*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
552*8b6cd535SAndroid Build Coastguard Worker       namespace_mapper_,
553*8b6cd535SAndroid Build Coastguard Worker       DynamicTrieKeyMapper<NamespaceId>::Create(
554*8b6cd535SAndroid Build Coastguard Worker           *filesystem_, MakeNamespaceMapperFilename(base_dir_),
555*8b6cd535SAndroid Build Coastguard Worker           kNamespaceMapperMaxSize));
556*8b6cd535SAndroid Build Coastguard Worker 
557*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
558*8b6cd535SAndroid Build Coastguard Worker       usage_store_,
559*8b6cd535SAndroid Build Coastguard Worker       UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
560*8b6cd535SAndroid Build Coastguard Worker 
561*8b6cd535SAndroid Build Coastguard Worker   auto corpus_mapper_or =
562*8b6cd535SAndroid Build Coastguard Worker       DynamicTrieKeyMapper<CorpusId,
563*8b6cd535SAndroid Build Coastguard Worker                            fingerprint_util::FingerprintStringFormatter>::
564*8b6cd535SAndroid Build Coastguard Worker           Create(*filesystem_, MakeCorpusMapperFilename(base_dir_),
565*8b6cd535SAndroid Build Coastguard Worker                  kCorpusMapperMaxSize);
566*8b6cd535SAndroid Build Coastguard Worker   if (!corpus_mapper_or.ok()) {
567*8b6cd535SAndroid Build Coastguard Worker     return std::move(corpus_mapper_or).status();
568*8b6cd535SAndroid Build Coastguard Worker   }
569*8b6cd535SAndroid Build Coastguard Worker   corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie();
570*8b6cd535SAndroid Build Coastguard Worker 
571*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(corpus_score_cache_,
572*8b6cd535SAndroid Build Coastguard Worker                          FileBackedVector<CorpusAssociatedScoreData>::Create(
573*8b6cd535SAndroid Build Coastguard Worker                              *filesystem_, MakeCorpusScoreCache(base_dir_),
574*8b6cd535SAndroid Build Coastguard Worker                              MemoryMappedFile::READ_WRITE_AUTO_SYNC));
575*8b6cd535SAndroid Build Coastguard Worker 
576*8b6cd535SAndroid Build Coastguard Worker   // Ensure the usage store is the correct size.
577*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(
578*8b6cd535SAndroid Build Coastguard Worker       usage_store_->TruncateTo(document_id_mapper_->num_elements()));
579*8b6cd535SAndroid Build Coastguard Worker 
580*8b6cd535SAndroid Build Coastguard Worker   Crc32 expected_checksum(header.checksum);
581*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(Crc32 checksum, GetChecksum());
582*8b6cd535SAndroid Build Coastguard Worker   if (checksum != expected_checksum) {
583*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError(
584*8b6cd535SAndroid Build Coastguard Worker         "Combined checksum of DocStore was inconsistent");
585*8b6cd535SAndroid Build Coastguard Worker   }
586*8b6cd535SAndroid Build Coastguard Worker 
587*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
588*8b6cd535SAndroid Build Coastguard Worker }
589*8b6cd535SAndroid Build Coastguard Worker 
RegenerateDerivedFiles(bool revalidate_documents)590*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles(
591*8b6cd535SAndroid Build Coastguard Worker     bool revalidate_documents) {
592*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(ResetDocumentKeyMapper());
593*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(ResetDocumentIdMapper());
594*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache());
595*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(ResetScorablePropertyCache());
596*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(ResetFilterCache());
597*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(ResetNamespaceMapper());
598*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(ResetCorpusMapper());
599*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(ResetCorpusAssociatedScoreCache());
600*8b6cd535SAndroid Build Coastguard Worker 
601*8b6cd535SAndroid Build Coastguard Worker   // Creates a new UsageStore instance. Note that we don't reset the data in
602*8b6cd535SAndroid Build Coastguard Worker   // usage store here because we're not able to regenerate the usage scores.
603*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
604*8b6cd535SAndroid Build Coastguard Worker       usage_store_,
605*8b6cd535SAndroid Build Coastguard Worker       UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
606*8b6cd535SAndroid Build Coastguard Worker 
607*8b6cd535SAndroid Build Coastguard Worker   // Iterates through document log
608*8b6cd535SAndroid Build Coastguard Worker   auto iterator = document_log_->GetIterator();
609*8b6cd535SAndroid Build Coastguard Worker   auto iterator_status = iterator.Advance();
610*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<int64_t> element_size =
611*8b6cd535SAndroid Build Coastguard Worker       document_log_->GetElementsFileSize();
612*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<int64_t> disk_usage =
613*8b6cd535SAndroid Build Coastguard Worker       document_log_->GetDiskUsage();
614*8b6cd535SAndroid Build Coastguard Worker   if (element_size.ok() && disk_usage.ok()) {
615*8b6cd535SAndroid Build Coastguard Worker     ICING_VLOG(1) << "Starting recovery of document store. Document store "
616*8b6cd535SAndroid Build Coastguard Worker                      "elements file size:"
617*8b6cd535SAndroid Build Coastguard Worker                   << element_size.ValueOrDie()
618*8b6cd535SAndroid Build Coastguard Worker                   << ", disk usage=" << disk_usage.ValueOrDie();
619*8b6cd535SAndroid Build Coastguard Worker   }
620*8b6cd535SAndroid Build Coastguard Worker   while (iterator_status.ok()) {
621*8b6cd535SAndroid Build Coastguard Worker     ICING_VLOG(2) << "Attempting to read document at offset="
622*8b6cd535SAndroid Build Coastguard Worker                   << iterator.GetOffset();
623*8b6cd535SAndroid Build Coastguard Worker     libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
624*8b6cd535SAndroid Build Coastguard Worker         document_log_->ReadProto(iterator.GetOffset());
625*8b6cd535SAndroid Build Coastguard Worker 
626*8b6cd535SAndroid Build Coastguard Worker     if (absl_ports::IsNotFound(document_wrapper_or.status())) {
627*8b6cd535SAndroid Build Coastguard Worker       // The erased document still occupies 1 document id.
628*8b6cd535SAndroid Build Coastguard Worker       DocumentId new_document_id = document_id_mapper_->num_elements();
629*8b6cd535SAndroid Build Coastguard Worker       ICING_RETURN_IF_ERROR(ClearDerivedData(new_document_id));
630*8b6cd535SAndroid Build Coastguard Worker       iterator_status = iterator.Advance();
631*8b6cd535SAndroid Build Coastguard Worker       continue;
632*8b6cd535SAndroid Build Coastguard Worker     } else if (!document_wrapper_or.ok()) {
633*8b6cd535SAndroid Build Coastguard Worker       return document_wrapper_or.status();
634*8b6cd535SAndroid Build Coastguard Worker     }
635*8b6cd535SAndroid Build Coastguard Worker 
636*8b6cd535SAndroid Build Coastguard Worker     DocumentWrapper document_wrapper =
637*8b6cd535SAndroid Build Coastguard Worker         std::move(document_wrapper_or).ValueOrDie();
638*8b6cd535SAndroid Build Coastguard Worker     // Revalidate that this document is still compatible if requested.
639*8b6cd535SAndroid Build Coastguard Worker     if (revalidate_documents) {
640*8b6cd535SAndroid Build Coastguard Worker       if (!document_validator_.Validate(document_wrapper.document()).ok()) {
641*8b6cd535SAndroid Build Coastguard Worker         // Document is no longer valid with the current schema. Mark as
642*8b6cd535SAndroid Build Coastguard Worker         // deleted
643*8b6cd535SAndroid Build Coastguard Worker         DocumentId new_document_id = document_id_mapper_->num_elements();
644*8b6cd535SAndroid Build Coastguard Worker         ICING_RETURN_IF_ERROR(document_log_->EraseProto(iterator.GetOffset()));
645*8b6cd535SAndroid Build Coastguard Worker         ICING_RETURN_IF_ERROR(ClearDerivedData(new_document_id));
646*8b6cd535SAndroid Build Coastguard Worker         continue;
647*8b6cd535SAndroid Build Coastguard Worker       }
648*8b6cd535SAndroid Build Coastguard Worker     }
649*8b6cd535SAndroid Build Coastguard Worker 
650*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(
651*8b6cd535SAndroid Build Coastguard Worker         NamespaceId namespace_id,
652*8b6cd535SAndroid Build Coastguard Worker         namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
653*8b6cd535SAndroid Build Coastguard Worker                                     namespace_mapper_->num_keys()));
654*8b6cd535SAndroid Build Coastguard Worker 
655*8b6cd535SAndroid Build Coastguard Worker     // Updates key mapper and document_id mapper with the new document
656*8b6cd535SAndroid Build Coastguard Worker     DocumentId new_document_id = document_id_mapper_->num_elements();
657*8b6cd535SAndroid Build Coastguard Worker     NamespaceIdFingerprint new_doc_nsid_uri_fingerprint(
658*8b6cd535SAndroid Build Coastguard Worker         namespace_id, document_wrapper.document().uri());
659*8b6cd535SAndroid Build Coastguard Worker     ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
660*8b6cd535SAndroid Build Coastguard Worker         new_doc_nsid_uri_fingerprint.EncodeToCString(), new_document_id));
661*8b6cd535SAndroid Build Coastguard Worker     ICING_RETURN_IF_ERROR(
662*8b6cd535SAndroid Build Coastguard Worker         document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
663*8b6cd535SAndroid Build Coastguard Worker 
664*8b6cd535SAndroid Build Coastguard Worker     SchemaTypeId schema_type_id;
665*8b6cd535SAndroid Build Coastguard Worker     auto schema_type_id_or =
666*8b6cd535SAndroid Build Coastguard Worker         schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
667*8b6cd535SAndroid Build Coastguard Worker     if (absl_ports::IsNotFound(schema_type_id_or.status())) {
668*8b6cd535SAndroid Build Coastguard Worker       // Didn't find a SchemaTypeId. This means that the DocumentStore and
669*8b6cd535SAndroid Build Coastguard Worker       // the SchemaStore are out of sync. But DocumentStore can't do
670*8b6cd535SAndroid Build Coastguard Worker       // anything about it so just ignore this for now. This should be
671*8b6cd535SAndroid Build Coastguard Worker       // detected/handled by the owner of DocumentStore. Set it to some
672*8b6cd535SAndroid Build Coastguard Worker       // arbitrary invalid value for now, it'll get updated to the correct
673*8b6cd535SAndroid Build Coastguard Worker       // ID later.
674*8b6cd535SAndroid Build Coastguard Worker       schema_type_id = -1;
675*8b6cd535SAndroid Build Coastguard Worker     } else if (!schema_type_id_or.ok()) {
676*8b6cd535SAndroid Build Coastguard Worker       // Real error. Pass it up
677*8b6cd535SAndroid Build Coastguard Worker       return schema_type_id_or.status();
678*8b6cd535SAndroid Build Coastguard Worker     } else {
679*8b6cd535SAndroid Build Coastguard Worker       // We're guaranteed that SchemaTypeId is valid now
680*8b6cd535SAndroid Build Coastguard Worker       schema_type_id = schema_type_id_or.ValueOrDie();
681*8b6cd535SAndroid Build Coastguard Worker     }
682*8b6cd535SAndroid Build Coastguard Worker 
683*8b6cd535SAndroid Build Coastguard Worker     // Update corpus maps
684*8b6cd535SAndroid Build Coastguard Worker     NamespaceIdFingerprint corpus_nsid_schema_fingerprint(
685*8b6cd535SAndroid Build Coastguard Worker         namespace_id, document_wrapper.document().schema());
686*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(CorpusId corpus_id,
687*8b6cd535SAndroid Build Coastguard Worker                            corpus_mapper_->GetOrPut(
688*8b6cd535SAndroid Build Coastguard Worker                                corpus_nsid_schema_fingerprint.EncodeToCString(),
689*8b6cd535SAndroid Build Coastguard Worker                                corpus_mapper_->num_keys()));
690*8b6cd535SAndroid Build Coastguard Worker 
691*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
692*8b6cd535SAndroid Build Coastguard Worker                            GetCorpusAssociatedScoreDataToUpdate(corpus_id));
693*8b6cd535SAndroid Build Coastguard Worker     scoring_data.AddDocument(
694*8b6cd535SAndroid Build Coastguard Worker         document_wrapper.document().internal_fields().length_in_tokens());
695*8b6cd535SAndroid Build Coastguard Worker 
696*8b6cd535SAndroid Build Coastguard Worker     ICING_RETURN_IF_ERROR(
697*8b6cd535SAndroid Build Coastguard Worker         UpdateCorpusAssociatedScoreCache(corpus_id, scoring_data));
698*8b6cd535SAndroid Build Coastguard Worker 
699*8b6cd535SAndroid Build Coastguard Worker     int32_t scorable_property_cache_index = kInvalidScorablePropertyCacheIndex;
700*8b6cd535SAndroid Build Coastguard Worker     // Swallow the error when schema_type_id is not found, and skip updating the
701*8b6cd535SAndroid Build Coastguard Worker     // scorable property cache.
702*8b6cd535SAndroid Build Coastguard Worker     if (schema_type_id != -1) {
703*8b6cd535SAndroid Build Coastguard Worker       ICING_ASSIGN_OR_RETURN(scorable_property_cache_index,
704*8b6cd535SAndroid Build Coastguard Worker                              UpdateScorablePropertyCache(
705*8b6cd535SAndroid Build Coastguard Worker                                  document_wrapper.document(), schema_type_id));
706*8b6cd535SAndroid Build Coastguard Worker     }
707*8b6cd535SAndroid Build Coastguard Worker 
708*8b6cd535SAndroid Build Coastguard Worker     ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
709*8b6cd535SAndroid Build Coastguard Worker         new_document_id,
710*8b6cd535SAndroid Build Coastguard Worker         DocumentAssociatedScoreData(
711*8b6cd535SAndroid Build Coastguard Worker             corpus_id, document_wrapper.document().score(),
712*8b6cd535SAndroid Build Coastguard Worker             document_wrapper.document().creation_timestamp_ms(),
713*8b6cd535SAndroid Build Coastguard Worker             scorable_property_cache_index,
714*8b6cd535SAndroid Build Coastguard Worker             document_wrapper.document().internal_fields().length_in_tokens())));
715*8b6cd535SAndroid Build Coastguard Worker 
716*8b6cd535SAndroid Build Coastguard Worker     int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs(
717*8b6cd535SAndroid Build Coastguard Worker         document_wrapper.document().creation_timestamp_ms(),
718*8b6cd535SAndroid Build Coastguard Worker         document_wrapper.document().ttl_ms());
719*8b6cd535SAndroid Build Coastguard Worker 
720*8b6cd535SAndroid Build Coastguard Worker     ICING_RETURN_IF_ERROR(UpdateFilterCache(
721*8b6cd535SAndroid Build Coastguard Worker         new_document_id,
722*8b6cd535SAndroid Build Coastguard Worker         DocumentFilterData(namespace_id,
723*8b6cd535SAndroid Build Coastguard Worker                            new_doc_nsid_uri_fingerprint.fingerprint(),
724*8b6cd535SAndroid Build Coastguard Worker                            schema_type_id, expiration_timestamp_ms)));
725*8b6cd535SAndroid Build Coastguard Worker     iterator_status = iterator.Advance();
726*8b6cd535SAndroid Build Coastguard Worker   }
727*8b6cd535SAndroid Build Coastguard Worker 
728*8b6cd535SAndroid Build Coastguard Worker   if (!absl_ports::IsOutOfRange(iterator_status)) {
729*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(WARNING)
730*8b6cd535SAndroid Build Coastguard Worker         << "Failed to iterate through proto log while regenerating "
731*8b6cd535SAndroid Build Coastguard Worker            "derived files";
732*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::Annotate(iterator_status,
733*8b6cd535SAndroid Build Coastguard Worker                                 "Failed to iterate through proto log.");
734*8b6cd535SAndroid Build Coastguard Worker   }
735*8b6cd535SAndroid Build Coastguard Worker 
736*8b6cd535SAndroid Build Coastguard Worker   // Shrink usage_store_ to the correct size.
737*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(
738*8b6cd535SAndroid Build Coastguard Worker       usage_store_->TruncateTo(document_id_mapper_->num_elements()));
739*8b6cd535SAndroid Build Coastguard Worker 
740*8b6cd535SAndroid Build Coastguard Worker   // Write the header
741*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(UpdateChecksum());
742*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
743*8b6cd535SAndroid Build Coastguard Worker }
744*8b6cd535SAndroid Build Coastguard Worker 
ResetDocumentKeyMapper()745*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
746*8b6cd535SAndroid Build Coastguard Worker   // Only one type of KeyMapper (either DynamicTrieKeyMapper or
747*8b6cd535SAndroid Build Coastguard Worker   // PersistentHashMapKeyMapper) will actually exist at any moment, but it is ok
748*8b6cd535SAndroid Build Coastguard Worker   // to call Delete() for both since Delete() returns OK if any of them doesn't
749*8b6cd535SAndroid Build Coastguard Worker   // exist.
750*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
751*8b6cd535SAndroid Build Coastguard Worker   document_key_mapper_.reset();
752*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
753*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
754*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status status =
755*8b6cd535SAndroid Build Coastguard Worker       DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem_, base_dir_);
756*8b6cd535SAndroid Build Coastguard Worker   if (!status.ok()) {
757*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << status.error_message()
758*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to delete old dynamic trie key mapper";
759*8b6cd535SAndroid Build Coastguard Worker     return status;
760*8b6cd535SAndroid Build Coastguard Worker   }
761*8b6cd535SAndroid Build Coastguard Worker   status = PersistentHashMapKeyMapper<DocumentId>::Delete(
762*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeUriHashMapperWorkingPath(base_dir_));
763*8b6cd535SAndroid Build Coastguard Worker   if (!status.ok()) {
764*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << status.error_message()
765*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to delete old persistent hash map key mapper";
766*8b6cd535SAndroid Build Coastguard Worker     return status;
767*8b6cd535SAndroid Build Coastguard Worker   }
768*8b6cd535SAndroid Build Coastguard Worker 
769*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN
770*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
771*8b6cd535SAndroid Build Coastguard Worker   auto document_key_mapper_or =
772*8b6cd535SAndroid Build Coastguard Worker       CreateUriMapper(*filesystem_, base_dir_, use_persistent_hash_map_);
773*8b6cd535SAndroid Build Coastguard Worker   if (!document_key_mapper_or.ok()) {
774*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
775*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to re-init key mapper";
776*8b6cd535SAndroid Build Coastguard Worker     return document_key_mapper_or.status();
777*8b6cd535SAndroid Build Coastguard Worker   }
778*8b6cd535SAndroid Build Coastguard Worker   document_key_mapper_ = std::move(document_key_mapper_or).ValueOrDie();
779*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
780*8b6cd535SAndroid Build Coastguard Worker }
781*8b6cd535SAndroid Build Coastguard Worker 
ResetDocumentIdMapper()782*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ResetDocumentIdMapper() {
783*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
784*8b6cd535SAndroid Build Coastguard Worker   document_id_mapper_.reset();
785*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
786*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
787*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status status = FileBackedVector<int64_t>::Delete(
788*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeDocumentIdMapperFilename(base_dir_));
789*8b6cd535SAndroid Build Coastguard Worker   if (!status.ok()) {
790*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << status.error_message()
791*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to delete old document_id mapper";
792*8b6cd535SAndroid Build Coastguard Worker     return status;
793*8b6cd535SAndroid Build Coastguard Worker   }
794*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN
795*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
796*8b6cd535SAndroid Build Coastguard Worker   auto document_id_mapper_or = FileBackedVector<int64_t>::Create(
797*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeDocumentIdMapperFilename(base_dir_),
798*8b6cd535SAndroid Build Coastguard Worker       MemoryMappedFile::READ_WRITE_AUTO_SYNC);
799*8b6cd535SAndroid Build Coastguard Worker   if (!document_id_mapper_or.ok()) {
800*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << document_id_mapper_or.status().error_message()
801*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to re-init document_id mapper";
802*8b6cd535SAndroid Build Coastguard Worker     return document_id_mapper_or.status();
803*8b6cd535SAndroid Build Coastguard Worker   }
804*8b6cd535SAndroid Build Coastguard Worker   document_id_mapper_ = std::move(document_id_mapper_or).ValueOrDie();
805*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
806*8b6cd535SAndroid Build Coastguard Worker }
807*8b6cd535SAndroid Build Coastguard Worker 
ResetDocumentAssociatedScoreCache()808*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ResetDocumentAssociatedScoreCache() {
809*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
810*8b6cd535SAndroid Build Coastguard Worker   score_cache_.reset();
811*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(FileBackedVector<DocumentAssociatedScoreData>::Delete(
812*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeScoreCacheFilename(base_dir_)));
813*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(score_cache_,
814*8b6cd535SAndroid Build Coastguard Worker                          FileBackedVector<DocumentAssociatedScoreData>::Create(
815*8b6cd535SAndroid Build Coastguard Worker                              *filesystem_, MakeScoreCacheFilename(base_dir_),
816*8b6cd535SAndroid Build Coastguard Worker                              MemoryMappedFile::READ_WRITE_AUTO_SYNC));
817*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
818*8b6cd535SAndroid Build Coastguard Worker }
819*8b6cd535SAndroid Build Coastguard Worker 
ResetScorablePropertyCache()820*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ResetScorablePropertyCache() {
821*8b6cd535SAndroid Build Coastguard Worker   scorable_property_cache_.reset();
822*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(
823*8b6cd535SAndroid Build Coastguard Worker       MemoryMappedFileBackedProtoLog<ScorablePropertySetProto>::Delete(
824*8b6cd535SAndroid Build Coastguard Worker           *filesystem_, MakeScorablePropertyCacheFilename(base_dir_)));
825*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
826*8b6cd535SAndroid Build Coastguard Worker       scorable_property_cache_,
827*8b6cd535SAndroid Build Coastguard Worker       MemoryMappedFileBackedProtoLog<ScorablePropertySetProto>::Create(
828*8b6cd535SAndroid Build Coastguard Worker           *filesystem_, MakeScorablePropertyCacheFilename(base_dir_)));
829*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
830*8b6cd535SAndroid Build Coastguard Worker }
831*8b6cd535SAndroid Build Coastguard Worker 
ResetCorpusAssociatedScoreCache()832*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ResetCorpusAssociatedScoreCache() {
833*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
834*8b6cd535SAndroid Build Coastguard Worker   corpus_score_cache_.reset();
835*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(FileBackedVector<CorpusAssociatedScoreData>::Delete(
836*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeCorpusScoreCache(base_dir_)));
837*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(corpus_score_cache_,
838*8b6cd535SAndroid Build Coastguard Worker                          FileBackedVector<CorpusAssociatedScoreData>::Create(
839*8b6cd535SAndroid Build Coastguard Worker                              *filesystem_, MakeCorpusScoreCache(base_dir_),
840*8b6cd535SAndroid Build Coastguard Worker                              MemoryMappedFile::READ_WRITE_AUTO_SYNC));
841*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
842*8b6cd535SAndroid Build Coastguard Worker }
843*8b6cd535SAndroid Build Coastguard Worker 
ResetFilterCache()844*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ResetFilterCache() {
845*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
846*8b6cd535SAndroid Build Coastguard Worker   filter_cache_.reset();
847*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(FileBackedVector<DocumentFilterData>::Delete(
848*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeFilterCacheFilename(base_dir_)));
849*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(filter_cache_,
850*8b6cd535SAndroid Build Coastguard Worker                          FileBackedVector<DocumentFilterData>::Create(
851*8b6cd535SAndroid Build Coastguard Worker                              *filesystem_, MakeFilterCacheFilename(base_dir_),
852*8b6cd535SAndroid Build Coastguard Worker                              MemoryMappedFile::READ_WRITE_AUTO_SYNC));
853*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
854*8b6cd535SAndroid Build Coastguard Worker }
855*8b6cd535SAndroid Build Coastguard Worker 
ResetNamespaceMapper()856*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() {
857*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
858*8b6cd535SAndroid Build Coastguard Worker   namespace_mapper_.reset();
859*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
860*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
861*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status status = DynamicTrieKeyMapper<NamespaceId>::Delete(
862*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeNamespaceMapperFilename(base_dir_));
863*8b6cd535SAndroid Build Coastguard Worker   if (!status.ok()) {
864*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << status.error_message()
865*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to delete old namespace_id mapper";
866*8b6cd535SAndroid Build Coastguard Worker     return status;
867*8b6cd535SAndroid Build Coastguard Worker   }
868*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
869*8b6cd535SAndroid Build Coastguard Worker       namespace_mapper_,
870*8b6cd535SAndroid Build Coastguard Worker       DynamicTrieKeyMapper<NamespaceId>::Create(
871*8b6cd535SAndroid Build Coastguard Worker           *filesystem_, MakeNamespaceMapperFilename(base_dir_),
872*8b6cd535SAndroid Build Coastguard Worker           kNamespaceMapperMaxSize));
873*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
874*8b6cd535SAndroid Build Coastguard Worker }
875*8b6cd535SAndroid Build Coastguard Worker 
ResetCorpusMapper()876*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ResetCorpusMapper() {
877*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
878*8b6cd535SAndroid Build Coastguard Worker   corpus_mapper_.reset();
879*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
880*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
881*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status status = DynamicTrieKeyMapper<CorpusId>::Delete(
882*8b6cd535SAndroid Build Coastguard Worker       *filesystem_, MakeCorpusMapperFilename(base_dir_));
883*8b6cd535SAndroid Build Coastguard Worker   if (!status.ok()) {
884*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << status.error_message()
885*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to delete old corpus_id mapper";
886*8b6cd535SAndroid Build Coastguard Worker     return status;
887*8b6cd535SAndroid Build Coastguard Worker   }
888*8b6cd535SAndroid Build Coastguard Worker   auto corpus_mapper_or =
889*8b6cd535SAndroid Build Coastguard Worker       DynamicTrieKeyMapper<CorpusId,
890*8b6cd535SAndroid Build Coastguard Worker                            fingerprint_util::FingerprintStringFormatter>::
891*8b6cd535SAndroid Build Coastguard Worker           Create(*filesystem_, MakeCorpusMapperFilename(base_dir_),
892*8b6cd535SAndroid Build Coastguard Worker                  kCorpusMapperMaxSize);
893*8b6cd535SAndroid Build Coastguard Worker   if (!corpus_mapper_or.ok()) {
894*8b6cd535SAndroid Build Coastguard Worker     return std::move(corpus_mapper_or).status();
895*8b6cd535SAndroid Build Coastguard Worker   }
896*8b6cd535SAndroid Build Coastguard Worker   corpus_mapper_ = std::move(corpus_mapper_or).ValueOrDie();
897*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
898*8b6cd535SAndroid Build Coastguard Worker }
899*8b6cd535SAndroid Build Coastguard Worker 
GetChecksum() const900*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<Crc32> DocumentStore::GetChecksum() const {
901*8b6cd535SAndroid Build Coastguard Worker   Crc32 total_checksum;
902*8b6cd535SAndroid Build Coastguard Worker 
903*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
904*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
905*8b6cd535SAndroid Build Coastguard Worker   auto checksum_or = document_log_->GetChecksum();
906*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
907*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
908*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of DocumentLog";
909*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
910*8b6cd535SAndroid Build Coastguard Worker   }
911*8b6cd535SAndroid Build Coastguard Worker   Crc32 document_log_checksum = std::move(checksum_or).ValueOrDie();
912*8b6cd535SAndroid Build Coastguard Worker 
913*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
914*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
915*8b6cd535SAndroid Build Coastguard Worker   checksum_or = document_key_mapper_->GetChecksum();
916*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
917*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
918*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of DocumentKeyMapper";
919*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
920*8b6cd535SAndroid Build Coastguard Worker   }
921*8b6cd535SAndroid Build Coastguard Worker   Crc32 document_key_mapper_checksum = std::move(checksum_or).ValueOrDie();
922*8b6cd535SAndroid Build Coastguard Worker 
923*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
924*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
925*8b6cd535SAndroid Build Coastguard Worker   checksum_or = document_id_mapper_->GetChecksum();
926*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
927*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
928*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of DocumentIdMapper";
929*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
930*8b6cd535SAndroid Build Coastguard Worker   }
931*8b6cd535SAndroid Build Coastguard Worker   Crc32 document_id_mapper_checksum = std::move(checksum_or).ValueOrDie();
932*8b6cd535SAndroid Build Coastguard Worker 
933*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
934*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
935*8b6cd535SAndroid Build Coastguard Worker   checksum_or = score_cache_->GetChecksum();
936*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
937*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
938*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of score cache";
939*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
940*8b6cd535SAndroid Build Coastguard Worker   }
941*8b6cd535SAndroid Build Coastguard Worker   Crc32 score_cache_checksum = std::move(checksum_or).ValueOrDie();
942*8b6cd535SAndroid Build Coastguard Worker 
943*8b6cd535SAndroid Build Coastguard Worker   checksum_or = scorable_property_cache_->GetChecksum();
944*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
945*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
946*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of scorable property cache";
947*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
948*8b6cd535SAndroid Build Coastguard Worker   }
949*8b6cd535SAndroid Build Coastguard Worker   Crc32 scorable_property_cache_checksum = std::move(checksum_or).ValueOrDie();
950*8b6cd535SAndroid Build Coastguard Worker 
951*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
952*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
953*8b6cd535SAndroid Build Coastguard Worker   checksum_or = filter_cache_->GetChecksum();
954*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
955*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
956*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of filter cache";
957*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
958*8b6cd535SAndroid Build Coastguard Worker   }
959*8b6cd535SAndroid Build Coastguard Worker   Crc32 filter_cache_checksum = std::move(checksum_or).ValueOrDie();
960*8b6cd535SAndroid Build Coastguard Worker 
961*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
962*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
963*8b6cd535SAndroid Build Coastguard Worker   checksum_or = namespace_mapper_->GetChecksum();
964*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
965*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
966*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of namespace mapper";
967*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
968*8b6cd535SAndroid Build Coastguard Worker   }
969*8b6cd535SAndroid Build Coastguard Worker   Crc32 namespace_mapper_checksum = std::move(checksum_or).ValueOrDie();
970*8b6cd535SAndroid Build Coastguard Worker 
971*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
972*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
973*8b6cd535SAndroid Build Coastguard Worker   checksum_or = corpus_mapper_->GetChecksum();
974*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
975*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
976*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of corpus mapper";
977*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
978*8b6cd535SAndroid Build Coastguard Worker   }
979*8b6cd535SAndroid Build Coastguard Worker   Crc32 corpus_mapper_checksum = std::move(checksum_or).ValueOrDie();
980*8b6cd535SAndroid Build Coastguard Worker 
981*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
982*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
983*8b6cd535SAndroid Build Coastguard Worker   checksum_or = corpus_score_cache_->GetChecksum();
984*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
985*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(WARNING) << checksum_or.status().error_message()
986*8b6cd535SAndroid Build Coastguard Worker                        << "Failed to compute checksum of score cache";
987*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
988*8b6cd535SAndroid Build Coastguard Worker   }
989*8b6cd535SAndroid Build Coastguard Worker   Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie();
990*8b6cd535SAndroid Build Coastguard Worker 
991*8b6cd535SAndroid Build Coastguard Worker   // NOTE: We purposely don't include usage_store checksum here because we can't
992*8b6cd535SAndroid Build Coastguard Worker   // regenerate it from ground truth documents. If it gets corrupted, we'll just
993*8b6cd535SAndroid Build Coastguard Worker   // clear all usage reports, but we shouldn't throw everything else in the
994*8b6cd535SAndroid Build Coastguard Worker   // document store out.
995*8b6cd535SAndroid Build Coastguard Worker 
996*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(document_log_checksum.Get()));
997*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(document_key_mapper_checksum.Get()));
998*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(document_id_mapper_checksum.Get()));
999*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(score_cache_checksum.Get()));
1000*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(scorable_property_cache_checksum.Get()));
1001*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(filter_cache_checksum.Get()));
1002*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(namespace_mapper_checksum.Get()));
1003*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(corpus_mapper_checksum.Get()));
1004*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(corpus_score_cache_checksum.Get()));
1005*8b6cd535SAndroid Build Coastguard Worker   return total_checksum;
1006*8b6cd535SAndroid Build Coastguard Worker }
1007*8b6cd535SAndroid Build Coastguard Worker 
UpdateChecksum()1008*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<Crc32> DocumentStore::UpdateChecksum() {
1009*8b6cd535SAndroid Build Coastguard Worker   Crc32 total_checksum;
1010*8b6cd535SAndroid Build Coastguard Worker 
1011*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1012*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1013*8b6cd535SAndroid Build Coastguard Worker   auto checksum_or = document_log_->UpdateChecksum();
1014*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1015*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
1016*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of DocumentLog";
1017*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1018*8b6cd535SAndroid Build Coastguard Worker   }
1019*8b6cd535SAndroid Build Coastguard Worker   Crc32 document_log_checksum = std::move(checksum_or).ValueOrDie();
1020*8b6cd535SAndroid Build Coastguard Worker 
1021*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1022*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1023*8b6cd535SAndroid Build Coastguard Worker   checksum_or = document_key_mapper_->UpdateChecksum();
1024*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1025*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
1026*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of DocumentKeyMapper";
1027*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1028*8b6cd535SAndroid Build Coastguard Worker   }
1029*8b6cd535SAndroid Build Coastguard Worker   Crc32 document_key_mapper_checksum = std::move(checksum_or).ValueOrDie();
1030*8b6cd535SAndroid Build Coastguard Worker 
1031*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1032*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1033*8b6cd535SAndroid Build Coastguard Worker   checksum_or = document_id_mapper_->UpdateChecksum();
1034*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1035*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
1036*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of DocumentIdMapper";
1037*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1038*8b6cd535SAndroid Build Coastguard Worker   }
1039*8b6cd535SAndroid Build Coastguard Worker   Crc32 document_id_mapper_checksum = std::move(checksum_or).ValueOrDie();
1040*8b6cd535SAndroid Build Coastguard Worker 
1041*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1042*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1043*8b6cd535SAndroid Build Coastguard Worker   checksum_or = score_cache_->UpdateChecksum();
1044*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1045*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
1046*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of score cache";
1047*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1048*8b6cd535SAndroid Build Coastguard Worker   }
1049*8b6cd535SAndroid Build Coastguard Worker   Crc32 score_cache_checksum = std::move(checksum_or).ValueOrDie();
1050*8b6cd535SAndroid Build Coastguard Worker 
1051*8b6cd535SAndroid Build Coastguard Worker   checksum_or = scorable_property_cache_->UpdateChecksum();
1052*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1053*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
1054*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of scorable property cache";
1055*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1056*8b6cd535SAndroid Build Coastguard Worker   }
1057*8b6cd535SAndroid Build Coastguard Worker   Crc32 scorable_property_cache_checksum = std::move(checksum_or).ValueOrDie();
1058*8b6cd535SAndroid Build Coastguard Worker 
1059*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1060*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1061*8b6cd535SAndroid Build Coastguard Worker   checksum_or = filter_cache_->UpdateChecksum();
1062*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1063*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
1064*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of filter cache";
1065*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1066*8b6cd535SAndroid Build Coastguard Worker   }
1067*8b6cd535SAndroid Build Coastguard Worker   Crc32 filter_cache_checksum = std::move(checksum_or).ValueOrDie();
1068*8b6cd535SAndroid Build Coastguard Worker 
1069*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1070*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1071*8b6cd535SAndroid Build Coastguard Worker   checksum_or = namespace_mapper_->UpdateChecksum();
1072*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1073*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
1074*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of namespace mapper";
1075*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1076*8b6cd535SAndroid Build Coastguard Worker   }
1077*8b6cd535SAndroid Build Coastguard Worker   Crc32 namespace_mapper_checksum = std::move(checksum_or).ValueOrDie();
1078*8b6cd535SAndroid Build Coastguard Worker 
1079*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1080*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1081*8b6cd535SAndroid Build Coastguard Worker   checksum_or = corpus_mapper_->UpdateChecksum();
1082*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1083*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << checksum_or.status().error_message()
1084*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to compute checksum of corpus mapper";
1085*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1086*8b6cd535SAndroid Build Coastguard Worker   }
1087*8b6cd535SAndroid Build Coastguard Worker   Crc32 corpus_mapper_checksum = std::move(checksum_or).ValueOrDie();
1088*8b6cd535SAndroid Build Coastguard Worker 
1089*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1090*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1091*8b6cd535SAndroid Build Coastguard Worker   checksum_or = corpus_score_cache_->UpdateChecksum();
1092*8b6cd535SAndroid Build Coastguard Worker   if (!checksum_or.ok()) {
1093*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(WARNING) << checksum_or.status().error_message()
1094*8b6cd535SAndroid Build Coastguard Worker                        << "Failed to compute checksum of score cache";
1095*8b6cd535SAndroid Build Coastguard Worker     return checksum_or.status();
1096*8b6cd535SAndroid Build Coastguard Worker   }
1097*8b6cd535SAndroid Build Coastguard Worker   Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie();
1098*8b6cd535SAndroid Build Coastguard Worker 
1099*8b6cd535SAndroid Build Coastguard Worker   // NOTE: We purposely don't include usage_store checksum here because we can't
1100*8b6cd535SAndroid Build Coastguard Worker   // regenerate it from ground truth documents. If it gets corrupted, we'll just
1101*8b6cd535SAndroid Build Coastguard Worker   // clear all usage reports, but we shouldn't throw everything else in the
1102*8b6cd535SAndroid Build Coastguard Worker   // document store out.
1103*8b6cd535SAndroid Build Coastguard Worker 
1104*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(document_log_checksum.Get()));
1105*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(document_key_mapper_checksum.Get()));
1106*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(document_id_mapper_checksum.Get()));
1107*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(score_cache_checksum.Get()));
1108*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(scorable_property_cache_checksum.Get()));
1109*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(filter_cache_checksum.Get()));
1110*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(namespace_mapper_checksum.Get()));
1111*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(corpus_mapper_checksum.Get()));
1112*8b6cd535SAndroid Build Coastguard Worker   total_checksum.Append(std::to_string(corpus_score_cache_checksum.Get()));
1113*8b6cd535SAndroid Build Coastguard Worker 
1114*8b6cd535SAndroid Build Coastguard Worker   // Write the header
1115*8b6cd535SAndroid Build Coastguard Worker   DocumentStore::Header header;
1116*8b6cd535SAndroid Build Coastguard Worker   header.magic = DocumentStore::Header::kMagic;
1117*8b6cd535SAndroid Build Coastguard Worker   header.checksum = total_checksum.Get();
1118*8b6cd535SAndroid Build Coastguard Worker 
1119*8b6cd535SAndroid Build Coastguard Worker   // This should overwrite the header.
1120*8b6cd535SAndroid Build Coastguard Worker   ScopedFd sfd(
1121*8b6cd535SAndroid Build Coastguard Worker       filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
1122*8b6cd535SAndroid Build Coastguard Worker   if (!sfd.is_valid() ||
1123*8b6cd535SAndroid Build Coastguard Worker       !filesystem_->Write(sfd.get(), &header, sizeof(header)) ||
1124*8b6cd535SAndroid Build Coastguard Worker       !filesystem_->DataSync(sfd.get())) {
1125*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError(absl_ports::StrCat(
1126*8b6cd535SAndroid Build Coastguard Worker         "Failed to write DocStore header: ", MakeHeaderFilename(base_dir_)));
1127*8b6cd535SAndroid Build Coastguard Worker   }
1128*8b6cd535SAndroid Build Coastguard Worker   return total_checksum;
1129*8b6cd535SAndroid Build Coastguard Worker }
1130*8b6cd535SAndroid Build Coastguard Worker 
HeaderExists()1131*8b6cd535SAndroid Build Coastguard Worker bool DocumentStore::HeaderExists() {
1132*8b6cd535SAndroid Build Coastguard Worker   if (!filesystem_->FileExists(MakeHeaderFilename(base_dir_).c_str())) {
1133*8b6cd535SAndroid Build Coastguard Worker     return false;
1134*8b6cd535SAndroid Build Coastguard Worker   }
1135*8b6cd535SAndroid Build Coastguard Worker 
1136*8b6cd535SAndroid Build Coastguard Worker   int64_t file_size =
1137*8b6cd535SAndroid Build Coastguard Worker       filesystem_->GetFileSize(MakeHeaderFilename(base_dir_).c_str());
1138*8b6cd535SAndroid Build Coastguard Worker 
1139*8b6cd535SAndroid Build Coastguard Worker   // If it's been truncated to size 0 before, we consider it to be a new file
1140*8b6cd535SAndroid Build Coastguard Worker   return file_size != 0 && file_size != Filesystem::kBadFileSize;
1141*8b6cd535SAndroid Build Coastguard Worker }
1142*8b6cd535SAndroid Build Coastguard Worker 
1143*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentStore::PutResult>
InternalPut(DocumentProto && document,PutDocumentStatsProto * put_document_stats)1144*8b6cd535SAndroid Build Coastguard Worker DocumentStore::InternalPut(DocumentProto&& document,
1145*8b6cd535SAndroid Build Coastguard Worker                            PutDocumentStatsProto* put_document_stats) {
1146*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<Timer> put_timer = clock_.GetNewTimer();
1147*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
1148*8b6cd535SAndroid Build Coastguard Worker 
1149*8b6cd535SAndroid Build Coastguard Worker   if (put_document_stats != nullptr) {
1150*8b6cd535SAndroid Build Coastguard Worker     put_document_stats->set_document_size(document.ByteSizeLong());
1151*8b6cd535SAndroid Build Coastguard Worker   }
1152*8b6cd535SAndroid Build Coastguard Worker 
1153*8b6cd535SAndroid Build Coastguard Worker   // Copy fields needed before they are moved
1154*8b6cd535SAndroid Build Coastguard Worker   std::string name_space = document.namespace_();
1155*8b6cd535SAndroid Build Coastguard Worker   std::string uri = document.uri();
1156*8b6cd535SAndroid Build Coastguard Worker   std::string schema = document.schema();
1157*8b6cd535SAndroid Build Coastguard Worker   int document_score = document.score();
1158*8b6cd535SAndroid Build Coastguard Worker   int32_t length_in_tokens = document.internal_fields().length_in_tokens();
1159*8b6cd535SAndroid Build Coastguard Worker   int64_t creation_timestamp_ms = document.creation_timestamp_ms();
1160*8b6cd535SAndroid Build Coastguard Worker 
1161*8b6cd535SAndroid Build Coastguard Worker   // Sets the creation timestamp if caller hasn't specified.
1162*8b6cd535SAndroid Build Coastguard Worker   if (document.creation_timestamp_ms() == 0) {
1163*8b6cd535SAndroid Build Coastguard Worker     creation_timestamp_ms = clock_.GetSystemTimeMilliseconds();
1164*8b6cd535SAndroid Build Coastguard Worker     document.set_creation_timestamp_ms(creation_timestamp_ms);
1165*8b6cd535SAndroid Build Coastguard Worker   }
1166*8b6cd535SAndroid Build Coastguard Worker 
1167*8b6cd535SAndroid Build Coastguard Worker   int64_t expiration_timestamp_ms =
1168*8b6cd535SAndroid Build Coastguard Worker       CalculateExpirationTimestampMs(creation_timestamp_ms, document.ttl_ms());
1169*8b6cd535SAndroid Build Coastguard Worker 
1170*8b6cd535SAndroid Build Coastguard Worker   // Update ground truth first
1171*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1172*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1173*8b6cd535SAndroid Build Coastguard Worker   DocumentWrapper document_wrapper = CreateDocumentWrapper(std::move(document));
1174*8b6cd535SAndroid Build Coastguard Worker   auto offset_or = document_log_->WriteProto(document_wrapper);
1175*8b6cd535SAndroid Build Coastguard Worker   if (!offset_or.ok()) {
1176*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << offset_or.status().error_message()
1177*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to write document";
1178*8b6cd535SAndroid Build Coastguard Worker     return offset_or.status();
1179*8b6cd535SAndroid Build Coastguard Worker   }
1180*8b6cd535SAndroid Build Coastguard Worker   int64_t file_offset = std::move(offset_or).ValueOrDie();
1181*8b6cd535SAndroid Build Coastguard Worker 
1182*8b6cd535SAndroid Build Coastguard Worker   // Get existing document id
1183*8b6cd535SAndroid Build Coastguard Worker   auto old_document_id_or = GetDocumentId(name_space, uri);
1184*8b6cd535SAndroid Build Coastguard Worker   if (!old_document_id_or.ok() &&
1185*8b6cd535SAndroid Build Coastguard Worker       !absl_ports::IsNotFound(old_document_id_or.status())) {
1186*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError("Failed to read from key mapper");
1187*8b6cd535SAndroid Build Coastguard Worker   }
1188*8b6cd535SAndroid Build Coastguard Worker 
1189*8b6cd535SAndroid Build Coastguard Worker   // Creates a new document id, updates key mapper and document_id mapper
1190*8b6cd535SAndroid Build Coastguard Worker   DocumentId new_document_id = document_id_mapper_->num_elements();
1191*8b6cd535SAndroid Build Coastguard Worker   if (!IsDocumentIdValid(new_document_id)) {
1192*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::ResourceExhaustedError(
1193*8b6cd535SAndroid Build Coastguard Worker         "Exceeded maximum number of documents. Try calling Optimize to reclaim "
1194*8b6cd535SAndroid Build Coastguard Worker         "some space.");
1195*8b6cd535SAndroid Build Coastguard Worker   }
1196*8b6cd535SAndroid Build Coastguard Worker   PutResult put_result;
1197*8b6cd535SAndroid Build Coastguard Worker   put_result.new_document_id = new_document_id;
1198*8b6cd535SAndroid Build Coastguard Worker 
1199*8b6cd535SAndroid Build Coastguard Worker   // Update namespace maps
1200*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
1201*8b6cd535SAndroid Build Coastguard Worker       NamespaceId namespace_id,
1202*8b6cd535SAndroid Build Coastguard Worker       namespace_mapper_->GetOrPut(name_space, namespace_mapper_->num_keys()));
1203*8b6cd535SAndroid Build Coastguard Worker 
1204*8b6cd535SAndroid Build Coastguard Worker   NamespaceIdFingerprint new_doc_nsid_uri_fingerprint(namespace_id, uri);
1205*8b6cd535SAndroid Build Coastguard Worker 
1206*8b6cd535SAndroid Build Coastguard Worker   // Updates key mapper and document_id mapper
1207*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
1208*8b6cd535SAndroid Build Coastguard Worker       new_doc_nsid_uri_fingerprint.EncodeToCString(), new_document_id));
1209*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset));
1210*8b6cd535SAndroid Build Coastguard Worker 
1211*8b6cd535SAndroid Build Coastguard Worker   // Update corpus maps
1212*8b6cd535SAndroid Build Coastguard Worker   NamespaceIdFingerprint corpus_nsid_schema_fingerprint(namespace_id, schema);
1213*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
1214*8b6cd535SAndroid Build Coastguard Worker       CorpusId corpus_id,
1215*8b6cd535SAndroid Build Coastguard Worker       corpus_mapper_->GetOrPut(corpus_nsid_schema_fingerprint.EncodeToCString(),
1216*8b6cd535SAndroid Build Coastguard Worker                                corpus_mapper_->num_keys()));
1217*8b6cd535SAndroid Build Coastguard Worker 
1218*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
1219*8b6cd535SAndroid Build Coastguard Worker                          GetCorpusAssociatedScoreDataToUpdate(corpus_id));
1220*8b6cd535SAndroid Build Coastguard Worker   scoring_data.AddDocument(length_in_tokens);
1221*8b6cd535SAndroid Build Coastguard Worker 
1222*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(
1223*8b6cd535SAndroid Build Coastguard Worker       UpdateCorpusAssociatedScoreCache(corpus_id, scoring_data));
1224*8b6cd535SAndroid Build Coastguard Worker 
1225*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
1226*8b6cd535SAndroid Build Coastguard Worker                          schema_store_->GetSchemaTypeId(schema));
1227*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
1228*8b6cd535SAndroid Build Coastguard Worker       int scorable_property_cache_index,
1229*8b6cd535SAndroid Build Coastguard Worker       UpdateScorablePropertyCache(document_wrapper.document(), schema_type_id));
1230*8b6cd535SAndroid Build Coastguard Worker 
1231*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
1232*8b6cd535SAndroid Build Coastguard Worker       new_document_id, DocumentAssociatedScoreData(
1233*8b6cd535SAndroid Build Coastguard Worker                            corpus_id, document_score, creation_timestamp_ms,
1234*8b6cd535SAndroid Build Coastguard Worker                            scorable_property_cache_index, length_in_tokens)));
1235*8b6cd535SAndroid Build Coastguard Worker 
1236*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(UpdateFilterCache(
1237*8b6cd535SAndroid Build Coastguard Worker       new_document_id,
1238*8b6cd535SAndroid Build Coastguard Worker       DocumentFilterData(namespace_id,
1239*8b6cd535SAndroid Build Coastguard Worker                          new_doc_nsid_uri_fingerprint.fingerprint(),
1240*8b6cd535SAndroid Build Coastguard Worker                          schema_type_id, expiration_timestamp_ms)));
1241*8b6cd535SAndroid Build Coastguard Worker 
1242*8b6cd535SAndroid Build Coastguard Worker   if (old_document_id_or.ok()) {
1243*8b6cd535SAndroid Build Coastguard Worker     // The old document exists, copy over the usage scores and delete the old
1244*8b6cd535SAndroid Build Coastguard Worker     // document.
1245*8b6cd535SAndroid Build Coastguard Worker     DocumentId old_document_id = old_document_id_or.ValueOrDie();
1246*8b6cd535SAndroid Build Coastguard Worker     put_result.old_document_id = old_document_id;
1247*8b6cd535SAndroid Build Coastguard Worker 
1248*8b6cd535SAndroid Build Coastguard Worker     ICING_RETURN_IF_ERROR(
1249*8b6cd535SAndroid Build Coastguard Worker         usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id,
1250*8b6cd535SAndroid Build Coastguard Worker                                        /*to_document_id=*/new_document_id));
1251*8b6cd535SAndroid Build Coastguard Worker 
1252*8b6cd535SAndroid Build Coastguard Worker     // Delete the old document. It's fine if it's not found since it might have
1253*8b6cd535SAndroid Build Coastguard Worker     // been deleted previously.
1254*8b6cd535SAndroid Build Coastguard Worker     auto delete_status =
1255*8b6cd535SAndroid Build Coastguard Worker         Delete(old_document_id, clock_.GetSystemTimeMilliseconds());
1256*8b6cd535SAndroid Build Coastguard Worker     if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
1257*8b6cd535SAndroid Build Coastguard Worker       // Real error, pass it up.
1258*8b6cd535SAndroid Build Coastguard Worker       return delete_status;
1259*8b6cd535SAndroid Build Coastguard Worker     }
1260*8b6cd535SAndroid Build Coastguard Worker   }
1261*8b6cd535SAndroid Build Coastguard Worker 
1262*8b6cd535SAndroid Build Coastguard Worker   if (put_document_stats != nullptr) {
1263*8b6cd535SAndroid Build Coastguard Worker     put_document_stats->set_document_store_latency_ms(
1264*8b6cd535SAndroid Build Coastguard Worker         put_timer->GetElapsedMilliseconds());
1265*8b6cd535SAndroid Build Coastguard Worker   }
1266*8b6cd535SAndroid Build Coastguard Worker 
1267*8b6cd535SAndroid Build Coastguard Worker   return put_result;
1268*8b6cd535SAndroid Build Coastguard Worker }
1269*8b6cd535SAndroid Build Coastguard Worker 
Get(const std::string_view name_space,const std::string_view uri,bool clear_internal_fields) const1270*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
1271*8b6cd535SAndroid Build Coastguard Worker     const std::string_view name_space, const std::string_view uri,
1272*8b6cd535SAndroid Build Coastguard Worker     bool clear_internal_fields) const {
1273*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/147231617): Make a better way to replace the error message in an
1274*8b6cd535SAndroid Build Coastguard Worker   // existing Status.
1275*8b6cd535SAndroid Build Coastguard Worker   auto document_id_or = GetDocumentId(name_space, uri);
1276*8b6cd535SAndroid Build Coastguard Worker   if (!document_id_or.ok()) {
1277*8b6cd535SAndroid Build Coastguard Worker     if (absl_ports::IsNotFound(document_id_or.status())) {
1278*8b6cd535SAndroid Build Coastguard Worker       ICING_VLOG(1) << document_id_or.status().error_message();
1279*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::NotFoundError(absl_ports::StrCat(
1280*8b6cd535SAndroid Build Coastguard Worker           "Document (", name_space, ", ", uri, ") not found."));
1281*8b6cd535SAndroid Build Coastguard Worker     }
1282*8b6cd535SAndroid Build Coastguard Worker 
1283*8b6cd535SAndroid Build Coastguard Worker     // Real error. Log it in error level and pass it up.
1284*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << document_id_or.status().error_message();
1285*8b6cd535SAndroid Build Coastguard Worker     return std::move(document_id_or).status();
1286*8b6cd535SAndroid Build Coastguard Worker   }
1287*8b6cd535SAndroid Build Coastguard Worker   DocumentId document_id = document_id_or.ValueOrDie();
1288*8b6cd535SAndroid Build Coastguard Worker 
1289*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/147231617): Make a better way to replace the error message in an
1290*8b6cd535SAndroid Build Coastguard Worker   // existing Status.
1291*8b6cd535SAndroid Build Coastguard Worker   auto status_or = Get(document_id, clear_internal_fields);
1292*8b6cd535SAndroid Build Coastguard Worker   if (!status_or.ok()) {
1293*8b6cd535SAndroid Build Coastguard Worker     if (absl_ports::IsNotFound(status_or.status())) {
1294*8b6cd535SAndroid Build Coastguard Worker       ICING_VLOG(1) << status_or.status().error_message();
1295*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::NotFoundError(absl_ports::StrCat(
1296*8b6cd535SAndroid Build Coastguard Worker           "Document (", name_space, ", ", uri, ") not found."));
1297*8b6cd535SAndroid Build Coastguard Worker     }
1298*8b6cd535SAndroid Build Coastguard Worker 
1299*8b6cd535SAndroid Build Coastguard Worker     // Real error. Log it in error level.
1300*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << status_or.status().error_message();
1301*8b6cd535SAndroid Build Coastguard Worker   }
1302*8b6cd535SAndroid Build Coastguard Worker   return status_or;
1303*8b6cd535SAndroid Build Coastguard Worker }
1304*8b6cd535SAndroid Build Coastguard Worker 
Get(DocumentId document_id,bool clear_internal_fields) const1305*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
1306*8b6cd535SAndroid Build Coastguard Worker     DocumentId document_id, bool clear_internal_fields) const {
1307*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
1308*8b6cd535SAndroid Build Coastguard Worker   auto document_filter_data_optional =
1309*8b6cd535SAndroid Build Coastguard Worker       GetAliveDocumentFilterData(document_id, current_time_ms);
1310*8b6cd535SAndroid Build Coastguard Worker   if (!document_filter_data_optional) {
1311*8b6cd535SAndroid Build Coastguard Worker     // The document doesn't exist. Let's check if the document id is invalid, we
1312*8b6cd535SAndroid Build Coastguard Worker     // will return InvalidArgumentError. Otherwise we should return NOT_FOUND
1313*8b6cd535SAndroid Build Coastguard Worker     // error.
1314*8b6cd535SAndroid Build Coastguard Worker     if (!IsDocumentIdValid(document_id)) {
1315*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
1316*8b6cd535SAndroid Build Coastguard Worker           "Document id '%d' invalid.", document_id));
1317*8b6cd535SAndroid Build Coastguard Worker     }
1318*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
1319*8b6cd535SAndroid Build Coastguard Worker         "Document id '%d' doesn't exist", document_id));
1320*8b6cd535SAndroid Build Coastguard Worker   }
1321*8b6cd535SAndroid Build Coastguard Worker 
1322*8b6cd535SAndroid Build Coastguard Worker   auto document_log_offset_or = document_id_mapper_->Get(document_id);
1323*8b6cd535SAndroid Build Coastguard Worker   if (!document_log_offset_or.ok()) {
1324*8b6cd535SAndroid Build Coastguard Worker     // Since we've just checked that our document_id is valid a few lines
1325*8b6cd535SAndroid Build Coastguard Worker     // above, there's no reason this should fail and an error should never
1326*8b6cd535SAndroid Build Coastguard Worker     // happen.
1327*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError("Failed to find document offset.");
1328*8b6cd535SAndroid Build Coastguard Worker   }
1329*8b6cd535SAndroid Build Coastguard Worker   int64_t document_log_offset = *document_log_offset_or.ValueOrDie();
1330*8b6cd535SAndroid Build Coastguard Worker 
1331*8b6cd535SAndroid Build Coastguard Worker   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
1332*8b6cd535SAndroid Build Coastguard Worker   // that can support error logging.
1333*8b6cd535SAndroid Build Coastguard Worker   auto document_wrapper_or = document_log_->ReadProto(document_log_offset);
1334*8b6cd535SAndroid Build Coastguard Worker   if (!document_wrapper_or.ok()) {
1335*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << document_wrapper_or.status().error_message()
1336*8b6cd535SAndroid Build Coastguard Worker                      << "Failed to read from document log";
1337*8b6cd535SAndroid Build Coastguard Worker     return document_wrapper_or.status();
1338*8b6cd535SAndroid Build Coastguard Worker   }
1339*8b6cd535SAndroid Build Coastguard Worker   DocumentWrapper document_wrapper =
1340*8b6cd535SAndroid Build Coastguard Worker       std::move(document_wrapper_or).ValueOrDie();
1341*8b6cd535SAndroid Build Coastguard Worker   if (clear_internal_fields) {
1342*8b6cd535SAndroid Build Coastguard Worker     document_wrapper.mutable_document()->clear_internal_fields();
1343*8b6cd535SAndroid Build Coastguard Worker   }
1344*8b6cd535SAndroid Build Coastguard Worker 
1345*8b6cd535SAndroid Build Coastguard Worker   return std::move(*document_wrapper.mutable_document());
1346*8b6cd535SAndroid Build Coastguard Worker }
1347*8b6cd535SAndroid Build Coastguard Worker 
GetScorablePropertySet(DocumentId document_id,int64_t current_time_ms) const1348*8b6cd535SAndroid Build Coastguard Worker std::unique_ptr<ScorablePropertySet> DocumentStore::GetScorablePropertySet(
1349*8b6cd535SAndroid Build Coastguard Worker     DocumentId document_id, int64_t current_time_ms) const {
1350*8b6cd535SAndroid Build Coastguard Worker   if (!feature_flags_.enable_scorable_properties()) {
1351*8b6cd535SAndroid Build Coastguard Worker     return nullptr;
1352*8b6cd535SAndroid Build Coastguard Worker   }
1353*8b6cd535SAndroid Build Coastguard Worker 
1354*8b6cd535SAndroid Build Coastguard Worker   // Get scorable property cache index from the score_cache_
1355*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<const DocumentAssociatedScoreData*>
1356*8b6cd535SAndroid Build Coastguard Worker       score_data_or = score_cache_->Get(document_id);
1357*8b6cd535SAndroid Build Coastguard Worker   if (!score_data_or.ok()) {
1358*8b6cd535SAndroid Build Coastguard Worker     return nullptr;
1359*8b6cd535SAndroid Build Coastguard Worker   }
1360*8b6cd535SAndroid Build Coastguard Worker   if (score_data_or.ValueOrDie()->scorable_property_cache_index() ==
1361*8b6cd535SAndroid Build Coastguard Worker       kInvalidScorablePropertyCacheIndex) {
1362*8b6cd535SAndroid Build Coastguard Worker     return nullptr;
1363*8b6cd535SAndroid Build Coastguard Worker   }
1364*8b6cd535SAndroid Build Coastguard Worker 
1365*8b6cd535SAndroid Build Coastguard Worker   // Get ScorablePropertySetProto.
1366*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<ScorablePropertySetProto>
1367*8b6cd535SAndroid Build Coastguard Worker       scorable_property_set_proto_or = scorable_property_cache_->Read(
1368*8b6cd535SAndroid Build Coastguard Worker           score_data_or.ValueOrDie()->scorable_property_cache_index());
1369*8b6cd535SAndroid Build Coastguard Worker   if (!scorable_property_set_proto_or.ok()) {
1370*8b6cd535SAndroid Build Coastguard Worker     return nullptr;
1371*8b6cd535SAndroid Build Coastguard Worker   }
1372*8b6cd535SAndroid Build Coastguard Worker 
1373*8b6cd535SAndroid Build Coastguard Worker   // Get schema type id.
1374*8b6cd535SAndroid Build Coastguard Worker   auto document_filter_data_optional =
1375*8b6cd535SAndroid Build Coastguard Worker       GetAliveDocumentFilterData(document_id, current_time_ms);
1376*8b6cd535SAndroid Build Coastguard Worker   if (!document_filter_data_optional) {
1377*8b6cd535SAndroid Build Coastguard Worker     return nullptr;
1378*8b6cd535SAndroid Build Coastguard Worker   }
1379*8b6cd535SAndroid Build Coastguard Worker 
1380*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<std::unique_ptr<ScorablePropertySet>>
1381*8b6cd535SAndroid Build Coastguard Worker       scorable_property_set_or = ScorablePropertySet::Create(
1382*8b6cd535SAndroid Build Coastguard Worker           std::move(scorable_property_set_proto_or.ValueOrDie()),
1383*8b6cd535SAndroid Build Coastguard Worker           document_filter_data_optional.value().schema_type_id(),
1384*8b6cd535SAndroid Build Coastguard Worker           schema_store_);
1385*8b6cd535SAndroid Build Coastguard Worker   if (!scorable_property_set_or.ok()) {
1386*8b6cd535SAndroid Build Coastguard Worker     return nullptr;
1387*8b6cd535SAndroid Build Coastguard Worker   }
1388*8b6cd535SAndroid Build Coastguard Worker   return std::move(scorable_property_set_or.ValueOrDie());
1389*8b6cd535SAndroid Build Coastguard Worker }
1390*8b6cd535SAndroid Build Coastguard Worker 
GetDocumentId(const std::string_view name_space,const std::string_view uri) const1391*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
1392*8b6cd535SAndroid Build Coastguard Worker     const std::string_view name_space, const std::string_view uri) const {
1393*8b6cd535SAndroid Build Coastguard Worker   auto namespace_id_or = namespace_mapper_->Get(name_space);
1394*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::Status status = namespace_id_or.status();
1395*8b6cd535SAndroid Build Coastguard Worker   if (status.ok()) {
1396*8b6cd535SAndroid Build Coastguard Worker     NamespaceId namespace_id = namespace_id_or.ValueOrDie();
1397*8b6cd535SAndroid Build Coastguard Worker     NamespaceIdFingerprint doc_nsid_uri_fingerprint(namespace_id, uri);
1398*8b6cd535SAndroid Build Coastguard Worker     auto document_id_or =
1399*8b6cd535SAndroid Build Coastguard Worker         document_key_mapper_->Get(doc_nsid_uri_fingerprint.EncodeToCString());
1400*8b6cd535SAndroid Build Coastguard Worker     status = document_id_or.status();
1401*8b6cd535SAndroid Build Coastguard Worker     if (status.ok()) {
1402*8b6cd535SAndroid Build Coastguard Worker       // Guaranteed to have a DocumentId now
1403*8b6cd535SAndroid Build Coastguard Worker       return document_id_or.ValueOrDie();
1404*8b6cd535SAndroid Build Coastguard Worker     }
1405*8b6cd535SAndroid Build Coastguard Worker   }
1406*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::Annotate(
1407*8b6cd535SAndroid Build Coastguard Worker       status, absl_ports::StrCat(
1408*8b6cd535SAndroid Build Coastguard Worker                   "Failed to find DocumentId by key: ", name_space, ", ", uri));
1409*8b6cd535SAndroid Build Coastguard Worker }
1410*8b6cd535SAndroid Build Coastguard Worker 
GetDocumentId(const NamespaceIdFingerprint & doc_namespace_id_uri_fingerprint) const1411*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
1412*8b6cd535SAndroid Build Coastguard Worker     const NamespaceIdFingerprint& doc_namespace_id_uri_fingerprint) const {
1413*8b6cd535SAndroid Build Coastguard Worker   auto document_id_or = document_key_mapper_->Get(
1414*8b6cd535SAndroid Build Coastguard Worker       doc_namespace_id_uri_fingerprint.EncodeToCString());
1415*8b6cd535SAndroid Build Coastguard Worker   if (document_id_or.ok()) {
1416*8b6cd535SAndroid Build Coastguard Worker     return document_id_or.ValueOrDie();
1417*8b6cd535SAndroid Build Coastguard Worker   }
1418*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::Annotate(
1419*8b6cd535SAndroid Build Coastguard Worker       std::move(document_id_or).status(),
1420*8b6cd535SAndroid Build Coastguard Worker       "Failed to find DocumentId by namespace id + fingerprint");
1421*8b6cd535SAndroid Build Coastguard Worker }
1422*8b6cd535SAndroid Build Coastguard Worker 
GetAllNamespaces() const1423*8b6cd535SAndroid Build Coastguard Worker std::vector<std::string> DocumentStore::GetAllNamespaces() const {
1424*8b6cd535SAndroid Build Coastguard Worker   std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
1425*8b6cd535SAndroid Build Coastguard Worker       GetNamespaceIdsToNamespaces(namespace_mapper_.get());
1426*8b6cd535SAndroid Build Coastguard Worker 
1427*8b6cd535SAndroid Build Coastguard Worker   std::unordered_set<NamespaceId> existing_namespace_ids;
1428*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
1429*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
1430*8b6cd535SAndroid Build Coastguard Worker        ++document_id) {
1431*8b6cd535SAndroid Build Coastguard Worker     // filter_cache_->Get can only fail if document_id is < 0
1432*8b6cd535SAndroid Build Coastguard Worker     // or >= filter_cache_->num_elements. So, this error SHOULD NEVER HAPPEN.
1433*8b6cd535SAndroid Build Coastguard Worker     auto status_or_data = filter_cache_->Get(document_id);
1434*8b6cd535SAndroid Build Coastguard Worker     if (!status_or_data.ok()) {
1435*8b6cd535SAndroid Build Coastguard Worker       ICING_LOG(ERROR)
1436*8b6cd535SAndroid Build Coastguard Worker           << "Error while iterating over filter cache in GetAllNamespaces";
1437*8b6cd535SAndroid Build Coastguard Worker       return std::vector<std::string>();
1438*8b6cd535SAndroid Build Coastguard Worker     }
1439*8b6cd535SAndroid Build Coastguard Worker     const DocumentFilterData* data = status_or_data.ValueOrDie();
1440*8b6cd535SAndroid Build Coastguard Worker 
1441*8b6cd535SAndroid Build Coastguard Worker     if (GetAliveDocumentFilterData(document_id, current_time_ms)) {
1442*8b6cd535SAndroid Build Coastguard Worker       existing_namespace_ids.insert(data->namespace_id());
1443*8b6cd535SAndroid Build Coastguard Worker     }
1444*8b6cd535SAndroid Build Coastguard Worker   }
1445*8b6cd535SAndroid Build Coastguard Worker 
1446*8b6cd535SAndroid Build Coastguard Worker   std::vector<std::string> existing_namespaces;
1447*8b6cd535SAndroid Build Coastguard Worker   for (auto itr = existing_namespace_ids.begin();
1448*8b6cd535SAndroid Build Coastguard Worker        itr != existing_namespace_ids.end(); ++itr) {
1449*8b6cd535SAndroid Build Coastguard Worker     existing_namespaces.push_back(namespace_id_to_namespace.at(*itr));
1450*8b6cd535SAndroid Build Coastguard Worker   }
1451*8b6cd535SAndroid Build Coastguard Worker   return existing_namespaces;
1452*8b6cd535SAndroid Build Coastguard Worker }
1453*8b6cd535SAndroid Build Coastguard Worker 
GetAliveDocumentFilterData(DocumentId document_id,int64_t current_time_ms) const1454*8b6cd535SAndroid Build Coastguard Worker std::optional<DocumentFilterData> DocumentStore::GetAliveDocumentFilterData(
1455*8b6cd535SAndroid Build Coastguard Worker     DocumentId document_id, int64_t current_time_ms) const {
1456*8b6cd535SAndroid Build Coastguard Worker   if (IsDeleted(document_id)) {
1457*8b6cd535SAndroid Build Coastguard Worker     return std::nullopt;
1458*8b6cd535SAndroid Build Coastguard Worker   }
1459*8b6cd535SAndroid Build Coastguard Worker   return GetNonExpiredDocumentFilterData(document_id, current_time_ms);
1460*8b6cd535SAndroid Build Coastguard Worker }
1461*8b6cd535SAndroid Build Coastguard Worker 
1462*8b6cd535SAndroid Build Coastguard Worker std::optional<DocumentFilterData>
GetNonDeletedDocumentFilterData(DocumentId document_id) const1463*8b6cd535SAndroid Build Coastguard Worker DocumentStore::GetNonDeletedDocumentFilterData(DocumentId document_id) const {
1464*8b6cd535SAndroid Build Coastguard Worker   if (IsDeleted(document_id)) {
1465*8b6cd535SAndroid Build Coastguard Worker     return std::nullopt;
1466*8b6cd535SAndroid Build Coastguard Worker   }
1467*8b6cd535SAndroid Build Coastguard Worker 
1468*8b6cd535SAndroid Build Coastguard Worker   auto filter_data_or = filter_cache_->GetCopy(document_id);
1469*8b6cd535SAndroid Build Coastguard Worker   if (!filter_data_or.ok()) {
1470*8b6cd535SAndroid Build Coastguard Worker     // This would only happen if document_id is out of range of the
1471*8b6cd535SAndroid Build Coastguard Worker     // filter_cache, meaning we got some invalid document_id. Callers should
1472*8b6cd535SAndroid Build Coastguard Worker     // already have checked that their document_id is valid or used
1473*8b6cd535SAndroid Build Coastguard Worker     // DoesDocumentExist(WithStatus). Regardless, return std::nullopt since the
1474*8b6cd535SAndroid Build Coastguard Worker     // document doesn't exist.
1475*8b6cd535SAndroid Build Coastguard Worker     return std::nullopt;
1476*8b6cd535SAndroid Build Coastguard Worker   }
1477*8b6cd535SAndroid Build Coastguard Worker 
1478*8b6cd535SAndroid Build Coastguard Worker   // At this point, it's guaranteed that the document has not been deleted. It
1479*8b6cd535SAndroid Build Coastguard Worker   // could still be expired, but the filter data is guaranteed to be valid here.
1480*8b6cd535SAndroid Build Coastguard Worker   return std::move(filter_data_or).ValueOrDie();
1481*8b6cd535SAndroid Build Coastguard Worker }
1482*8b6cd535SAndroid Build Coastguard Worker 
IsDeleted(DocumentId document_id) const1483*8b6cd535SAndroid Build Coastguard Worker bool DocumentStore::IsDeleted(DocumentId document_id) const {
1484*8b6cd535SAndroid Build Coastguard Worker   auto file_offset_or = document_id_mapper_->Get(document_id);
1485*8b6cd535SAndroid Build Coastguard Worker   if (!file_offset_or.ok()) {
1486*8b6cd535SAndroid Build Coastguard Worker     // This would only happen if document_id is out of range of the
1487*8b6cd535SAndroid Build Coastguard Worker     // document_id_mapper, meaning we got some invalid document_id. Callers
1488*8b6cd535SAndroid Build Coastguard Worker     // should already have checked that their document_id is valid or used
1489*8b6cd535SAndroid Build Coastguard Worker     // DoesDocumentExist(WithStatus). Regardless, return true since the
1490*8b6cd535SAndroid Build Coastguard Worker     // document doesn't exist.
1491*8b6cd535SAndroid Build Coastguard Worker     return true;
1492*8b6cd535SAndroid Build Coastguard Worker   }
1493*8b6cd535SAndroid Build Coastguard Worker   int64_t file_offset = *file_offset_or.ValueOrDie();
1494*8b6cd535SAndroid Build Coastguard Worker   return file_offset == kDocDeletedFlag;
1495*8b6cd535SAndroid Build Coastguard Worker }
1496*8b6cd535SAndroid Build Coastguard Worker 
1497*8b6cd535SAndroid Build Coastguard Worker // Returns DocumentFilterData if the document is not expired. Otherwise,
1498*8b6cd535SAndroid Build Coastguard Worker // std::nullopt.
1499*8b6cd535SAndroid Build Coastguard Worker std::optional<DocumentFilterData>
GetNonExpiredDocumentFilterData(DocumentId document_id,int64_t current_time_ms) const1500*8b6cd535SAndroid Build Coastguard Worker DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id,
1501*8b6cd535SAndroid Build Coastguard Worker                                                int64_t current_time_ms) const {
1502*8b6cd535SAndroid Build Coastguard Worker   auto filter_data_or = filter_cache_->GetCopy(document_id);
1503*8b6cd535SAndroid Build Coastguard Worker   if (!filter_data_or.ok()) {
1504*8b6cd535SAndroid Build Coastguard Worker     // This would only happen if document_id is out of range of the
1505*8b6cd535SAndroid Build Coastguard Worker     // filter_cache, meaning we got some invalid document_id. Callers should
1506*8b6cd535SAndroid Build Coastguard Worker     // already have checked that their document_id is valid or used
1507*8b6cd535SAndroid Build Coastguard Worker     // DoesDocumentExist(WithStatus). Regardless, return std::nullopt since the
1508*8b6cd535SAndroid Build Coastguard Worker     // document doesn't exist.
1509*8b6cd535SAndroid Build Coastguard Worker     return std::nullopt;
1510*8b6cd535SAndroid Build Coastguard Worker   }
1511*8b6cd535SAndroid Build Coastguard Worker   DocumentFilterData document_filter_data = filter_data_or.ValueOrDie();
1512*8b6cd535SAndroid Build Coastguard Worker 
1513*8b6cd535SAndroid Build Coastguard Worker   // Check if it's past the expiration time
1514*8b6cd535SAndroid Build Coastguard Worker   if (current_time_ms >= document_filter_data.expiration_timestamp_ms()) {
1515*8b6cd535SAndroid Build Coastguard Worker     return std::nullopt;
1516*8b6cd535SAndroid Build Coastguard Worker   }
1517*8b6cd535SAndroid Build Coastguard Worker   return document_filter_data;
1518*8b6cd535SAndroid Build Coastguard Worker }
1519*8b6cd535SAndroid Build Coastguard Worker 
Delete(const std::string_view name_space,const std::string_view uri,int64_t current_time_ms)1520*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::Delete(
1521*8b6cd535SAndroid Build Coastguard Worker     const std::string_view name_space, const std::string_view uri,
1522*8b6cd535SAndroid Build Coastguard Worker     int64_t current_time_ms) {
1523*8b6cd535SAndroid Build Coastguard Worker   // Try to get the DocumentId first
1524*8b6cd535SAndroid Build Coastguard Worker   auto document_id_or = GetDocumentId(name_space, uri);
1525*8b6cd535SAndroid Build Coastguard Worker   if (!document_id_or.ok()) {
1526*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::Annotate(
1527*8b6cd535SAndroid Build Coastguard Worker         document_id_or.status(),
1528*8b6cd535SAndroid Build Coastguard Worker         absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
1529*8b6cd535SAndroid Build Coastguard Worker                            ", uri: ", uri));
1530*8b6cd535SAndroid Build Coastguard Worker   }
1531*8b6cd535SAndroid Build Coastguard Worker   return Delete(document_id_or.ValueOrDie(), current_time_ms);
1532*8b6cd535SAndroid Build Coastguard Worker }
1533*8b6cd535SAndroid Build Coastguard Worker 
Delete(DocumentId document_id,int64_t current_time_ms)1534*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id,
1535*8b6cd535SAndroid Build Coastguard Worker                                                  int64_t current_time_ms) {
1536*8b6cd535SAndroid Build Coastguard Worker   auto document_filter_data_optional =
1537*8b6cd535SAndroid Build Coastguard Worker       GetAliveDocumentFilterData(document_id, current_time_ms);
1538*8b6cd535SAndroid Build Coastguard Worker   if (!document_filter_data_optional) {
1539*8b6cd535SAndroid Build Coastguard Worker     // The document doesn't exist. We should return InvalidArgumentError if the
1540*8b6cd535SAndroid Build Coastguard Worker     // document id is invalid. Otherwise we should return NOT_FOUND error.
1541*8b6cd535SAndroid Build Coastguard Worker     if (!IsDocumentIdValid(document_id)) {
1542*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
1543*8b6cd535SAndroid Build Coastguard Worker           "Document id '%d' invalid.", document_id));
1544*8b6cd535SAndroid Build Coastguard Worker     }
1545*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
1546*8b6cd535SAndroid Build Coastguard Worker         "Document id '%d' doesn't exist", document_id));
1547*8b6cd535SAndroid Build Coastguard Worker   }
1548*8b6cd535SAndroid Build Coastguard Worker 
1549*8b6cd535SAndroid Build Coastguard Worker   auto document_log_offset_or = document_id_mapper_->Get(document_id);
1550*8b6cd535SAndroid Build Coastguard Worker   if (!document_log_offset_or.ok()) {
1551*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InternalError("Failed to find document offset.");
1552*8b6cd535SAndroid Build Coastguard Worker   }
1553*8b6cd535SAndroid Build Coastguard Worker   int64_t document_log_offset = *document_log_offset_or.ValueOrDie();
1554*8b6cd535SAndroid Build Coastguard Worker 
1555*8b6cd535SAndroid Build Coastguard Worker   // Erases document proto.
1556*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(document_log_->EraseProto(document_log_offset));
1557*8b6cd535SAndroid Build Coastguard Worker   return ClearDerivedData(document_id);
1558*8b6cd535SAndroid Build Coastguard Worker }
1559*8b6cd535SAndroid Build Coastguard Worker 
GetNamespaceId(std::string_view name_space) const1560*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<NamespaceId> DocumentStore::GetNamespaceId(
1561*8b6cd535SAndroid Build Coastguard Worker     std::string_view name_space) const {
1562*8b6cd535SAndroid Build Coastguard Worker   return namespace_mapper_->Get(name_space);
1563*8b6cd535SAndroid Build Coastguard Worker }
1564*8b6cd535SAndroid Build Coastguard Worker 
GetCorpusId(const std::string_view name_space,const std::string_view schema) const1565*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId(
1566*8b6cd535SAndroid Build Coastguard Worker     const std::string_view name_space, const std::string_view schema) const {
1567*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(NamespaceId namespace_id,
1568*8b6cd535SAndroid Build Coastguard Worker                          namespace_mapper_->Get(name_space));
1569*8b6cd535SAndroid Build Coastguard Worker   NamespaceIdFingerprint corpus_nsid_schema_fp(namespace_id, schema);
1570*8b6cd535SAndroid Build Coastguard Worker   return corpus_mapper_->Get(corpus_nsid_schema_fp.EncodeToCString());
1571*8b6cd535SAndroid Build Coastguard Worker }
1572*8b6cd535SAndroid Build Coastguard Worker 
GetResultGroupingEntryId(ResultSpecProto::ResultGroupingType result_group_type,const std::string_view name_space,const std::string_view schema) const1573*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<int32_t> DocumentStore::GetResultGroupingEntryId(
1574*8b6cd535SAndroid Build Coastguard Worker     ResultSpecProto::ResultGroupingType result_group_type,
1575*8b6cd535SAndroid Build Coastguard Worker     const std::string_view name_space, const std::string_view schema) const {
1576*8b6cd535SAndroid Build Coastguard Worker   auto namespace_id = GetNamespaceId(name_space);
1577*8b6cd535SAndroid Build Coastguard Worker   auto schema_type_id = schema_store_->GetSchemaTypeId(schema);
1578*8b6cd535SAndroid Build Coastguard Worker   switch (result_group_type) {
1579*8b6cd535SAndroid Build Coastguard Worker     case ResultSpecProto::NONE:
1580*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::InvalidArgumentError(
1581*8b6cd535SAndroid Build Coastguard Worker           "Cannot group by ResultSpecProto::NONE");
1582*8b6cd535SAndroid Build Coastguard Worker     case ResultSpecProto::SCHEMA_TYPE:
1583*8b6cd535SAndroid Build Coastguard Worker       if (schema_type_id.ok()) {
1584*8b6cd535SAndroid Build Coastguard Worker         return schema_type_id.ValueOrDie();
1585*8b6cd535SAndroid Build Coastguard Worker       }
1586*8b6cd535SAndroid Build Coastguard Worker       break;
1587*8b6cd535SAndroid Build Coastguard Worker     case ResultSpecProto::NAMESPACE:
1588*8b6cd535SAndroid Build Coastguard Worker       if (namespace_id.ok()) {
1589*8b6cd535SAndroid Build Coastguard Worker         return namespace_id.ValueOrDie();
1590*8b6cd535SAndroid Build Coastguard Worker       }
1591*8b6cd535SAndroid Build Coastguard Worker       break;
1592*8b6cd535SAndroid Build Coastguard Worker     case ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE:
1593*8b6cd535SAndroid Build Coastguard Worker       if (namespace_id.ok() && schema_type_id.ok()) {
1594*8b6cd535SAndroid Build Coastguard Worker         // TODO(b/258715421): Temporary workaround to get a
1595*8b6cd535SAndroid Build Coastguard Worker         //                    ResultGroupingEntryId given the Namespace string
1596*8b6cd535SAndroid Build Coastguard Worker         //                    and Schema string.
1597*8b6cd535SAndroid Build Coastguard Worker         return namespace_id.ValueOrDie() << 16 | schema_type_id.ValueOrDie();
1598*8b6cd535SAndroid Build Coastguard Worker       }
1599*8b6cd535SAndroid Build Coastguard Worker       break;
1600*8b6cd535SAndroid Build Coastguard Worker   }
1601*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::NotFoundError("Cannot generate ResultGrouping Entry Id");
1602*8b6cd535SAndroid Build Coastguard Worker }
1603*8b6cd535SAndroid Build Coastguard Worker 
GetResultGroupingEntryId(ResultSpecProto::ResultGroupingType result_group_type,const NamespaceId namespace_id,const SchemaTypeId schema_type_id) const1604*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<int32_t> DocumentStore::GetResultGroupingEntryId(
1605*8b6cd535SAndroid Build Coastguard Worker     ResultSpecProto::ResultGroupingType result_group_type,
1606*8b6cd535SAndroid Build Coastguard Worker     const NamespaceId namespace_id, const SchemaTypeId schema_type_id) const {
1607*8b6cd535SAndroid Build Coastguard Worker   switch (result_group_type) {
1608*8b6cd535SAndroid Build Coastguard Worker     case ResultSpecProto::NONE:
1609*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::InvalidArgumentError(
1610*8b6cd535SAndroid Build Coastguard Worker           "Cannot group by ResultSpecProto::NONE");
1611*8b6cd535SAndroid Build Coastguard Worker     case ResultSpecProto::SCHEMA_TYPE:
1612*8b6cd535SAndroid Build Coastguard Worker       return schema_type_id;
1613*8b6cd535SAndroid Build Coastguard Worker     case ResultSpecProto::NAMESPACE:
1614*8b6cd535SAndroid Build Coastguard Worker       return namespace_id;
1615*8b6cd535SAndroid Build Coastguard Worker     case ResultSpecProto::NAMESPACE_AND_SCHEMA_TYPE:
1616*8b6cd535SAndroid Build Coastguard Worker       // TODO(b/258715421): Temporary workaround to get a ResultGroupingEntryId
1617*8b6cd535SAndroid Build Coastguard Worker       //                    given the Namespace Id and SchemaType Id.
1618*8b6cd535SAndroid Build Coastguard Worker       return namespace_id << 16 | schema_type_id;
1619*8b6cd535SAndroid Build Coastguard Worker   }
1620*8b6cd535SAndroid Build Coastguard Worker   return absl_ports::NotFoundError("Cannot generate ResultGrouping Entry Id");
1621*8b6cd535SAndroid Build Coastguard Worker }
1622*8b6cd535SAndroid Build Coastguard Worker 
1623*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
GetDocumentAssociatedScoreData(DocumentId document_id) const1624*8b6cd535SAndroid Build Coastguard Worker DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
1625*8b6cd535SAndroid Build Coastguard Worker   auto score_data_or = score_cache_->GetCopy(document_id);
1626*8b6cd535SAndroid Build Coastguard Worker   if (!score_data_or.ok()) {
1627*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
1628*8b6cd535SAndroid Build Coastguard Worker                      << " from score_cache_";
1629*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::NotFoundError(
1630*8b6cd535SAndroid Build Coastguard Worker         std::move(score_data_or).status().error_message());
1631*8b6cd535SAndroid Build Coastguard Worker   }
1632*8b6cd535SAndroid Build Coastguard Worker 
1633*8b6cd535SAndroid Build Coastguard Worker   DocumentAssociatedScoreData document_associated_score_data =
1634*8b6cd535SAndroid Build Coastguard Worker       std::move(score_data_or).ValueOrDie();
1635*8b6cd535SAndroid Build Coastguard Worker   return document_associated_score_data;
1636*8b6cd535SAndroid Build Coastguard Worker }
1637*8b6cd535SAndroid Build Coastguard Worker 
1638*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
GetCorpusAssociatedScoreData(CorpusId corpus_id) const1639*8b6cd535SAndroid Build Coastguard Worker DocumentStore::GetCorpusAssociatedScoreData(CorpusId corpus_id) const {
1640*8b6cd535SAndroid Build Coastguard Worker   return corpus_score_cache_->GetCopy(corpus_id);
1641*8b6cd535SAndroid Build Coastguard Worker }
1642*8b6cd535SAndroid Build Coastguard Worker 
1643*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const1644*8b6cd535SAndroid Build Coastguard Worker DocumentStore::GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const {
1645*8b6cd535SAndroid Build Coastguard Worker   auto corpus_scoring_data_or = GetCorpusAssociatedScoreData(corpus_id);
1646*8b6cd535SAndroid Build Coastguard Worker   if (!corpus_scoring_data_or.ok() &&
1647*8b6cd535SAndroid Build Coastguard Worker       absl_ports::IsOutOfRange(corpus_scoring_data_or.status())) {
1648*8b6cd535SAndroid Build Coastguard Worker     // OUT_OF_RANGE is the StatusCode returned when a corpus id is added to
1649*8b6cd535SAndroid Build Coastguard Worker     // corpus_score_cache_ for the first time. Return a default
1650*8b6cd535SAndroid Build Coastguard Worker     // CorpusAssociatedScoreData object in this case.
1651*8b6cd535SAndroid Build Coastguard Worker     return CorpusAssociatedScoreData();
1652*8b6cd535SAndroid Build Coastguard Worker   }
1653*8b6cd535SAndroid Build Coastguard Worker 
1654*8b6cd535SAndroid Build Coastguard Worker   return corpus_scoring_data_or;
1655*8b6cd535SAndroid Build Coastguard Worker }
1656*8b6cd535SAndroid Build Coastguard Worker 
1657*8b6cd535SAndroid Build Coastguard Worker // TODO(b/273826815): Decide on and adopt a consistent pattern for handling
1658*8b6cd535SAndroid Build Coastguard Worker // NOT_FOUND 'errors' returned by our internal classes.
GetUsageScores(DocumentId document_id,int64_t current_time_ms) const1659*8b6cd535SAndroid Build Coastguard Worker std::optional<UsageStore::UsageScores> DocumentStore::GetUsageScores(
1660*8b6cd535SAndroid Build Coastguard Worker     DocumentId document_id, int64_t current_time_ms) const {
1661*8b6cd535SAndroid Build Coastguard Worker   std::optional<DocumentFilterData> opt =
1662*8b6cd535SAndroid Build Coastguard Worker       GetAliveDocumentFilterData(document_id, current_time_ms);
1663*8b6cd535SAndroid Build Coastguard Worker   if (!opt) {
1664*8b6cd535SAndroid Build Coastguard Worker     return std::nullopt;
1665*8b6cd535SAndroid Build Coastguard Worker   }
1666*8b6cd535SAndroid Build Coastguard Worker   if (document_id >= usage_store_->num_elements()) {
1667*8b6cd535SAndroid Build Coastguard Worker     return std::nullopt;
1668*8b6cd535SAndroid Build Coastguard Worker   }
1669*8b6cd535SAndroid Build Coastguard Worker   auto usage_scores_or = usage_store_->GetUsageScores(document_id);
1670*8b6cd535SAndroid Build Coastguard Worker   if (!usage_scores_or.ok()) {
1671*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(ERROR) << "Error retrieving usage for " << document_id << ": "
1672*8b6cd535SAndroid Build Coastguard Worker                      << usage_scores_or.status().error_message();
1673*8b6cd535SAndroid Build Coastguard Worker     return std::nullopt;
1674*8b6cd535SAndroid Build Coastguard Worker   }
1675*8b6cd535SAndroid Build Coastguard Worker   return std::move(usage_scores_or).ValueOrDie();
1676*8b6cd535SAndroid Build Coastguard Worker }
1677*8b6cd535SAndroid Build Coastguard Worker 
ReportUsage(const UsageReport & usage_report)1678*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ReportUsage(
1679*8b6cd535SAndroid Build Coastguard Worker     const UsageReport& usage_report) {
1680*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(DocumentId document_id,
1681*8b6cd535SAndroid Build Coastguard Worker                          GetDocumentId(usage_report.document_namespace(),
1682*8b6cd535SAndroid Build Coastguard Worker                                        usage_report.document_uri()));
1683*8b6cd535SAndroid Build Coastguard Worker   // We can use the internal version here because we got our document_id from
1684*8b6cd535SAndroid Build Coastguard Worker   // our internal data structures. We would have thrown some error if the
1685*8b6cd535SAndroid Build Coastguard Worker   // namespace and/or uri were incorrect.
1686*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
1687*8b6cd535SAndroid Build Coastguard Worker   if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
1688*8b6cd535SAndroid Build Coastguard Worker     // Document was probably deleted or expired.
1689*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::NotFoundError(absl_ports::StrCat(
1690*8b6cd535SAndroid Build Coastguard Worker         "Couldn't report usage on a nonexistent document: (namespace: '",
1691*8b6cd535SAndroid Build Coastguard Worker         usage_report.document_namespace(), "', uri: '",
1692*8b6cd535SAndroid Build Coastguard Worker         usage_report.document_uri(), "')"));
1693*8b6cd535SAndroid Build Coastguard Worker   }
1694*8b6cd535SAndroid Build Coastguard Worker 
1695*8b6cd535SAndroid Build Coastguard Worker   return usage_store_->AddUsageReport(usage_report, document_id);
1696*8b6cd535SAndroid Build Coastguard Worker }
1697*8b6cd535SAndroid Build Coastguard Worker 
DeleteByNamespace(std::string_view name_space)1698*8b6cd535SAndroid Build Coastguard Worker DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace(
1699*8b6cd535SAndroid Build Coastguard Worker     std::string_view name_space) {
1700*8b6cd535SAndroid Build Coastguard Worker   DeleteByGroupResult result;
1701*8b6cd535SAndroid Build Coastguard Worker   auto namespace_id_or = namespace_mapper_->Get(name_space);
1702*8b6cd535SAndroid Build Coastguard Worker   if (!namespace_id_or.ok()) {
1703*8b6cd535SAndroid Build Coastguard Worker     result.status = absl_ports::Annotate(
1704*8b6cd535SAndroid Build Coastguard Worker         namespace_id_or.status(),
1705*8b6cd535SAndroid Build Coastguard Worker         absl_ports::StrCat("Failed to find namespace: ", name_space));
1706*8b6cd535SAndroid Build Coastguard Worker     return result;
1707*8b6cd535SAndroid Build Coastguard Worker   }
1708*8b6cd535SAndroid Build Coastguard Worker   NamespaceId namespace_id = namespace_id_or.ValueOrDie();
1709*8b6cd535SAndroid Build Coastguard Worker   auto num_deleted_or = BatchDelete(namespace_id, kInvalidSchemaTypeId);
1710*8b6cd535SAndroid Build Coastguard Worker   if (!num_deleted_or.ok()) {
1711*8b6cd535SAndroid Build Coastguard Worker     result.status = std::move(num_deleted_or).status();
1712*8b6cd535SAndroid Build Coastguard Worker     return result;
1713*8b6cd535SAndroid Build Coastguard Worker   }
1714*8b6cd535SAndroid Build Coastguard Worker 
1715*8b6cd535SAndroid Build Coastguard Worker   result.num_docs_deleted = num_deleted_or.ValueOrDie();
1716*8b6cd535SAndroid Build Coastguard Worker   if (result.num_docs_deleted <= 0) {
1717*8b6cd535SAndroid Build Coastguard Worker     // Treat the fact that no existing documents had this namespace to be the
1718*8b6cd535SAndroid Build Coastguard Worker     // same as this namespace not existing at all.
1719*8b6cd535SAndroid Build Coastguard Worker     result.status = absl_ports::NotFoundError(
1720*8b6cd535SAndroid Build Coastguard Worker         absl_ports::StrCat("Namespace '", name_space, "' doesn't exist"));
1721*8b6cd535SAndroid Build Coastguard Worker     return result;
1722*8b6cd535SAndroid Build Coastguard Worker   }
1723*8b6cd535SAndroid Build Coastguard Worker 
1724*8b6cd535SAndroid Build Coastguard Worker   return result;
1725*8b6cd535SAndroid Build Coastguard Worker }
1726*8b6cd535SAndroid Build Coastguard Worker 
DeleteBySchemaType(std::string_view schema_type)1727*8b6cd535SAndroid Build Coastguard Worker DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType(
1728*8b6cd535SAndroid Build Coastguard Worker     std::string_view schema_type) {
1729*8b6cd535SAndroid Build Coastguard Worker   DeleteByGroupResult result;
1730*8b6cd535SAndroid Build Coastguard Worker   auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
1731*8b6cd535SAndroid Build Coastguard Worker   if (!schema_type_id_or.ok()) {
1732*8b6cd535SAndroid Build Coastguard Worker     result.status = absl_ports::Annotate(
1733*8b6cd535SAndroid Build Coastguard Worker         schema_type_id_or.status(),
1734*8b6cd535SAndroid Build Coastguard Worker         absl_ports::StrCat("Failed to find schema type. schema_type: ",
1735*8b6cd535SAndroid Build Coastguard Worker                            schema_type));
1736*8b6cd535SAndroid Build Coastguard Worker     return result;
1737*8b6cd535SAndroid Build Coastguard Worker   }
1738*8b6cd535SAndroid Build Coastguard Worker   SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
1739*8b6cd535SAndroid Build Coastguard Worker   auto num_deleted_or = BatchDelete(kInvalidNamespaceId, schema_type_id);
1740*8b6cd535SAndroid Build Coastguard Worker   if (!num_deleted_or.ok()) {
1741*8b6cd535SAndroid Build Coastguard Worker     result.status = std::move(num_deleted_or).status();
1742*8b6cd535SAndroid Build Coastguard Worker     return result;
1743*8b6cd535SAndroid Build Coastguard Worker   }
1744*8b6cd535SAndroid Build Coastguard Worker 
1745*8b6cd535SAndroid Build Coastguard Worker   result.num_docs_deleted = num_deleted_or.ValueOrDie();
1746*8b6cd535SAndroid Build Coastguard Worker   if (result.num_docs_deleted <= 0) {
1747*8b6cd535SAndroid Build Coastguard Worker     result.status = absl_ports::NotFoundError(absl_ports::StrCat(
1748*8b6cd535SAndroid Build Coastguard Worker         "No documents found with schema type '", schema_type, "'"));
1749*8b6cd535SAndroid Build Coastguard Worker     return result;
1750*8b6cd535SAndroid Build Coastguard Worker   }
1751*8b6cd535SAndroid Build Coastguard Worker 
1752*8b6cd535SAndroid Build Coastguard Worker   return result;
1753*8b6cd535SAndroid Build Coastguard Worker }
1754*8b6cd535SAndroid Build Coastguard Worker 
BatchDelete(NamespaceId namespace_id,SchemaTypeId schema_type_id)1755*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete(
1756*8b6cd535SAndroid Build Coastguard Worker     NamespaceId namespace_id, SchemaTypeId schema_type_id) {
1757*8b6cd535SAndroid Build Coastguard Worker   // Tracks if there were any existing documents with this namespace that we
1758*8b6cd535SAndroid Build Coastguard Worker   // will mark as deleted.
1759*8b6cd535SAndroid Build Coastguard Worker   int num_updated_documents = 0;
1760*8b6cd535SAndroid Build Coastguard Worker 
1761*8b6cd535SAndroid Build Coastguard Worker   // Traverse FilterCache and delete all docs that match namespace_id and
1762*8b6cd535SAndroid Build Coastguard Worker   // schema_type_id.
1763*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
1764*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
1765*8b6cd535SAndroid Build Coastguard Worker        ++document_id) {
1766*8b6cd535SAndroid Build Coastguard Worker     // filter_cache_->Get can only fail if document_id is < 0
1767*8b6cd535SAndroid Build Coastguard Worker     // or >= filter_cache_->num_elements. So, this error SHOULD NEVER HAPPEN.
1768*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(const DocumentFilterData* data,
1769*8b6cd535SAndroid Build Coastguard Worker                            filter_cache_->Get(document_id));
1770*8b6cd535SAndroid Build Coastguard Worker 
1771*8b6cd535SAndroid Build Coastguard Worker     // Check namespace only when the input namespace id is valid.
1772*8b6cd535SAndroid Build Coastguard Worker     if (namespace_id != kInvalidNamespaceId &&
1773*8b6cd535SAndroid Build Coastguard Worker         (data->namespace_id() == kInvalidNamespaceId ||
1774*8b6cd535SAndroid Build Coastguard Worker          data->namespace_id() != namespace_id)) {
1775*8b6cd535SAndroid Build Coastguard Worker       // The document has already been hard-deleted or isn't from the desired
1776*8b6cd535SAndroid Build Coastguard Worker       // namespace.
1777*8b6cd535SAndroid Build Coastguard Worker       continue;
1778*8b6cd535SAndroid Build Coastguard Worker     }
1779*8b6cd535SAndroid Build Coastguard Worker 
1780*8b6cd535SAndroid Build Coastguard Worker     // Check schema type only when the input schema type id is valid.
1781*8b6cd535SAndroid Build Coastguard Worker     if (schema_type_id != kInvalidSchemaTypeId &&
1782*8b6cd535SAndroid Build Coastguard Worker         (data->schema_type_id() == kInvalidSchemaTypeId ||
1783*8b6cd535SAndroid Build Coastguard Worker          data->schema_type_id() != schema_type_id)) {
1784*8b6cd535SAndroid Build Coastguard Worker       // The document has already been hard-deleted or doesn't have the
1785*8b6cd535SAndroid Build Coastguard Worker       // desired schema type.
1786*8b6cd535SAndroid Build Coastguard Worker       continue;
1787*8b6cd535SAndroid Build Coastguard Worker     }
1788*8b6cd535SAndroid Build Coastguard Worker 
1789*8b6cd535SAndroid Build Coastguard Worker     // The document has the desired namespace and schema type, it either
1790*8b6cd535SAndroid Build Coastguard Worker     // exists or has expired.
1791*8b6cd535SAndroid Build Coastguard Worker     libtextclassifier3::Status delete_status =
1792*8b6cd535SAndroid Build Coastguard Worker         Delete(document_id, current_time_ms);
1793*8b6cd535SAndroid Build Coastguard Worker     if (absl_ports::IsNotFound(delete_status)) {
1794*8b6cd535SAndroid Build Coastguard Worker       continue;
1795*8b6cd535SAndroid Build Coastguard Worker     } else if (!delete_status.ok()) {
1796*8b6cd535SAndroid Build Coastguard Worker       // Real error, pass up.
1797*8b6cd535SAndroid Build Coastguard Worker       return delete_status;
1798*8b6cd535SAndroid Build Coastguard Worker     }
1799*8b6cd535SAndroid Build Coastguard Worker     ++num_updated_documents;
1800*8b6cd535SAndroid Build Coastguard Worker   }
1801*8b6cd535SAndroid Build Coastguard Worker 
1802*8b6cd535SAndroid Build Coastguard Worker   return num_updated_documents;
1803*8b6cd535SAndroid Build Coastguard Worker }
1804*8b6cd535SAndroid Build Coastguard Worker 
PersistToDisk(PersistType::Code persist_type)1805*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::PersistToDisk(
1806*8b6cd535SAndroid Build Coastguard Worker     PersistType::Code persist_type) {
1807*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(document_log_->PersistToDisk());
1808*8b6cd535SAndroid Build Coastguard Worker   if (persist_type == PersistType::LITE) {
1809*8b6cd535SAndroid Build Coastguard Worker     // only persist the document log.
1810*8b6cd535SAndroid Build Coastguard Worker     return libtextclassifier3::Status::OK;
1811*8b6cd535SAndroid Build Coastguard Worker   }
1812*8b6cd535SAndroid Build Coastguard Worker   if (persist_type == PersistType::RECOVERY_PROOF) {
1813*8b6cd535SAndroid Build Coastguard Worker     return UpdateChecksum().status();
1814*8b6cd535SAndroid Build Coastguard Worker   }
1815*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(document_key_mapper_->PersistToDisk());
1816*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(document_id_mapper_->PersistToDisk());
1817*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(score_cache_->PersistToDisk());
1818*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(scorable_property_cache_->PersistToDisk());
1819*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(filter_cache_->PersistToDisk());
1820*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk());
1821*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk());
1822*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(corpus_mapper_->PersistToDisk());
1823*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(corpus_score_cache_->PersistToDisk());
1824*8b6cd535SAndroid Build Coastguard Worker 
1825*8b6cd535SAndroid Build Coastguard Worker   // Update the combined checksum and write to header file.
1826*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(UpdateChecksum());
1827*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
1828*8b6cd535SAndroid Build Coastguard Worker }
1829*8b6cd535SAndroid Build Coastguard Worker 
GetValueOrDefault(const libtextclassifier3::StatusOr<int64_t> & value_or,int64_t default_value)1830*8b6cd535SAndroid Build Coastguard Worker int64_t GetValueOrDefault(const libtextclassifier3::StatusOr<int64_t>& value_or,
1831*8b6cd535SAndroid Build Coastguard Worker                           int64_t default_value) {
1832*8b6cd535SAndroid Build Coastguard Worker   return (value_or.ok()) ? value_or.ValueOrDie() : default_value;
1833*8b6cd535SAndroid Build Coastguard Worker }
1834*8b6cd535SAndroid Build Coastguard Worker 
GetMemberStorageInfo() const1835*8b6cd535SAndroid Build Coastguard Worker DocumentStorageInfoProto DocumentStore::GetMemberStorageInfo() const {
1836*8b6cd535SAndroid Build Coastguard Worker   DocumentStorageInfoProto storage_info;
1837*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_document_log_size(
1838*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(document_log_->GetDiskUsage(), -1));
1839*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_key_mapper_size(
1840*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(document_key_mapper_->GetDiskUsage(), -1));
1841*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_document_id_mapper_size(
1842*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(document_id_mapper_->GetDiskUsage(), -1));
1843*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_score_cache_size(
1844*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(score_cache_->GetDiskUsage(), -1));
1845*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_scorable_property_cache_size(
1846*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(scorable_property_cache_->GetDiskUsage(), -1));
1847*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_filter_cache_size(
1848*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(filter_cache_->GetDiskUsage(), -1));
1849*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_namespace_id_mapper_size(
1850*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(namespace_mapper_->GetDiskUsage(), -1));
1851*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_corpus_mapper_size(
1852*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(corpus_mapper_->GetDiskUsage(), -1));
1853*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_corpus_score_cache_size(
1854*8b6cd535SAndroid Build Coastguard Worker       GetValueOrDefault(corpus_score_cache_->GetDiskUsage(), -1));
1855*8b6cd535SAndroid Build Coastguard Worker   return storage_info;
1856*8b6cd535SAndroid Build Coastguard Worker }
1857*8b6cd535SAndroid Build Coastguard Worker 
CalculateDocumentStatusCounts(DocumentStorageInfoProto storage_info) const1858*8b6cd535SAndroid Build Coastguard Worker DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts(
1859*8b6cd535SAndroid Build Coastguard Worker     DocumentStorageInfoProto storage_info) const {
1860*8b6cd535SAndroid Build Coastguard Worker   int total_num_alive = 0;
1861*8b6cd535SAndroid Build Coastguard Worker   int total_num_expired = 0;
1862*8b6cd535SAndroid Build Coastguard Worker   int total_num_deleted = 0;
1863*8b6cd535SAndroid Build Coastguard Worker   std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
1864*8b6cd535SAndroid Build Coastguard Worker       GetNamespaceIdsToNamespaces(namespace_mapper_.get());
1865*8b6cd535SAndroid Build Coastguard Worker   std::unordered_map<std::string, NamespaceStorageInfoProto>
1866*8b6cd535SAndroid Build Coastguard Worker       namespace_to_storage_info;
1867*8b6cd535SAndroid Build Coastguard Worker 
1868*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
1869*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = 0;
1870*8b6cd535SAndroid Build Coastguard Worker        document_id < document_id_mapper_->num_elements(); ++document_id) {
1871*8b6cd535SAndroid Build Coastguard Worker     // Check if it's deleted first.
1872*8b6cd535SAndroid Build Coastguard Worker     if (IsDeleted(document_id)) {
1873*8b6cd535SAndroid Build Coastguard Worker       // We don't have the namespace id of hard deleted documents anymore, so
1874*8b6cd535SAndroid Build Coastguard Worker       // we can't add to our namespace storage info.
1875*8b6cd535SAndroid Build Coastguard Worker       ++total_num_deleted;
1876*8b6cd535SAndroid Build Coastguard Worker       continue;
1877*8b6cd535SAndroid Build Coastguard Worker     }
1878*8b6cd535SAndroid Build Coastguard Worker 
1879*8b6cd535SAndroid Build Coastguard Worker     // At this point, the document is either alive or expired, we can get
1880*8b6cd535SAndroid Build Coastguard Worker     // namespace info for it.
1881*8b6cd535SAndroid Build Coastguard Worker     auto filter_data_or = filter_cache_->Get(document_id);
1882*8b6cd535SAndroid Build Coastguard Worker     if (!filter_data_or.ok()) {
1883*8b6cd535SAndroid Build Coastguard Worker       ICING_VLOG(1) << "Error trying to get filter data for document store "
1884*8b6cd535SAndroid Build Coastguard Worker                        "storage info counts.";
1885*8b6cd535SAndroid Build Coastguard Worker       continue;
1886*8b6cd535SAndroid Build Coastguard Worker     }
1887*8b6cd535SAndroid Build Coastguard Worker     const DocumentFilterData* filter_data = filter_data_or.ValueOrDie();
1888*8b6cd535SAndroid Build Coastguard Worker     auto itr = namespace_id_to_namespace.find(filter_data->namespace_id());
1889*8b6cd535SAndroid Build Coastguard Worker     if (itr == namespace_id_to_namespace.end()) {
1890*8b6cd535SAndroid Build Coastguard Worker       ICING_VLOG(1) << "Error trying to find namespace for document store "
1891*8b6cd535SAndroid Build Coastguard Worker                        "storage info counts.";
1892*8b6cd535SAndroid Build Coastguard Worker       continue;
1893*8b6cd535SAndroid Build Coastguard Worker     }
1894*8b6cd535SAndroid Build Coastguard Worker     const std::string& name_space = itr->second;
1895*8b6cd535SAndroid Build Coastguard Worker 
1896*8b6cd535SAndroid Build Coastguard Worker     // Always set the namespace, if the NamespaceStorageInfoProto didn't exist
1897*8b6cd535SAndroid Build Coastguard Worker     // before, we'll get back a default instance of it.
1898*8b6cd535SAndroid Build Coastguard Worker     NamespaceStorageInfoProto& namespace_storage_info =
1899*8b6cd535SAndroid Build Coastguard Worker         namespace_to_storage_info[name_space];
1900*8b6cd535SAndroid Build Coastguard Worker     namespace_storage_info.set_namespace_(name_space);
1901*8b6cd535SAndroid Build Coastguard Worker 
1902*8b6cd535SAndroid Build Coastguard Worker     // Get usage scores
1903*8b6cd535SAndroid Build Coastguard Worker     auto usage_scores_or = usage_store_->GetUsageScores(document_id);
1904*8b6cd535SAndroid Build Coastguard Worker     if (!usage_scores_or.ok()) {
1905*8b6cd535SAndroid Build Coastguard Worker       ICING_VLOG(1) << "Error trying to get usage scores for document store "
1906*8b6cd535SAndroid Build Coastguard Worker                        "storage info counts.";
1907*8b6cd535SAndroid Build Coastguard Worker       continue;
1908*8b6cd535SAndroid Build Coastguard Worker     }
1909*8b6cd535SAndroid Build Coastguard Worker     UsageStore::UsageScores usage_scores = usage_scores_or.ValueOrDie();
1910*8b6cd535SAndroid Build Coastguard Worker 
1911*8b6cd535SAndroid Build Coastguard Worker     // Update our stats
1912*8b6cd535SAndroid Build Coastguard Worker     if (!GetNonExpiredDocumentFilterData(document_id, current_time_ms)) {
1913*8b6cd535SAndroid Build Coastguard Worker       ++total_num_expired;
1914*8b6cd535SAndroid Build Coastguard Worker       namespace_storage_info.set_num_expired_documents(
1915*8b6cd535SAndroid Build Coastguard Worker           namespace_storage_info.num_expired_documents() + 1);
1916*8b6cd535SAndroid Build Coastguard Worker       if (usage_scores.usage_type1_count > 0) {
1917*8b6cd535SAndroid Build Coastguard Worker         namespace_storage_info.set_num_expired_documents_usage_type1(
1918*8b6cd535SAndroid Build Coastguard Worker             namespace_storage_info.num_expired_documents_usage_type1() + 1);
1919*8b6cd535SAndroid Build Coastguard Worker       }
1920*8b6cd535SAndroid Build Coastguard Worker       if (usage_scores.usage_type2_count > 0) {
1921*8b6cd535SAndroid Build Coastguard Worker         namespace_storage_info.set_num_expired_documents_usage_type2(
1922*8b6cd535SAndroid Build Coastguard Worker             namespace_storage_info.num_expired_documents_usage_type2() + 1);
1923*8b6cd535SAndroid Build Coastguard Worker       }
1924*8b6cd535SAndroid Build Coastguard Worker       if (usage_scores.usage_type3_count > 0) {
1925*8b6cd535SAndroid Build Coastguard Worker         namespace_storage_info.set_num_expired_documents_usage_type3(
1926*8b6cd535SAndroid Build Coastguard Worker             namespace_storage_info.num_expired_documents_usage_type3() + 1);
1927*8b6cd535SAndroid Build Coastguard Worker       }
1928*8b6cd535SAndroid Build Coastguard Worker     } else {
1929*8b6cd535SAndroid Build Coastguard Worker       ++total_num_alive;
1930*8b6cd535SAndroid Build Coastguard Worker       namespace_storage_info.set_num_alive_documents(
1931*8b6cd535SAndroid Build Coastguard Worker           namespace_storage_info.num_alive_documents() + 1);
1932*8b6cd535SAndroid Build Coastguard Worker       if (usage_scores.usage_type1_count > 0) {
1933*8b6cd535SAndroid Build Coastguard Worker         namespace_storage_info.set_num_alive_documents_usage_type1(
1934*8b6cd535SAndroid Build Coastguard Worker             namespace_storage_info.num_alive_documents_usage_type1() + 1);
1935*8b6cd535SAndroid Build Coastguard Worker       }
1936*8b6cd535SAndroid Build Coastguard Worker       if (usage_scores.usage_type2_count > 0) {
1937*8b6cd535SAndroid Build Coastguard Worker         namespace_storage_info.set_num_alive_documents_usage_type2(
1938*8b6cd535SAndroid Build Coastguard Worker             namespace_storage_info.num_alive_documents_usage_type2() + 1);
1939*8b6cd535SAndroid Build Coastguard Worker       }
1940*8b6cd535SAndroid Build Coastguard Worker       if (usage_scores.usage_type3_count > 0) {
1941*8b6cd535SAndroid Build Coastguard Worker         namespace_storage_info.set_num_alive_documents_usage_type3(
1942*8b6cd535SAndroid Build Coastguard Worker             namespace_storage_info.num_alive_documents_usage_type3() + 1);
1943*8b6cd535SAndroid Build Coastguard Worker       }
1944*8b6cd535SAndroid Build Coastguard Worker     }
1945*8b6cd535SAndroid Build Coastguard Worker   }
1946*8b6cd535SAndroid Build Coastguard Worker 
1947*8b6cd535SAndroid Build Coastguard Worker   for (auto& itr : namespace_to_storage_info) {
1948*8b6cd535SAndroid Build Coastguard Worker     storage_info.mutable_namespace_storage_info()->Add(std::move(itr.second));
1949*8b6cd535SAndroid Build Coastguard Worker   }
1950*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_num_alive_documents(total_num_alive);
1951*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_num_deleted_documents(total_num_deleted);
1952*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_num_expired_documents(total_num_expired);
1953*8b6cd535SAndroid Build Coastguard Worker   return storage_info;
1954*8b6cd535SAndroid Build Coastguard Worker }
1955*8b6cd535SAndroid Build Coastguard Worker 
GetStorageInfo() const1956*8b6cd535SAndroid Build Coastguard Worker DocumentStorageInfoProto DocumentStore::GetStorageInfo() const {
1957*8b6cd535SAndroid Build Coastguard Worker   DocumentStorageInfoProto storage_info = GetMemberStorageInfo();
1958*8b6cd535SAndroid Build Coastguard Worker   int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str());
1959*8b6cd535SAndroid Build Coastguard Worker   if (directory_size != Filesystem::kBadFileSize) {
1960*8b6cd535SAndroid Build Coastguard Worker     storage_info.set_document_store_size(directory_size);
1961*8b6cd535SAndroid Build Coastguard Worker   } else {
1962*8b6cd535SAndroid Build Coastguard Worker     storage_info.set_document_store_size(-1);
1963*8b6cd535SAndroid Build Coastguard Worker   }
1964*8b6cd535SAndroid Build Coastguard Worker   storage_info.set_num_namespaces(namespace_mapper_->num_keys());
1965*8b6cd535SAndroid Build Coastguard Worker   return CalculateDocumentStatusCounts(std::move(storage_info));
1966*8b6cd535SAndroid Build Coastguard Worker }
1967*8b6cd535SAndroid Build Coastguard Worker 
UpdateSchemaStore(const SchemaStore * schema_store)1968*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
1969*8b6cd535SAndroid Build Coastguard Worker     const SchemaStore* schema_store) {
1970*8b6cd535SAndroid Build Coastguard Worker   // Update all references to the SchemaStore
1971*8b6cd535SAndroid Build Coastguard Worker   schema_store_ = schema_store;
1972*8b6cd535SAndroid Build Coastguard Worker   document_validator_.UpdateSchemaStore(schema_store);
1973*8b6cd535SAndroid Build Coastguard Worker 
1974*8b6cd535SAndroid Build Coastguard Worker   int size = document_id_mapper_->num_elements();
1975*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
1976*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = 0; document_id < size; document_id++) {
1977*8b6cd535SAndroid Build Coastguard Worker     auto document_or = Get(document_id);
1978*8b6cd535SAndroid Build Coastguard Worker     if (absl_ports::IsNotFound(document_or.status())) {
1979*8b6cd535SAndroid Build Coastguard Worker       // Skip nonexistent documents
1980*8b6cd535SAndroid Build Coastguard Worker       continue;
1981*8b6cd535SAndroid Build Coastguard Worker     } else if (!document_or.ok()) {
1982*8b6cd535SAndroid Build Coastguard Worker       // Real error, pass up
1983*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::Annotate(
1984*8b6cd535SAndroid Build Coastguard Worker           document_or.status(),
1985*8b6cd535SAndroid Build Coastguard Worker           IcingStringUtil::StringPrintf(
1986*8b6cd535SAndroid Build Coastguard Worker               "Failed to retrieve Document for DocumentId %d", document_id));
1987*8b6cd535SAndroid Build Coastguard Worker     }
1988*8b6cd535SAndroid Build Coastguard Worker 
1989*8b6cd535SAndroid Build Coastguard Worker     // Guaranteed to have a document now.
1990*8b6cd535SAndroid Build Coastguard Worker     DocumentProto document = document_or.ValueOrDie();
1991*8b6cd535SAndroid Build Coastguard Worker 
1992*8b6cd535SAndroid Build Coastguard Worker     // Revalidate that this document is still compatible
1993*8b6cd535SAndroid Build Coastguard Worker     if (document_validator_.Validate(document).ok()) {
1994*8b6cd535SAndroid Build Coastguard Worker       // Update the SchemaTypeId for this entry
1995*8b6cd535SAndroid Build Coastguard Worker       ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
1996*8b6cd535SAndroid Build Coastguard Worker                              schema_store_->GetSchemaTypeId(document.schema()));
1997*8b6cd535SAndroid Build Coastguard Worker       ICING_ASSIGN_OR_RETURN(
1998*8b6cd535SAndroid Build Coastguard Worker           typename FileBackedVector<DocumentFilterData>::MutableView
1999*8b6cd535SAndroid Build Coastguard Worker               doc_filter_data_view,
2000*8b6cd535SAndroid Build Coastguard Worker           filter_cache_->GetMutable(document_id));
2001*8b6cd535SAndroid Build Coastguard Worker       doc_filter_data_view.Get().set_schema_type_id(schema_type_id);
2002*8b6cd535SAndroid Build Coastguard Worker     } else {
2003*8b6cd535SAndroid Build Coastguard Worker       // Document is no longer valid with the new SchemaStore. Mark as
2004*8b6cd535SAndroid Build Coastguard Worker       // deleted
2005*8b6cd535SAndroid Build Coastguard Worker       auto delete_status =
2006*8b6cd535SAndroid Build Coastguard Worker           Delete(document.namespace_(), document.uri(), current_time_ms);
2007*8b6cd535SAndroid Build Coastguard Worker       if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
2008*8b6cd535SAndroid Build Coastguard Worker         // Real error, pass up
2009*8b6cd535SAndroid Build Coastguard Worker         return delete_status;
2010*8b6cd535SAndroid Build Coastguard Worker       }
2011*8b6cd535SAndroid Build Coastguard Worker     }
2012*8b6cd535SAndroid Build Coastguard Worker   }
2013*8b6cd535SAndroid Build Coastguard Worker 
2014*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
2015*8b6cd535SAndroid Build Coastguard Worker }
2016*8b6cd535SAndroid Build Coastguard Worker 
OptimizedUpdateSchemaStore(const SchemaStore * schema_store,const SchemaStore::SetSchemaResult & set_schema_result)2017*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore(
2018*8b6cd535SAndroid Build Coastguard Worker     const SchemaStore* schema_store,
2019*8b6cd535SAndroid Build Coastguard Worker     const SchemaStore::SetSchemaResult& set_schema_result) {
2020*8b6cd535SAndroid Build Coastguard Worker   if (!set_schema_result.success) {
2021*8b6cd535SAndroid Build Coastguard Worker     // No new schema was set, no work to be done
2022*8b6cd535SAndroid Build Coastguard Worker     return libtextclassifier3::Status::OK;
2023*8b6cd535SAndroid Build Coastguard Worker   }
2024*8b6cd535SAndroid Build Coastguard Worker 
2025*8b6cd535SAndroid Build Coastguard Worker   // Update all references to the SchemaStore
2026*8b6cd535SAndroid Build Coastguard Worker   schema_store_ = schema_store;
2027*8b6cd535SAndroid Build Coastguard Worker   document_validator_.UpdateSchemaStore(schema_store);
2028*8b6cd535SAndroid Build Coastguard Worker 
2029*8b6cd535SAndroid Build Coastguard Worker   int size = document_id_mapper_->num_elements();
2030*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
2031*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = 0; document_id < size; document_id++) {
2032*8b6cd535SAndroid Build Coastguard Worker     if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
2033*8b6cd535SAndroid Build Coastguard Worker       // Skip nonexistent documents
2034*8b6cd535SAndroid Build Coastguard Worker       continue;
2035*8b6cd535SAndroid Build Coastguard Worker     }
2036*8b6cd535SAndroid Build Coastguard Worker 
2037*8b6cd535SAndroid Build Coastguard Worker     // Guaranteed that the document exists now.
2038*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
2039*8b6cd535SAndroid Build Coastguard Worker                            filter_cache_->Get(document_id));
2040*8b6cd535SAndroid Build Coastguard Worker 
2041*8b6cd535SAndroid Build Coastguard Worker     bool delete_document = set_schema_result.schema_types_deleted_by_id.count(
2042*8b6cd535SAndroid Build Coastguard Worker                                filter_data->schema_type_id()) != 0;
2043*8b6cd535SAndroid Build Coastguard Worker 
2044*8b6cd535SAndroid Build Coastguard Worker     // Check if we need to update the FilterCache entry for this document. It
2045*8b6cd535SAndroid Build Coastguard Worker     // may have been assigned a different SchemaTypeId in the new SchemaStore.
2046*8b6cd535SAndroid Build Coastguard Worker     bool update_filter_cache =
2047*8b6cd535SAndroid Build Coastguard Worker         set_schema_result.old_schema_type_ids_changed.count(
2048*8b6cd535SAndroid Build Coastguard Worker             filter_data->schema_type_id()) != 0;
2049*8b6cd535SAndroid Build Coastguard Worker 
2050*8b6cd535SAndroid Build Coastguard Worker     // Check if we need to revalidate this document if the type is now
2051*8b6cd535SAndroid Build Coastguard Worker     // incompatible
2052*8b6cd535SAndroid Build Coastguard Worker     bool revalidate_document =
2053*8b6cd535SAndroid Build Coastguard Worker         set_schema_result.schema_types_incompatible_by_id.count(
2054*8b6cd535SAndroid Build Coastguard Worker             filter_data->schema_type_id()) != 0;
2055*8b6cd535SAndroid Build Coastguard Worker 
2056*8b6cd535SAndroid Build Coastguard Worker     if (update_filter_cache || revalidate_document) {
2057*8b6cd535SAndroid Build Coastguard Worker       ICING_ASSIGN_OR_RETURN(DocumentProto document, Get(document_id));
2058*8b6cd535SAndroid Build Coastguard Worker 
2059*8b6cd535SAndroid Build Coastguard Worker       if (update_filter_cache) {
2060*8b6cd535SAndroid Build Coastguard Worker         ICING_ASSIGN_OR_RETURN(
2061*8b6cd535SAndroid Build Coastguard Worker             SchemaTypeId schema_type_id,
2062*8b6cd535SAndroid Build Coastguard Worker             schema_store_->GetSchemaTypeId(document.schema()));
2063*8b6cd535SAndroid Build Coastguard Worker         ICING_ASSIGN_OR_RETURN(
2064*8b6cd535SAndroid Build Coastguard Worker             typename FileBackedVector<DocumentFilterData>::MutableView
2065*8b6cd535SAndroid Build Coastguard Worker                 doc_filter_data_view,
2066*8b6cd535SAndroid Build Coastguard Worker             filter_cache_->GetMutable(document_id));
2067*8b6cd535SAndroid Build Coastguard Worker         doc_filter_data_view.Get().set_schema_type_id(schema_type_id);
2068*8b6cd535SAndroid Build Coastguard Worker       }
2069*8b6cd535SAndroid Build Coastguard Worker       if (revalidate_document) {
2070*8b6cd535SAndroid Build Coastguard Worker         delete_document = !document_validator_.Validate(document).ok();
2071*8b6cd535SAndroid Build Coastguard Worker       }
2072*8b6cd535SAndroid Build Coastguard Worker     }
2073*8b6cd535SAndroid Build Coastguard Worker 
2074*8b6cd535SAndroid Build Coastguard Worker     if (delete_document) {
2075*8b6cd535SAndroid Build Coastguard Worker       // Document is no longer valid with the new SchemaStore. Mark as deleted
2076*8b6cd535SAndroid Build Coastguard Worker       auto delete_status = Delete(document_id, current_time_ms);
2077*8b6cd535SAndroid Build Coastguard Worker       if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
2078*8b6cd535SAndroid Build Coastguard Worker         // Real error, pass up
2079*8b6cd535SAndroid Build Coastguard Worker         return delete_status;
2080*8b6cd535SAndroid Build Coastguard Worker       }
2081*8b6cd535SAndroid Build Coastguard Worker     }
2082*8b6cd535SAndroid Build Coastguard Worker   }
2083*8b6cd535SAndroid Build Coastguard Worker 
2084*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
2085*8b6cd535SAndroid Build Coastguard Worker }
2086*8b6cd535SAndroid Build Coastguard Worker 
RegenerateScorablePropertyCache(const std::unordered_set<SchemaTypeId> & schema_type_ids)2087*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::RegenerateScorablePropertyCache(
2088*8b6cd535SAndroid Build Coastguard Worker     const std::unordered_set<SchemaTypeId>& schema_type_ids) {
2089*8b6cd535SAndroid Build Coastguard Worker   if (schema_type_ids.empty()) {
2090*8b6cd535SAndroid Build Coastguard Worker     return libtextclassifier3::Status::OK;
2091*8b6cd535SAndroid Build Coastguard Worker   }
2092*8b6cd535SAndroid Build Coastguard Worker 
2093*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
2094*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = 0;
2095*8b6cd535SAndroid Build Coastguard Worker        document_id < document_id_mapper_->num_elements(); ++document_id) {
2096*8b6cd535SAndroid Build Coastguard Worker     if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
2097*8b6cd535SAndroid Build Coastguard Worker       continue;
2098*8b6cd535SAndroid Build Coastguard Worker     }
2099*8b6cd535SAndroid Build Coastguard Worker     // Guaranteed that the document exists now.
2100*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
2101*8b6cd535SAndroid Build Coastguard Worker                            filter_cache_->Get(document_id));
2102*8b6cd535SAndroid Build Coastguard Worker     SchemaTypeId schema_type_id = filter_data->schema_type_id();
2103*8b6cd535SAndroid Build Coastguard Worker     if (schema_type_ids.find(schema_type_id) == schema_type_ids.end()) {
2104*8b6cd535SAndroid Build Coastguard Worker       continue;
2105*8b6cd535SAndroid Build Coastguard Worker     }
2106*8b6cd535SAndroid Build Coastguard Worker 
2107*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(DocumentProto document, Get(document_id));
2108*8b6cd535SAndroid Build Coastguard Worker     int32_t scorable_property_cache_index = kInvalidScorablePropertyCacheIndex;
2109*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(
2110*8b6cd535SAndroid Build Coastguard Worker         scorable_property_cache_index,
2111*8b6cd535SAndroid Build Coastguard Worker         UpdateScorablePropertyCache(document, schema_type_id));
2112*8b6cd535SAndroid Build Coastguard Worker 
2113*8b6cd535SAndroid Build Coastguard Worker     // Update the score_cache_ with the new scorable property cache index.
2114*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(
2115*8b6cd535SAndroid Build Coastguard Worker         typename FileBackedVector<DocumentAssociatedScoreData>::MutableView
2116*8b6cd535SAndroid Build Coastguard Worker             doc_score_data_view,
2117*8b6cd535SAndroid Build Coastguard Worker         score_cache_->GetMutable(document_id));
2118*8b6cd535SAndroid Build Coastguard Worker     doc_score_data_view.Get().set_scorable_property_cache_index(
2119*8b6cd535SAndroid Build Coastguard Worker         scorable_property_cache_index);
2120*8b6cd535SAndroid Build Coastguard Worker   }
2121*8b6cd535SAndroid Build Coastguard Worker 
2122*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
2123*8b6cd535SAndroid Build Coastguard Worker }
2124*8b6cd535SAndroid Build Coastguard Worker 
2125*8b6cd535SAndroid Build Coastguard Worker // TODO(b/121227117): Implement Optimize()
Optimize()2126*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::Optimize() {
2127*8b6cd535SAndroid Build Coastguard Worker   return libtextclassifier3::Status::OK;
2128*8b6cd535SAndroid Build Coastguard Worker }
2129*8b6cd535SAndroid Build Coastguard Worker 
2130*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
OptimizeInto(const std::string & new_directory,const LanguageSegmenter * lang_segmenter,std::unordered_set<std::string> && potentially_optimizable_blob_handles,OptimizeStatsProto * stats) const2131*8b6cd535SAndroid Build Coastguard Worker DocumentStore::OptimizeInto(
2132*8b6cd535SAndroid Build Coastguard Worker     const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
2133*8b6cd535SAndroid Build Coastguard Worker     std::unordered_set<std::string>&& potentially_optimizable_blob_handles,
2134*8b6cd535SAndroid Build Coastguard Worker     OptimizeStatsProto* stats) const {
2135*8b6cd535SAndroid Build Coastguard Worker   // Validates directory
2136*8b6cd535SAndroid Build Coastguard Worker   if (new_directory == base_dir_) {
2137*8b6cd535SAndroid Build Coastguard Worker     return absl_ports::InvalidArgumentError(
2138*8b6cd535SAndroid Build Coastguard Worker         "New directory is the same as the current one.");
2139*8b6cd535SAndroid Build Coastguard Worker   }
2140*8b6cd535SAndroid Build Coastguard Worker 
2141*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
2142*8b6cd535SAndroid Build Coastguard Worker       auto doc_store_create_result,
2143*8b6cd535SAndroid Build Coastguard Worker       DocumentStore::Create(
2144*8b6cd535SAndroid Build Coastguard Worker           filesystem_, new_directory, &clock_, schema_store_, &feature_flags_,
2145*8b6cd535SAndroid Build Coastguard Worker           /*force_recovery_and_revalidate_documents=*/false, pre_mapping_fbv_,
2146*8b6cd535SAndroid Build Coastguard Worker           use_persistent_hash_map_, compression_level_,
2147*8b6cd535SAndroid Build Coastguard Worker           /*initialize_stats=*/nullptr));
2148*8b6cd535SAndroid Build Coastguard Worker   std::unique_ptr<DocumentStore> new_doc_store =
2149*8b6cd535SAndroid Build Coastguard Worker       std::move(doc_store_create_result.document_store);
2150*8b6cd535SAndroid Build Coastguard Worker 
2151*8b6cd535SAndroid Build Coastguard Worker   // Writes all valid docs into new document store (new directory)
2152*8b6cd535SAndroid Build Coastguard Worker   int document_cnt = document_id_mapper_->num_elements();
2153*8b6cd535SAndroid Build Coastguard Worker   int num_deleted_documents = 0;
2154*8b6cd535SAndroid Build Coastguard Worker   int num_expired_documents = 0;
2155*8b6cd535SAndroid Build Coastguard Worker   UsageStore::UsageScores default_usage;
2156*8b6cd535SAndroid Build Coastguard Worker   OptimizeResult result;
2157*8b6cd535SAndroid Build Coastguard Worker   result.document_id_old_to_new.resize(document_cnt, kInvalidDocumentId);
2158*8b6cd535SAndroid Build Coastguard Worker 
2159*8b6cd535SAndroid Build Coastguard Worker   result.dead_blob_handles = std::move(potentially_optimizable_blob_handles);
2160*8b6cd535SAndroid Build Coastguard Worker   std::unordered_map<std::string, std::vector<std::string>>
2161*8b6cd535SAndroid Build Coastguard Worker       type_blob_property_map;
2162*8b6cd535SAndroid Build Coastguard Worker   if (!result.dead_blob_handles.empty()) {
2163*8b6cd535SAndroid Build Coastguard Worker     // Get the blob property map from the schema store.
2164*8b6cd535SAndroid Build Coastguard Worker     if (num_documents() == 0) {
2165*8b6cd535SAndroid Build Coastguard Worker       return result;
2166*8b6cd535SAndroid Build Coastguard Worker     }
2167*8b6cd535SAndroid Build Coastguard Worker     auto type_blob_property_map_or = schema_store_->ConstructBlobPropertyMap();
2168*8b6cd535SAndroid Build Coastguard Worker     if (!type_blob_property_map_or.ok()) {
2169*8b6cd535SAndroid Build Coastguard Worker       // If we fail to retrieve this map when there *are* documents in
2170*8b6cd535SAndroid Build Coastguard Worker       // doc store, then something is seriously wrong. Return error.
2171*8b6cd535SAndroid Build Coastguard Worker       return type_blob_property_map_or.status();
2172*8b6cd535SAndroid Build Coastguard Worker     }
2173*8b6cd535SAndroid Build Coastguard Worker     type_blob_property_map = std::move(type_blob_property_map_or).ValueOrDie();
2174*8b6cd535SAndroid Build Coastguard Worker   }
2175*8b6cd535SAndroid Build Coastguard Worker 
2176*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
2177*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = 0; document_id < document_cnt; document_id++) {
2178*8b6cd535SAndroid Build Coastguard Worker     auto document_or = Get(document_id, /*clear_internal_fields=*/false);
2179*8b6cd535SAndroid Build Coastguard Worker     if (absl_ports::IsNotFound(document_or.status())) {
2180*8b6cd535SAndroid Build Coastguard Worker       if (IsDeleted(document_id)) {
2181*8b6cd535SAndroid Build Coastguard Worker         ++num_deleted_documents;
2182*8b6cd535SAndroid Build Coastguard Worker       } else if (!GetNonExpiredDocumentFilterData(document_id,
2183*8b6cd535SAndroid Build Coastguard Worker                                                   current_time_ms)) {
2184*8b6cd535SAndroid Build Coastguard Worker         ++num_expired_documents;
2185*8b6cd535SAndroid Build Coastguard Worker       }
2186*8b6cd535SAndroid Build Coastguard Worker       continue;
2187*8b6cd535SAndroid Build Coastguard Worker     } else if (!document_or.ok()) {
2188*8b6cd535SAndroid Build Coastguard Worker       // Real error, pass up
2189*8b6cd535SAndroid Build Coastguard Worker       return absl_ports::Annotate(
2190*8b6cd535SAndroid Build Coastguard Worker           document_or.status(),
2191*8b6cd535SAndroid Build Coastguard Worker           IcingStringUtil::StringPrintf(
2192*8b6cd535SAndroid Build Coastguard Worker               "Failed to retrieve Document for DocumentId %d", document_id));
2193*8b6cd535SAndroid Build Coastguard Worker     }
2194*8b6cd535SAndroid Build Coastguard Worker 
2195*8b6cd535SAndroid Build Coastguard Worker     // Guaranteed to have a document now.
2196*8b6cd535SAndroid Build Coastguard Worker     DocumentProto document_to_keep = std::move(document_or).ValueOrDie();
2197*8b6cd535SAndroid Build Coastguard Worker     // Remove blobs that still have reference are removed from the
2198*8b6cd535SAndroid Build Coastguard Worker     // expired_blob_handles. So that all remaining are dead blob.
2199*8b6cd535SAndroid Build Coastguard Worker     RemoveAliveBlobHandles(document_to_keep, type_blob_property_map,
2200*8b6cd535SAndroid Build Coastguard Worker                            result.dead_blob_handles);
2201*8b6cd535SAndroid Build Coastguard Worker 
2202*8b6cd535SAndroid Build Coastguard Worker     libtextclassifier3::StatusOr<PutResult> put_result_or;
2203*8b6cd535SAndroid Build Coastguard Worker     if (document_to_keep.internal_fields().length_in_tokens() == 0) {
2204*8b6cd535SAndroid Build Coastguard Worker       auto tokenized_document_or = TokenizedDocument::Create(
2205*8b6cd535SAndroid Build Coastguard Worker           schema_store_, lang_segmenter, document_to_keep);
2206*8b6cd535SAndroid Build Coastguard Worker       if (!tokenized_document_or.ok()) {
2207*8b6cd535SAndroid Build Coastguard Worker         return absl_ports::Annotate(
2208*8b6cd535SAndroid Build Coastguard Worker             tokenized_document_or.status(),
2209*8b6cd535SAndroid Build Coastguard Worker             IcingStringUtil::StringPrintf(
2210*8b6cd535SAndroid Build Coastguard Worker                 "Failed to tokenize Document for DocumentId %d", document_id));
2211*8b6cd535SAndroid Build Coastguard Worker       }
2212*8b6cd535SAndroid Build Coastguard Worker       TokenizedDocument tokenized_document(
2213*8b6cd535SAndroid Build Coastguard Worker           std::move(tokenized_document_or).ValueOrDie());
2214*8b6cd535SAndroid Build Coastguard Worker       put_result_or = new_doc_store->Put(
2215*8b6cd535SAndroid Build Coastguard Worker           std::move(document_to_keep), tokenized_document.num_string_tokens());
2216*8b6cd535SAndroid Build Coastguard Worker     } else {
2217*8b6cd535SAndroid Build Coastguard Worker       // TODO(b/144458732): Implement a more robust version of
2218*8b6cd535SAndroid Build Coastguard Worker       // TC_ASSIGN_OR_RETURN that can support error logging.
2219*8b6cd535SAndroid Build Coastguard Worker       put_result_or = new_doc_store->InternalPut(std::move(document_to_keep));
2220*8b6cd535SAndroid Build Coastguard Worker     }
2221*8b6cd535SAndroid Build Coastguard Worker     if (!put_result_or.ok()) {
2222*8b6cd535SAndroid Build Coastguard Worker       ICING_LOG(ERROR) << put_result_or.status().error_message()
2223*8b6cd535SAndroid Build Coastguard Worker                        << "Failed to write into new document store";
2224*8b6cd535SAndroid Build Coastguard Worker       return put_result_or.status();
2225*8b6cd535SAndroid Build Coastguard Worker     }
2226*8b6cd535SAndroid Build Coastguard Worker 
2227*8b6cd535SAndroid Build Coastguard Worker     DocumentId new_document_id = put_result_or.ValueOrDie().new_document_id;
2228*8b6cd535SAndroid Build Coastguard Worker     result.document_id_old_to_new[document_id] = new_document_id;
2229*8b6cd535SAndroid Build Coastguard Worker 
2230*8b6cd535SAndroid Build Coastguard Worker     // Copy over usage scores.
2231*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
2232*8b6cd535SAndroid Build Coastguard Worker                            usage_store_->GetUsageScores(document_id));
2233*8b6cd535SAndroid Build Coastguard Worker     if (!(usage_scores == default_usage)) {
2234*8b6cd535SAndroid Build Coastguard Worker       // If the usage scores for this document are the default (no usage),
2235*8b6cd535SAndroid Build Coastguard Worker       // then don't bother setting it. No need to possibly allocate storage if
2236*8b6cd535SAndroid Build Coastguard Worker       // there's nothing interesting to store.
2237*8b6cd535SAndroid Build Coastguard Worker       ICING_RETURN_IF_ERROR(
2238*8b6cd535SAndroid Build Coastguard Worker           new_doc_store->SetUsageScores(new_document_id, usage_scores));
2239*8b6cd535SAndroid Build Coastguard Worker     }
2240*8b6cd535SAndroid Build Coastguard Worker   }
2241*8b6cd535SAndroid Build Coastguard Worker   // Construct namespace_id_old_to_new
2242*8b6cd535SAndroid Build Coastguard Worker   int namespace_cnt = namespace_mapper_->num_keys();
2243*8b6cd535SAndroid Build Coastguard Worker   std::unordered_map<NamespaceId, std::string> old_namespaces =
2244*8b6cd535SAndroid Build Coastguard Worker       GetNamespaceIdsToNamespaces(namespace_mapper_.get());
2245*8b6cd535SAndroid Build Coastguard Worker   if (namespace_cnt != old_namespaces.size()) {
2246*8b6cd535SAndroid Build Coastguard Worker     // This really shouldn't happen. If it really happens, then:
2247*8b6cd535SAndroid Build Coastguard Worker     // - It won't block DocumentStore optimization, so don't return error here.
2248*8b6cd535SAndroid Build Coastguard Worker     // - Instead, write a warning log here and hint the caller to rebuild index.
2249*8b6cd535SAndroid Build Coastguard Worker     ICING_LOG(WARNING) << "Unexpected old namespace count " << namespace_cnt
2250*8b6cd535SAndroid Build Coastguard Worker                        << " vs " << old_namespaces.size();
2251*8b6cd535SAndroid Build Coastguard Worker     result.should_rebuild_index = true;
2252*8b6cd535SAndroid Build Coastguard Worker   } else {
2253*8b6cd535SAndroid Build Coastguard Worker     result.namespace_id_old_to_new.resize(namespace_cnt, kInvalidNamespaceId);
2254*8b6cd535SAndroid Build Coastguard Worker     for (const auto& [old_namespace_id, ns] : old_namespaces) {
2255*8b6cd535SAndroid Build Coastguard Worker       if (old_namespace_id >= result.namespace_id_old_to_new.size()) {
2256*8b6cd535SAndroid Build Coastguard Worker         // This really shouldn't happen. If it really happens, then:
2257*8b6cd535SAndroid Build Coastguard Worker         // - It won't block DocumentStore optimization, so don't return error
2258*8b6cd535SAndroid Build Coastguard Worker         //   here.
2259*8b6cd535SAndroid Build Coastguard Worker         // - Instead, write a warning log here and hint the caller to rebuild
2260*8b6cd535SAndroid Build Coastguard Worker         //   index.
2261*8b6cd535SAndroid Build Coastguard Worker         ICING_LOG(WARNING) << "Found unexpected namespace id "
2262*8b6cd535SAndroid Build Coastguard Worker                            << old_namespace_id << ". Should be in range 0 to "
2263*8b6cd535SAndroid Build Coastguard Worker                            << result.namespace_id_old_to_new.size()
2264*8b6cd535SAndroid Build Coastguard Worker                            << " (exclusive).";
2265*8b6cd535SAndroid Build Coastguard Worker         result.namespace_id_old_to_new.clear();
2266*8b6cd535SAndroid Build Coastguard Worker         result.should_rebuild_index = true;
2267*8b6cd535SAndroid Build Coastguard Worker         break;
2268*8b6cd535SAndroid Build Coastguard Worker       }
2269*8b6cd535SAndroid Build Coastguard Worker 
2270*8b6cd535SAndroid Build Coastguard Worker       auto new_namespace_id_or = new_doc_store->namespace_mapper_->Get(ns);
2271*8b6cd535SAndroid Build Coastguard Worker       if (!new_namespace_id_or.ok()) {
2272*8b6cd535SAndroid Build Coastguard Worker         if (absl_ports::IsNotFound(new_namespace_id_or.status())) {
2273*8b6cd535SAndroid Build Coastguard Worker           continue;
2274*8b6cd535SAndroid Build Coastguard Worker         }
2275*8b6cd535SAndroid Build Coastguard Worker         // Real error, return it.
2276*8b6cd535SAndroid Build Coastguard Worker         return std::move(new_namespace_id_or).status();
2277*8b6cd535SAndroid Build Coastguard Worker       }
2278*8b6cd535SAndroid Build Coastguard Worker 
2279*8b6cd535SAndroid Build Coastguard Worker       NamespaceId new_namespace_id = new_namespace_id_or.ValueOrDie();
2280*8b6cd535SAndroid Build Coastguard Worker       // Safe to use bracket to assign given that we've checked the range above.
2281*8b6cd535SAndroid Build Coastguard Worker       result.namespace_id_old_to_new[old_namespace_id] = new_namespace_id;
2282*8b6cd535SAndroid Build Coastguard Worker     }
2283*8b6cd535SAndroid Build Coastguard Worker   }
2284*8b6cd535SAndroid Build Coastguard Worker 
2285*8b6cd535SAndroid Build Coastguard Worker   if (stats != nullptr) {
2286*8b6cd535SAndroid Build Coastguard Worker     stats->set_num_original_documents(document_cnt);
2287*8b6cd535SAndroid Build Coastguard Worker     stats->set_num_deleted_documents(num_deleted_documents);
2288*8b6cd535SAndroid Build Coastguard Worker     stats->set_num_expired_documents(num_expired_documents);
2289*8b6cd535SAndroid Build Coastguard Worker     stats->set_num_original_namespaces(namespace_cnt);
2290*8b6cd535SAndroid Build Coastguard Worker     stats->set_num_deleted_namespaces(
2291*8b6cd535SAndroid Build Coastguard Worker         namespace_cnt - new_doc_store->namespace_mapper_->num_keys());
2292*8b6cd535SAndroid Build Coastguard Worker   }
2293*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk(PersistType::FULL));
2294*8b6cd535SAndroid Build Coastguard Worker   return result;
2295*8b6cd535SAndroid Build Coastguard Worker }
2296*8b6cd535SAndroid Build Coastguard Worker 
2297*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentStore::OptimizeInfo>
GetOptimizeInfo() const2298*8b6cd535SAndroid Build Coastguard Worker DocumentStore::GetOptimizeInfo() const {
2299*8b6cd535SAndroid Build Coastguard Worker   OptimizeInfo optimize_info;
2300*8b6cd535SAndroid Build Coastguard Worker 
2301*8b6cd535SAndroid Build Coastguard Worker   // Figure out our ratio of optimizable/total docs.
2302*8b6cd535SAndroid Build Coastguard Worker   int32_t num_documents = document_id_mapper_->num_elements();
2303*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
2304*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = kMinDocumentId; document_id < num_documents;
2305*8b6cd535SAndroid Build Coastguard Worker        ++document_id) {
2306*8b6cd535SAndroid Build Coastguard Worker     if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
2307*8b6cd535SAndroid Build Coastguard Worker       ++optimize_info.optimizable_docs;
2308*8b6cd535SAndroid Build Coastguard Worker     }
2309*8b6cd535SAndroid Build Coastguard Worker 
2310*8b6cd535SAndroid Build Coastguard Worker     ++optimize_info.total_docs;
2311*8b6cd535SAndroid Build Coastguard Worker   }
2312*8b6cd535SAndroid Build Coastguard Worker 
2313*8b6cd535SAndroid Build Coastguard Worker   if (optimize_info.total_docs == 0) {
2314*8b6cd535SAndroid Build Coastguard Worker     // Can exit early since there's nothing to calculate.
2315*8b6cd535SAndroid Build Coastguard Worker     return optimize_info;
2316*8b6cd535SAndroid Build Coastguard Worker   }
2317*8b6cd535SAndroid Build Coastguard Worker 
2318*8b6cd535SAndroid Build Coastguard Worker   // Get the total element size.
2319*8b6cd535SAndroid Build Coastguard Worker   //
2320*8b6cd535SAndroid Build Coastguard Worker   // We use file size instead of disk usage here because the files are not
2321*8b6cd535SAndroid Build Coastguard Worker   // sparse, so it's more accurate. Disk usage rounds up to the nearest block
2322*8b6cd535SAndroid Build Coastguard Worker   // size.
2323*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(const int64_t document_log_file_size,
2324*8b6cd535SAndroid Build Coastguard Worker                          document_log_->GetElementsFileSize());
2325*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(const int64_t document_id_mapper_file_size,
2326*8b6cd535SAndroid Build Coastguard Worker                          document_id_mapper_->GetElementsFileSize());
2327*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(const int64_t score_cache_file_size,
2328*8b6cd535SAndroid Build Coastguard Worker                          score_cache_->GetElementsFileSize());
2329*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(const int64_t scorable_property_cache_file_size,
2330*8b6cd535SAndroid Build Coastguard Worker                          scorable_property_cache_->GetElementsFileSize());
2331*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_file_size,
2332*8b6cd535SAndroid Build Coastguard Worker                          filter_cache_->GetElementsFileSize());
2333*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_file_size,
2334*8b6cd535SAndroid Build Coastguard Worker                          corpus_score_cache_->GetElementsFileSize());
2335*8b6cd535SAndroid Build Coastguard Worker 
2336*8b6cd535SAndroid Build Coastguard Worker   // Usage store might be sparse, but we'll still use file size for more
2337*8b6cd535SAndroid Build Coastguard Worker   // accurate counting.
2338*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(const int64_t usage_store_file_size,
2339*8b6cd535SAndroid Build Coastguard Worker                          usage_store_->GetElementsFileSize());
2340*8b6cd535SAndroid Build Coastguard Worker 
2341*8b6cd535SAndroid Build Coastguard Worker   // We use a combined disk usage and file size for the DynamicTrieKeyMapper
2342*8b6cd535SAndroid Build Coastguard Worker   // because it's backed by a trie, which has some sparse property bitmaps.
2343*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_size,
2344*8b6cd535SAndroid Build Coastguard Worker                          document_key_mapper_->GetElementsSize());
2345*8b6cd535SAndroid Build Coastguard Worker 
2346*8b6cd535SAndroid Build Coastguard Worker   // We don't include the namespace_mapper or the corpus_mapper because it's
2347*8b6cd535SAndroid Build Coastguard Worker   // not clear if we could recover any space even if Optimize were called.
2348*8b6cd535SAndroid Build Coastguard Worker   // Deleting 100s of documents could still leave a few documents of a
2349*8b6cd535SAndroid Build Coastguard Worker   // namespace, and then there would be no change.
2350*8b6cd535SAndroid Build Coastguard Worker 
2351*8b6cd535SAndroid Build Coastguard Worker   int64_t total_size = document_log_file_size + document_key_mapper_size +
2352*8b6cd535SAndroid Build Coastguard Worker                        document_id_mapper_file_size + score_cache_file_size +
2353*8b6cd535SAndroid Build Coastguard Worker                        scorable_property_cache_file_size +
2354*8b6cd535SAndroid Build Coastguard Worker                        filter_cache_file_size + corpus_score_cache_file_size +
2355*8b6cd535SAndroid Build Coastguard Worker                        usage_store_file_size;
2356*8b6cd535SAndroid Build Coastguard Worker 
2357*8b6cd535SAndroid Build Coastguard Worker   optimize_info.estimated_optimizable_bytes =
2358*8b6cd535SAndroid Build Coastguard Worker       total_size * optimize_info.optimizable_docs / optimize_info.total_docs;
2359*8b6cd535SAndroid Build Coastguard Worker   return optimize_info;
2360*8b6cd535SAndroid Build Coastguard Worker }
2361*8b6cd535SAndroid Build Coastguard Worker 
UpdateCorpusAssociatedScoreCache(CorpusId corpus_id,const CorpusAssociatedScoreData & score_data)2362*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::UpdateCorpusAssociatedScoreCache(
2363*8b6cd535SAndroid Build Coastguard Worker     CorpusId corpus_id, const CorpusAssociatedScoreData& score_data) {
2364*8b6cd535SAndroid Build Coastguard Worker   return corpus_score_cache_->Set(corpus_id, score_data);
2365*8b6cd535SAndroid Build Coastguard Worker }
2366*8b6cd535SAndroid Build Coastguard Worker 
UpdateDocumentAssociatedScoreCache(DocumentId document_id,const DocumentAssociatedScoreData & score_data)2367*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::UpdateDocumentAssociatedScoreCache(
2368*8b6cd535SAndroid Build Coastguard Worker     DocumentId document_id, const DocumentAssociatedScoreData& score_data) {
2369*8b6cd535SAndroid Build Coastguard Worker   return score_cache_->Set(document_id, score_data);
2370*8b6cd535SAndroid Build Coastguard Worker }
2371*8b6cd535SAndroid Build Coastguard Worker 
UpdateFilterCache(DocumentId document_id,const DocumentFilterData & filter_data)2372*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::UpdateFilterCache(
2373*8b6cd535SAndroid Build Coastguard Worker     DocumentId document_id, const DocumentFilterData& filter_data) {
2374*8b6cd535SAndroid Build Coastguard Worker   return filter_cache_->Set(document_id, filter_data);
2375*8b6cd535SAndroid Build Coastguard Worker }
2376*8b6cd535SAndroid Build Coastguard Worker 
ClearDerivedData(DocumentId document_id)2377*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::ClearDerivedData(
2378*8b6cd535SAndroid Build Coastguard Worker     DocumentId document_id) {
2379*8b6cd535SAndroid Build Coastguard Worker   // We intentionally leave the data in key_mapper_ because locating that data
2380*8b6cd535SAndroid Build Coastguard Worker   // requires fetching namespace and uri. Leaving data in key_mapper_ should
2381*8b6cd535SAndroid Build Coastguard Worker   // be fine because the data is hashed.
2382*8b6cd535SAndroid Build Coastguard Worker 
2383*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(document_id_mapper_->Set(document_id, kDocDeletedFlag));
2384*8b6cd535SAndroid Build Coastguard Worker 
2385*8b6cd535SAndroid Build Coastguard Worker   // Resets the score cache entry
2386*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
2387*8b6cd535SAndroid Build Coastguard Worker       document_id,
2388*8b6cd535SAndroid Build Coastguard Worker       DocumentAssociatedScoreData(
2389*8b6cd535SAndroid Build Coastguard Worker           kInvalidCorpusId,
2390*8b6cd535SAndroid Build Coastguard Worker           /*document_score=*/-1,
2391*8b6cd535SAndroid Build Coastguard Worker           /*creation_timestamp_ms=*/-1,
2392*8b6cd535SAndroid Build Coastguard Worker           /*scorable_property_cache_index=*/kInvalidScorablePropertyCacheIndex,
2393*8b6cd535SAndroid Build Coastguard Worker           /*length_in_tokens=*/0)));
2394*8b6cd535SAndroid Build Coastguard Worker 
2395*8b6cd535SAndroid Build Coastguard Worker   // Resets the filter cache entry
2396*8b6cd535SAndroid Build Coastguard Worker   ICING_RETURN_IF_ERROR(UpdateFilterCache(
2397*8b6cd535SAndroid Build Coastguard Worker       document_id,
2398*8b6cd535SAndroid Build Coastguard Worker       DocumentFilterData(kInvalidNamespaceId, /*uri_fingerprint=*/0,
2399*8b6cd535SAndroid Build Coastguard Worker                          kInvalidSchemaTypeId,
2400*8b6cd535SAndroid Build Coastguard Worker                          /*expiration_timestamp_ms=*/-1)));
2401*8b6cd535SAndroid Build Coastguard Worker 
2402*8b6cd535SAndroid Build Coastguard Worker   // Clears the usage scores.
2403*8b6cd535SAndroid Build Coastguard Worker   return usage_store_->DeleteUsageScores(document_id);
2404*8b6cd535SAndroid Build Coastguard Worker }
2405*8b6cd535SAndroid Build Coastguard Worker 
SetUsageScores(DocumentId document_id,const UsageStore::UsageScores & usage_scores)2406*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status DocumentStore::SetUsageScores(
2407*8b6cd535SAndroid Build Coastguard Worker     DocumentId document_id, const UsageStore::UsageScores& usage_scores) {
2408*8b6cd535SAndroid Build Coastguard Worker   return usage_store_->SetUsageScores(document_id, usage_scores);
2409*8b6cd535SAndroid Build Coastguard Worker }
2410*8b6cd535SAndroid Build Coastguard Worker 
2411*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<
2412*8b6cd535SAndroid Build Coastguard Worker     google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
CollectCorpusInfo() const2413*8b6cd535SAndroid Build Coastguard Worker DocumentStore::CollectCorpusInfo() const {
2414*8b6cd535SAndroid Build Coastguard Worker   google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo> corpus_info;
2415*8b6cd535SAndroid Build Coastguard Worker   libtextclassifier3::StatusOr<const SchemaProto*> schema_proto_or =
2416*8b6cd535SAndroid Build Coastguard Worker       schema_store_->GetSchema();
2417*8b6cd535SAndroid Build Coastguard Worker   if (!schema_proto_or.ok()) {
2418*8b6cd535SAndroid Build Coastguard Worker     return corpus_info;
2419*8b6cd535SAndroid Build Coastguard Worker   }
2420*8b6cd535SAndroid Build Coastguard Worker   // Maps from CorpusId to the corresponding protocol buffer in the result.
2421*8b6cd535SAndroid Build Coastguard Worker   std::unordered_map<CorpusId, DocumentDebugInfoProto::CorpusInfo*> info_map;
2422*8b6cd535SAndroid Build Coastguard Worker   std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
2423*8b6cd535SAndroid Build Coastguard Worker       GetNamespaceIdsToNamespaces(namespace_mapper_.get());
2424*8b6cd535SAndroid Build Coastguard Worker   const SchemaProto* schema_proto = schema_proto_or.ValueOrDie();
2425*8b6cd535SAndroid Build Coastguard Worker   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
2426*8b6cd535SAndroid Build Coastguard Worker   for (DocumentId document_id = 0; document_id < filter_cache_->num_elements();
2427*8b6cd535SAndroid Build Coastguard Worker        ++document_id) {
2428*8b6cd535SAndroid Build Coastguard Worker     if (!GetAliveDocumentFilterData(document_id, current_time_ms)) {
2429*8b6cd535SAndroid Build Coastguard Worker       continue;
2430*8b6cd535SAndroid Build Coastguard Worker     }
2431*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
2432*8b6cd535SAndroid Build Coastguard Worker                            filter_cache_->Get(document_id));
2433*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(const DocumentAssociatedScoreData* score_data,
2434*8b6cd535SAndroid Build Coastguard Worker                            score_cache_->Get(document_id));
2435*8b6cd535SAndroid Build Coastguard Worker     const std::string& name_space =
2436*8b6cd535SAndroid Build Coastguard Worker         namespace_id_to_namespace[filter_data->namespace_id()];
2437*8b6cd535SAndroid Build Coastguard Worker     const std::string& schema =
2438*8b6cd535SAndroid Build Coastguard Worker         schema_proto->types()[filter_data->schema_type_id()].schema_type();
2439*8b6cd535SAndroid Build Coastguard Worker     auto iter = info_map.find(score_data->corpus_id());
2440*8b6cd535SAndroid Build Coastguard Worker     if (iter == info_map.end()) {
2441*8b6cd535SAndroid Build Coastguard Worker       DocumentDebugInfoProto::CorpusInfo* entry = corpus_info.Add();
2442*8b6cd535SAndroid Build Coastguard Worker       entry->set_namespace_(name_space);
2443*8b6cd535SAndroid Build Coastguard Worker       entry->set_schema(schema);
2444*8b6cd535SAndroid Build Coastguard Worker       iter = info_map.insert({score_data->corpus_id(), entry}).first;
2445*8b6cd535SAndroid Build Coastguard Worker     }
2446*8b6cd535SAndroid Build Coastguard Worker     iter->second->set_total_documents(iter->second->total_documents() + 1);
2447*8b6cd535SAndroid Build Coastguard Worker     iter->second->set_total_token(iter->second->total_token() +
2448*8b6cd535SAndroid Build Coastguard Worker                                   score_data->length_in_tokens());
2449*8b6cd535SAndroid Build Coastguard Worker   }
2450*8b6cd535SAndroid Build Coastguard Worker   return corpus_info;
2451*8b6cd535SAndroid Build Coastguard Worker }
2452*8b6cd535SAndroid Build Coastguard Worker 
2453*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentDebugInfoProto>
GetDebugInfo(int verbosity) const2454*8b6cd535SAndroid Build Coastguard Worker DocumentStore::GetDebugInfo(int verbosity) const {
2455*8b6cd535SAndroid Build Coastguard Worker   DocumentDebugInfoProto debug_info;
2456*8b6cd535SAndroid Build Coastguard Worker   *debug_info.mutable_document_storage_info() = GetStorageInfo();
2457*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(Crc32 crc, GetChecksum());
2458*8b6cd535SAndroid Build Coastguard Worker   debug_info.set_crc(crc.Get());
2459*8b6cd535SAndroid Build Coastguard Worker   if (verbosity > 0) {
2460*8b6cd535SAndroid Build Coastguard Worker     ICING_ASSIGN_OR_RETURN(
2461*8b6cd535SAndroid Build Coastguard Worker         google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>
2462*8b6cd535SAndroid Build Coastguard Worker             corpus_info,
2463*8b6cd535SAndroid Build Coastguard Worker         CollectCorpusInfo());
2464*8b6cd535SAndroid Build Coastguard Worker     *debug_info.mutable_corpus_info() = std::move(corpus_info);
2465*8b6cd535SAndroid Build Coastguard Worker   }
2466*8b6cd535SAndroid Build Coastguard Worker   return debug_info;
2467*8b6cd535SAndroid Build Coastguard Worker }
2468*8b6cd535SAndroid Build Coastguard Worker 
UpdateScorablePropertyCache(const DocumentProto & document,SchemaTypeId schema_type_id)2469*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<int> DocumentStore::UpdateScorablePropertyCache(
2470*8b6cd535SAndroid Build Coastguard Worker     const DocumentProto& document, SchemaTypeId schema_type_id) {
2471*8b6cd535SAndroid Build Coastguard Worker   if (!feature_flags_.enable_scorable_properties()) {
2472*8b6cd535SAndroid Build Coastguard Worker     return kInvalidScorablePropertyCacheIndex;
2473*8b6cd535SAndroid Build Coastguard Worker   }
2474*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
2475*8b6cd535SAndroid Build Coastguard Worker       const std::vector<ScorablePropertyManager::ScorablePropertyInfo>*
2476*8b6cd535SAndroid Build Coastguard Worker           ordered_scorable_property_info,
2477*8b6cd535SAndroid Build Coastguard Worker       schema_store_->GetOrderedScorablePropertyInfo(schema_type_id));
2478*8b6cd535SAndroid Build Coastguard Worker   if (ordered_scorable_property_info == nullptr ||
2479*8b6cd535SAndroid Build Coastguard Worker       ordered_scorable_property_info->empty()) {
2480*8b6cd535SAndroid Build Coastguard Worker     // No scorable property defined under the schema config of the
2481*8b6cd535SAndroid Build Coastguard Worker     // schema_type_id.
2482*8b6cd535SAndroid Build Coastguard Worker     return kInvalidScorablePropertyCacheIndex;
2483*8b6cd535SAndroid Build Coastguard Worker   }
2484*8b6cd535SAndroid Build Coastguard Worker   ICING_ASSIGN_OR_RETURN(
2485*8b6cd535SAndroid Build Coastguard Worker       std::unique_ptr<ScorablePropertySet> scorable_property_set,
2486*8b6cd535SAndroid Build Coastguard Worker       ScorablePropertySet::Create(document, schema_type_id, schema_store_));
2487*8b6cd535SAndroid Build Coastguard Worker 
2488*8b6cd535SAndroid Build Coastguard Worker   return scorable_property_cache_->Write(
2489*8b6cd535SAndroid Build Coastguard Worker       scorable_property_set->GetScorablePropertySetProto());
2490*8b6cd535SAndroid Build Coastguard Worker }
2491*8b6cd535SAndroid Build Coastguard Worker 
2492*8b6cd535SAndroid Build Coastguard Worker }  // namespace lib
2493*8b6cd535SAndroid Build Coastguard Worker }  // namespace icing
2494