xref: /aosp_15_r20/external/icing/icing/store/dynamic-trie-key-mapper.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
16 #define ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
17 
18 #include <cstdint>
19 #include <cstring>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <type_traits>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/absl_ports/str_join.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/legacy/index/icing-dynamic-trie.h"
32 #include "icing/legacy/index/icing-filesystem.h"
33 #include "icing/store/key-mapper.h"
34 #include "icing/util/crc32.h"
35 #include "icing/util/logging.h"
36 #include "icing/util/status-macros.h"
37 
38 namespace icing {
39 namespace lib {
40 
41 // File-backed mapping between the string key and a trivially copyable value
42 // type.
43 //
44 // DynamicTrieKeyMapper is thread-compatible
45 template <typename T, typename Formatter = absl_ports::DefaultFormatter>
46 class DynamicTrieKeyMapper : public KeyMapper<T, Formatter> {
47  public:
48   // Returns an initialized instance of DynamicTrieKeyMapper that can
49   // immediately handle read/write operations.
50   // Returns any encountered IO errors.
51   //
52   // base_dir : Base directory used to save all the files required to persist
53   //            DynamicTrieKeyMapper. If this base_dir was previously used to
54   //            create a DynamicTrieKeyMapper, then this existing data would be
55   //            loaded. Otherwise, an empty DynamicTrieKeyMapper would be
56   //            created.
57   // maximum_size_bytes : The maximum allowable size of the key mapper storage.
58   static libtextclassifier3::StatusOr<
59       std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
60   Create(const Filesystem& filesystem, std::string_view base_dir,
61          int maximum_size_bytes);
62 
63   // Deletes all the files associated with the DynamicTrieKeyMapper.
64   //
65   // base_dir : Base directory used to save all the files required to persist
66   //            DynamicTrieKeyMapper. Should be the same as passed into
67   //            Create().
68   //
69   // Returns
70   //   OK on success
71   //   INTERNAL_ERROR on I/O error
72   static libtextclassifier3::Status Delete(const Filesystem& filesystem,
73                                            std::string_view base_dir);
74 
75   ~DynamicTrieKeyMapper() override = default;
76 
77   libtextclassifier3::Status Put(std::string_view key, T value) override;
78 
79   libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
80                                            T next_value) override;
81 
82   libtextclassifier3::StatusOr<T> Get(std::string_view key) const override;
83 
84   bool Delete(std::string_view key) override;
85 
86   std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator> GetIterator()
87       const override;
88 
num_keys()89   int32_t num_keys() const override { return trie_.size(); }
90 
91   libtextclassifier3::Status PersistToDisk() override;
92 
93   libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override;
94 
95   libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override;
96 
97   libtextclassifier3::StatusOr<Crc32> UpdateChecksum() override;
98 
99   libtextclassifier3::StatusOr<Crc32> GetChecksum() const override;
100 
101  private:
102   class Iterator : public KeyMapper<T, Formatter>::Iterator {
103    public:
Iterator(const IcingDynamicTrie & trie)104     explicit Iterator(const IcingDynamicTrie& trie)
105         : itr_(trie, /*prefix=*/""), start_(true) {}
106 
107     ~Iterator() override = default;
108 
Advance()109     bool Advance() override {
110       if (start_) {
111         start_ = false;
112         return itr_.IsValid();
113       }
114       return itr_.Advance();
115     }
116 
GetKey()117     std::string_view GetKey() const override { return itr_.GetKey(); }
118 
GetValue()119     T GetValue() const override {
120       T value;
121       memcpy(&value, itr_.GetValue(), sizeof(T));
122       return value;
123     }
124 
125    private:
126     IcingDynamicTrie::Iterator itr_;
127 
128     // TODO(b/241784804): remove this flag after changing IcingDynamicTrie to
129     //                    follow the common iterator pattern in our codebase.
130     bool start_;
131   };
132 
133   static constexpr char kDynamicTrieKeyMapperDir[] = "key_mapper_dir";
134   static constexpr char kDynamicTrieKeyMapperPrefix[] = "key_mapper";
135 
136   // Use DynamicTrieKeyMapper::Create() to instantiate.
137   explicit DynamicTrieKeyMapper(std::string_view key_mapper_dir);
138 
139   // Load any existing DynamicTrieKeyMapper data from disk, or creates a new
140   // instance of DynamicTrieKeyMapper on disk and gets ready to process
141   // read/write operations.
142   //
143   // Returns any encountered IO errors.
144   libtextclassifier3::Status Initialize(int maximum_size_bytes);
145 
146   const std::string file_prefix_;
147 
148   // TODO(adorokhine) Filesystem is a forked class that's available both in
149   // icing and icing namespaces. We will need icing::Filesystem in order
150   // to use IcingDynamicTrie. Filesystem class should be fully refactored
151   // to have a single definition across both namespaces. Such a class should
152   // use icing (and general google3) coding conventions and behave like
153   // a proper C++ class.
154   const IcingFilesystem icing_filesystem_;
155   IcingDynamicTrie trie_;
156 
157   static_assert(std::is_trivially_copyable<T>::value,
158                 "T must be trivially copyable");
159 };
160 
161 template <typename T, typename Formatter>
162 libtextclassifier3::StatusOr<
163     std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
Create(const Filesystem & filesystem,std::string_view base_dir,int maximum_size_bytes)164 DynamicTrieKeyMapper<T, Formatter>::Create(const Filesystem& filesystem,
165                                            std::string_view base_dir,
166                                            int maximum_size_bytes) {
167   // We create a subdirectory since the trie creates and stores multiple files.
168   // This makes it easier to isolate the trie files away from other files that
169   // could potentially be in the same base_dir, and makes it easier to delete.
170   const std::string key_mapper_dir =
171       absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
172   if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
173     return absl_ports::InternalError(absl_ports::StrCat(
174         "Failed to create DynamicTrieKeyMapper directory: ", key_mapper_dir));
175   }
176   auto mapper = std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>(
177       new DynamicTrieKeyMapper<T, Formatter>(key_mapper_dir));
178   ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
179   return mapper;
180 }
181 
182 template <typename T, typename Formatter>
Delete(const Filesystem & filesystem,std::string_view base_dir)183 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Delete(
184     const Filesystem& filesystem, std::string_view base_dir) {
185   std::string key_mapper_dir =
186       absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
187   if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
188     return absl_ports::InternalError(absl_ports::StrCat(
189         "Failed to delete DynamicTrieKeyMapper directory: ", key_mapper_dir));
190   }
191   return libtextclassifier3::Status::OK;
192 }
193 
194 template <typename T, typename Formatter>
DynamicTrieKeyMapper(std::string_view key_mapper_dir)195 DynamicTrieKeyMapper<T, Formatter>::DynamicTrieKeyMapper(
196     std::string_view key_mapper_dir)
197     : file_prefix_(
198           absl_ports::StrCat(key_mapper_dir, "/", kDynamicTrieKeyMapperPrefix)),
199       trie_(file_prefix_,
200             IcingDynamicTrie::RuntimeOptions().set_storage_policy(
201                 IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
202             &icing_filesystem_) {}
203 
204 template <typename T, typename Formatter>
Initialize(int maximum_size_bytes)205 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Initialize(
206     int maximum_size_bytes) {
207   IcingDynamicTrie::Options options;
208   // Divide the max space between the three internal arrays: nodes, nexts and
209   // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
210   // MaxSuffixesSize is in units of bytes.
211   options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
212   options.max_nexts = options.max_nodes;
213   options.max_suffixes_size =
214       sizeof(IcingDynamicTrie::Node) * options.max_nodes;
215   options.value_size = sizeof(T);
216 
217   if (!trie_.CreateIfNotExist(options)) {
218     return absl_ports::InternalError(absl_ports::StrCat(
219         "Failed to create DynamicTrieKeyMapper file: ", file_prefix_));
220   }
221   if (!trie_.Init()) {
222     return absl_ports::InternalError(absl_ports::StrCat(
223         "Failed to init DynamicTrieKeyMapper file: ", file_prefix_));
224   }
225   return libtextclassifier3::Status::OK;
226 }
227 
228 template <typename T, typename Formatter>
GetOrPut(std::string_view key,T next_value)229 libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::GetOrPut(
230     std::string_view key, T next_value) {
231   uint32_t value_index;
232   libtextclassifier3::Status status =
233       trie_.Insert(key, &next_value, &value_index,
234                    /*replace=*/false);
235   if (!status.ok()) {
236     ICING_LOG(DBG) << "Unable to insert key " << key
237                    << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
238                    << status.error_message();
239     return status;
240   }
241   // This memory address could be unaligned since we're just grabbing the value
242   // from somewhere in the trie's suffix array. The suffix array is filled with
243   // chars, so the address might not be aligned to T values.
244   const T* unaligned_value =
245       static_cast<const T*>(trie_.GetValueAtIndex(value_index));
246 
247   // memcpy the value to ensure that the returned value here is in a T-aligned
248   // address
249   T aligned_value;
250   memcpy(&aligned_value, unaligned_value, sizeof(T));
251   return aligned_value;
252 }
253 
254 template <typename T, typename Formatter>
Put(std::string_view key,T value)255 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Put(
256     std::string_view key, T value) {
257   libtextclassifier3::Status status = trie_.Insert(key, &value);
258   if (!status.ok()) {
259     ICING_LOG(DBG) << "Unable to insert key " << key
260                    << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
261                    << status.error_message();
262     return status;
263   }
264   return libtextclassifier3::Status::OK;
265 }
266 
267 template <typename T, typename Formatter>
Get(std::string_view key)268 libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::Get(
269     std::string_view key) const {
270   T value;
271   if (!trie_.Find(key, &value)) {
272     return absl_ports::NotFoundError(
273         absl_ports::StrCat("Key not found ", Formatter()(key),
274                            " in DynamicTrieKeyMapper ", file_prefix_, "."));
275   }
276   return value;
277 }
278 
279 template <typename T, typename Formatter>
Delete(std::string_view key)280 bool DynamicTrieKeyMapper<T, Formatter>::Delete(std::string_view key) {
281   return trie_.Delete(key);
282 }
283 
284 template <typename T, typename Formatter>
285 std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator>
GetIterator()286 DynamicTrieKeyMapper<T, Formatter>::GetIterator() const {
287   return std::make_unique<DynamicTrieKeyMapper<T, Formatter>::Iterator>(trie_);
288 }
289 
290 template <typename T, typename Formatter>
PersistToDisk()291 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::PersistToDisk() {
292   if (!trie_.Sync()) {
293     return absl_ports::InternalError(absl_ports::StrCat(
294         "Failed to sync DynamicTrieKeyMapper file: ", file_prefix_));
295   }
296 
297   return libtextclassifier3::Status::OK;
298 }
299 
300 template <typename T, typename Formatter>
301 libtextclassifier3::StatusOr<int64_t>
GetDiskUsage()302 DynamicTrieKeyMapper<T, Formatter>::GetDiskUsage() const {
303   int64_t size = trie_.GetDiskUsage();
304   if (size == IcingFilesystem::kBadFileSize || size < 0) {
305     return absl_ports::InternalError("Failed to get disk usage of key mapper");
306   }
307   return size;
308 }
309 
310 template <typename T, typename Formatter>
311 libtextclassifier3::StatusOr<int64_t>
GetElementsSize()312 DynamicTrieKeyMapper<T, Formatter>::GetElementsSize() const {
313   int64_t size = trie_.GetElementsSize();
314   if (size == IcingFilesystem::kBadFileSize || size < 0) {
315     return absl_ports::InternalError(
316         "Failed to get disk usage of elements in the key mapper");
317   }
318   return size;
319 }
320 
321 template <typename T, typename Formatter>
322 libtextclassifier3::StatusOr<Crc32>
UpdateChecksum()323 DynamicTrieKeyMapper<T, Formatter>::UpdateChecksum() {
324   return trie_.UpdateCrc();
325 }
326 
327 template <typename T, typename Formatter>
328 libtextclassifier3::StatusOr<Crc32>
GetChecksum()329 DynamicTrieKeyMapper<T, Formatter>::GetChecksum() const {
330   return trie_.GetCrc();
331 }
332 
333 }  // namespace lib
334 }  // namespace icing
335 
336 #endif  // ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
337