1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
16 #define ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
17
18 #include <cstdint>
19 #include <cstring>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <type_traits>
24
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/absl_ports/str_join.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/legacy/index/icing-dynamic-trie.h"
32 #include "icing/legacy/index/icing-filesystem.h"
33 #include "icing/store/key-mapper.h"
34 #include "icing/util/crc32.h"
35 #include "icing/util/logging.h"
36 #include "icing/util/status-macros.h"
37
38 namespace icing {
39 namespace lib {
40
41 // File-backed mapping between the string key and a trivially copyable value
42 // type.
43 //
44 // DynamicTrieKeyMapper is thread-compatible
45 template <typename T, typename Formatter = absl_ports::DefaultFormatter>
46 class DynamicTrieKeyMapper : public KeyMapper<T, Formatter> {
47 public:
48 // Returns an initialized instance of DynamicTrieKeyMapper that can
49 // immediately handle read/write operations.
50 // Returns any encountered IO errors.
51 //
52 // base_dir : Base directory used to save all the files required to persist
53 // DynamicTrieKeyMapper. If this base_dir was previously used to
54 // create a DynamicTrieKeyMapper, then this existing data would be
55 // loaded. Otherwise, an empty DynamicTrieKeyMapper would be
56 // created.
57 // maximum_size_bytes : The maximum allowable size of the key mapper storage.
58 static libtextclassifier3::StatusOr<
59 std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
60 Create(const Filesystem& filesystem, std::string_view base_dir,
61 int maximum_size_bytes);
62
63 // Deletes all the files associated with the DynamicTrieKeyMapper.
64 //
65 // base_dir : Base directory used to save all the files required to persist
66 // DynamicTrieKeyMapper. Should be the same as passed into
67 // Create().
68 //
69 // Returns
70 // OK on success
71 // INTERNAL_ERROR on I/O error
72 static libtextclassifier3::Status Delete(const Filesystem& filesystem,
73 std::string_view base_dir);
74
75 ~DynamicTrieKeyMapper() override = default;
76
77 libtextclassifier3::Status Put(std::string_view key, T value) override;
78
79 libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
80 T next_value) override;
81
82 libtextclassifier3::StatusOr<T> Get(std::string_view key) const override;
83
84 bool Delete(std::string_view key) override;
85
86 std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator> GetIterator()
87 const override;
88
num_keys()89 int32_t num_keys() const override { return trie_.size(); }
90
91 libtextclassifier3::Status PersistToDisk() override;
92
93 libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override;
94
95 libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override;
96
97 libtextclassifier3::StatusOr<Crc32> UpdateChecksum() override;
98
99 libtextclassifier3::StatusOr<Crc32> GetChecksum() const override;
100
101 private:
102 class Iterator : public KeyMapper<T, Formatter>::Iterator {
103 public:
Iterator(const IcingDynamicTrie & trie)104 explicit Iterator(const IcingDynamicTrie& trie)
105 : itr_(trie, /*prefix=*/""), start_(true) {}
106
107 ~Iterator() override = default;
108
Advance()109 bool Advance() override {
110 if (start_) {
111 start_ = false;
112 return itr_.IsValid();
113 }
114 return itr_.Advance();
115 }
116
GetKey()117 std::string_view GetKey() const override { return itr_.GetKey(); }
118
GetValue()119 T GetValue() const override {
120 T value;
121 memcpy(&value, itr_.GetValue(), sizeof(T));
122 return value;
123 }
124
125 private:
126 IcingDynamicTrie::Iterator itr_;
127
128 // TODO(b/241784804): remove this flag after changing IcingDynamicTrie to
129 // follow the common iterator pattern in our codebase.
130 bool start_;
131 };
132
133 static constexpr char kDynamicTrieKeyMapperDir[] = "key_mapper_dir";
134 static constexpr char kDynamicTrieKeyMapperPrefix[] = "key_mapper";
135
136 // Use DynamicTrieKeyMapper::Create() to instantiate.
137 explicit DynamicTrieKeyMapper(std::string_view key_mapper_dir);
138
139 // Load any existing DynamicTrieKeyMapper data from disk, or creates a new
140 // instance of DynamicTrieKeyMapper on disk and gets ready to process
141 // read/write operations.
142 //
143 // Returns any encountered IO errors.
144 libtextclassifier3::Status Initialize(int maximum_size_bytes);
145
146 const std::string file_prefix_;
147
148 // TODO(adorokhine) Filesystem is a forked class that's available both in
149 // icing and icing namespaces. We will need icing::Filesystem in order
150 // to use IcingDynamicTrie. Filesystem class should be fully refactored
151 // to have a single definition across both namespaces. Such a class should
152 // use icing (and general google3) coding conventions and behave like
153 // a proper C++ class.
154 const IcingFilesystem icing_filesystem_;
155 IcingDynamicTrie trie_;
156
157 static_assert(std::is_trivially_copyable<T>::value,
158 "T must be trivially copyable");
159 };
160
161 template <typename T, typename Formatter>
162 libtextclassifier3::StatusOr<
163 std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>>
Create(const Filesystem & filesystem,std::string_view base_dir,int maximum_size_bytes)164 DynamicTrieKeyMapper<T, Formatter>::Create(const Filesystem& filesystem,
165 std::string_view base_dir,
166 int maximum_size_bytes) {
167 // We create a subdirectory since the trie creates and stores multiple files.
168 // This makes it easier to isolate the trie files away from other files that
169 // could potentially be in the same base_dir, and makes it easier to delete.
170 const std::string key_mapper_dir =
171 absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
172 if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
173 return absl_ports::InternalError(absl_ports::StrCat(
174 "Failed to create DynamicTrieKeyMapper directory: ", key_mapper_dir));
175 }
176 auto mapper = std::unique_ptr<DynamicTrieKeyMapper<T, Formatter>>(
177 new DynamicTrieKeyMapper<T, Formatter>(key_mapper_dir));
178 ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
179 return mapper;
180 }
181
182 template <typename T, typename Formatter>
Delete(const Filesystem & filesystem,std::string_view base_dir)183 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Delete(
184 const Filesystem& filesystem, std::string_view base_dir) {
185 std::string key_mapper_dir =
186 absl_ports::StrCat(base_dir, "/", kDynamicTrieKeyMapperDir);
187 if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
188 return absl_ports::InternalError(absl_ports::StrCat(
189 "Failed to delete DynamicTrieKeyMapper directory: ", key_mapper_dir));
190 }
191 return libtextclassifier3::Status::OK;
192 }
193
194 template <typename T, typename Formatter>
DynamicTrieKeyMapper(std::string_view key_mapper_dir)195 DynamicTrieKeyMapper<T, Formatter>::DynamicTrieKeyMapper(
196 std::string_view key_mapper_dir)
197 : file_prefix_(
198 absl_ports::StrCat(key_mapper_dir, "/", kDynamicTrieKeyMapperPrefix)),
199 trie_(file_prefix_,
200 IcingDynamicTrie::RuntimeOptions().set_storage_policy(
201 IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
202 &icing_filesystem_) {}
203
204 template <typename T, typename Formatter>
Initialize(int maximum_size_bytes)205 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Initialize(
206 int maximum_size_bytes) {
207 IcingDynamicTrie::Options options;
208 // Divide the max space between the three internal arrays: nodes, nexts and
209 // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
210 // MaxSuffixesSize is in units of bytes.
211 options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
212 options.max_nexts = options.max_nodes;
213 options.max_suffixes_size =
214 sizeof(IcingDynamicTrie::Node) * options.max_nodes;
215 options.value_size = sizeof(T);
216
217 if (!trie_.CreateIfNotExist(options)) {
218 return absl_ports::InternalError(absl_ports::StrCat(
219 "Failed to create DynamicTrieKeyMapper file: ", file_prefix_));
220 }
221 if (!trie_.Init()) {
222 return absl_ports::InternalError(absl_ports::StrCat(
223 "Failed to init DynamicTrieKeyMapper file: ", file_prefix_));
224 }
225 return libtextclassifier3::Status::OK;
226 }
227
228 template <typename T, typename Formatter>
GetOrPut(std::string_view key,T next_value)229 libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::GetOrPut(
230 std::string_view key, T next_value) {
231 uint32_t value_index;
232 libtextclassifier3::Status status =
233 trie_.Insert(key, &next_value, &value_index,
234 /*replace=*/false);
235 if (!status.ok()) {
236 ICING_LOG(DBG) << "Unable to insert key " << key
237 << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
238 << status.error_message();
239 return status;
240 }
241 // This memory address could be unaligned since we're just grabbing the value
242 // from somewhere in the trie's suffix array. The suffix array is filled with
243 // chars, so the address might not be aligned to T values.
244 const T* unaligned_value =
245 static_cast<const T*>(trie_.GetValueAtIndex(value_index));
246
247 // memcpy the value to ensure that the returned value here is in a T-aligned
248 // address
249 T aligned_value;
250 memcpy(&aligned_value, unaligned_value, sizeof(T));
251 return aligned_value;
252 }
253
254 template <typename T, typename Formatter>
Put(std::string_view key,T value)255 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::Put(
256 std::string_view key, T value) {
257 libtextclassifier3::Status status = trie_.Insert(key, &value);
258 if (!status.ok()) {
259 ICING_LOG(DBG) << "Unable to insert key " << key
260 << " into DynamicTrieKeyMapper " << file_prefix_ << ".\n"
261 << status.error_message();
262 return status;
263 }
264 return libtextclassifier3::Status::OK;
265 }
266
267 template <typename T, typename Formatter>
Get(std::string_view key)268 libtextclassifier3::StatusOr<T> DynamicTrieKeyMapper<T, Formatter>::Get(
269 std::string_view key) const {
270 T value;
271 if (!trie_.Find(key, &value)) {
272 return absl_ports::NotFoundError(
273 absl_ports::StrCat("Key not found ", Formatter()(key),
274 " in DynamicTrieKeyMapper ", file_prefix_, "."));
275 }
276 return value;
277 }
278
279 template <typename T, typename Formatter>
Delete(std::string_view key)280 bool DynamicTrieKeyMapper<T, Formatter>::Delete(std::string_view key) {
281 return trie_.Delete(key);
282 }
283
284 template <typename T, typename Formatter>
285 std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator>
GetIterator()286 DynamicTrieKeyMapper<T, Formatter>::GetIterator() const {
287 return std::make_unique<DynamicTrieKeyMapper<T, Formatter>::Iterator>(trie_);
288 }
289
290 template <typename T, typename Formatter>
PersistToDisk()291 libtextclassifier3::Status DynamicTrieKeyMapper<T, Formatter>::PersistToDisk() {
292 if (!trie_.Sync()) {
293 return absl_ports::InternalError(absl_ports::StrCat(
294 "Failed to sync DynamicTrieKeyMapper file: ", file_prefix_));
295 }
296
297 return libtextclassifier3::Status::OK;
298 }
299
300 template <typename T, typename Formatter>
301 libtextclassifier3::StatusOr<int64_t>
GetDiskUsage()302 DynamicTrieKeyMapper<T, Formatter>::GetDiskUsage() const {
303 int64_t size = trie_.GetDiskUsage();
304 if (size == IcingFilesystem::kBadFileSize || size < 0) {
305 return absl_ports::InternalError("Failed to get disk usage of key mapper");
306 }
307 return size;
308 }
309
310 template <typename T, typename Formatter>
311 libtextclassifier3::StatusOr<int64_t>
GetElementsSize()312 DynamicTrieKeyMapper<T, Formatter>::GetElementsSize() const {
313 int64_t size = trie_.GetElementsSize();
314 if (size == IcingFilesystem::kBadFileSize || size < 0) {
315 return absl_ports::InternalError(
316 "Failed to get disk usage of elements in the key mapper");
317 }
318 return size;
319 }
320
321 template <typename T, typename Formatter>
322 libtextclassifier3::StatusOr<Crc32>
UpdateChecksum()323 DynamicTrieKeyMapper<T, Formatter>::UpdateChecksum() {
324 return trie_.UpdateCrc();
325 }
326
327 template <typename T, typename Formatter>
328 libtextclassifier3::StatusOr<Crc32>
GetChecksum()329 DynamicTrieKeyMapper<T, Formatter>::GetChecksum() const {
330 return trie_.GetCrc();
331 }
332
333 } // namespace lib
334 } // namespace icing
335
336 #endif // ICING_STORE_DYNAMIC_TRIE_KEY_MAPPER_H_
337