1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
16 #define ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
17
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/str_join.h"
#include "icing/file/filesystem.h"
#include "icing/file/persistent-hash-map.h"
#include "icing/store/key-mapper.h"
#include "icing/util/crc32.h"
#include "icing/util/status-macros.h"
32
33 namespace icing {
34 namespace lib {
35
36 // File-backed mapping between the string key and a trivially copyable value
37 // type.
38 template <typename T, typename Formatter = absl_ports::DefaultFormatter>
39 class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
40 public:
41 static constexpr int32_t kDefaultMaxNumEntries =
42 PersistentHashMap::Entry::kMaxNumEntries;
43 static constexpr int32_t kDefaultAverageKVByteSize =
44 PersistentHashMap::Options::kDefaultAverageKVByteSize;
45 static constexpr int32_t kDefaultMaxLoadFactorPercent =
46 PersistentHashMap::Options::kDefaultMaxLoadFactorPercent;
47
48 // Returns an initialized instance of PersistentHashMapKeyMapper that can
49 // immediately handle read/write operations.
50 // Returns any encountered IO errors.
51 //
52 // filesystem: Object to make system level calls
53 // working_path: Working directory used to save all the files required to
54 // persist PersistentHashMapKeyMapper. If this working_path was
55 // previously used to create a PersistentHashMapKeyMapper, then
56 // this existing data would be loaded. Otherwise, an empty
57 // PersistentHashMapKeyMapper would be created. See
58 // PersistentStorage for more details about the concept of
59 // working_path.
60 // pre_mapping_fbv: flag indicating whether memory map max possible file size
61 // for underlying FileBackedVector before growing the actual
62 // file size.
63 // max_num_entries: max # of kvps. It will be used to compute 3 storages size.
64 // average_kv_byte_size: average byte size of a single key + serialized value.
65 // It will be used to compute kv_storage size.
66 // max_load_factor_percent: percentage of the max loading for the hash map.
67 // load_factor_percent = 100 * num_keys / num_buckets
68 // If load_factor_percent exceeds
69 // max_load_factor_percent, then rehash will be
70 // invoked (and # of buckets will be doubled).
71 // Note that load_factor_percent exceeding 100 is
72 // considered valid.
73 static libtextclassifier3::StatusOr<
74 std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
75 Create(const Filesystem& filesystem, std::string working_path,
76 bool pre_mapping_fbv, int32_t max_num_entries = kDefaultMaxNumEntries,
77 int32_t average_kv_byte_size = kDefaultAverageKVByteSize,
78 int32_t max_load_factor_percent = kDefaultMaxLoadFactorPercent);
79
80 // Deletes working_path (and all the files under it recursively) associated
81 // with the PersistentHashMapKeyMapper.
82 //
83 // working_path: Working directory used to save all the files required to
84 // persist PersistentHashMapKeyMapper. Should be the same as
85 // passed into Create().
86 //
87 // Returns:
88 // OK on success
89 // INTERNAL_ERROR on I/O error
90 static libtextclassifier3::Status Delete(const Filesystem& filesystem,
91 const std::string& working_path);
92
93 ~PersistentHashMapKeyMapper() override = default;
94
Put(std::string_view key,T value)95 libtextclassifier3::Status Put(std::string_view key, T value) override {
96 return persistent_hash_map_->Put(key, &value);
97 }
98
GetOrPut(std::string_view key,T next_value)99 libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
100 T next_value) override {
101 ICING_RETURN_IF_ERROR(persistent_hash_map_->GetOrPut(key, &next_value));
102 return next_value;
103 }
104
Get(std::string_view key)105 libtextclassifier3::StatusOr<T> Get(std::string_view key) const override {
106 T value;
107 ICING_RETURN_IF_ERROR(persistent_hash_map_->Get(key, &value));
108 return value;
109 }
110
Delete(std::string_view key)111 bool Delete(std::string_view key) override {
112 return persistent_hash_map_->Delete(key).ok();
113 }
114
GetIterator()115 std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator> GetIterator()
116 const override {
117 return std::make_unique<PersistentHashMapKeyMapper<T, Formatter>::Iterator>(
118 persistent_hash_map_.get());
119 }
120
num_keys()121 int32_t num_keys() const override { return persistent_hash_map_->size(); }
122
PersistToDisk()123 libtextclassifier3::Status PersistToDisk() override {
124 return persistent_hash_map_->PersistToDisk();
125 }
126
GetDiskUsage()127 libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override {
128 return persistent_hash_map_->GetDiskUsage();
129 }
130
GetElementsSize()131 libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override {
132 return persistent_hash_map_->GetElementsSize();
133 }
134
UpdateChecksum()135 libtextclassifier3::StatusOr<Crc32> UpdateChecksum() override {
136 return persistent_hash_map_->UpdateChecksums();
137 }
138
GetChecksum()139 libtextclassifier3::StatusOr<Crc32> GetChecksum() const override {
140 return persistent_hash_map_->GetChecksum();
141 }
142
143 private:
144 class Iterator : public KeyMapper<T, Formatter>::Iterator {
145 public:
Iterator(const PersistentHashMap * persistent_hash_map)146 explicit Iterator(const PersistentHashMap* persistent_hash_map)
147 : itr_(persistent_hash_map->GetIterator()) {}
148
149 ~Iterator() override = default;
150
Advance()151 bool Advance() override { return itr_.Advance(); }
152
GetKey()153 std::string_view GetKey() const override { return itr_.GetKey(); }
154
GetValue()155 T GetValue() const override {
156 T value;
157 memcpy(&value, itr_.GetValue(), sizeof(T));
158 return value;
159 }
160
161 private:
162 PersistentHashMap::Iterator itr_;
163 };
164
165 // Use PersistentHashMapKeyMapper::Create() to instantiate.
PersistentHashMapKeyMapper(std::unique_ptr<PersistentHashMap> persistent_hash_map)166 explicit PersistentHashMapKeyMapper(
167 std::unique_ptr<PersistentHashMap> persistent_hash_map)
168 : persistent_hash_map_(std::move(persistent_hash_map)) {}
169
170 std::unique_ptr<PersistentHashMap> persistent_hash_map_;
171
172 static_assert(std::is_trivially_copyable<T>::value,
173 "T must be trivially copyable");
174 };
175
176 template <typename T, typename Formatter>
177 /* static */ libtextclassifier3::StatusOr<
178 std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
Create(const Filesystem & filesystem,std::string working_path,bool pre_mapping_fbv,int32_t max_num_entries,int32_t average_kv_byte_size,int32_t max_load_factor_percent)179 PersistentHashMapKeyMapper<T, Formatter>::Create(
180 const Filesystem& filesystem, std::string working_path,
181 bool pre_mapping_fbv, int32_t max_num_entries, int32_t average_kv_byte_size,
182 int32_t max_load_factor_percent) {
183 ICING_ASSIGN_OR_RETURN(
184 std::unique_ptr<PersistentHashMap> persistent_hash_map,
185 PersistentHashMap::Create(
186 filesystem, std::move(working_path),
187 PersistentHashMap::Options(
188 /*value_type_size_in=*/sizeof(T),
189 /*max_num_entries_in=*/max_num_entries,
190 /*max_load_factor_percent_in=*/max_load_factor_percent,
191 /*average_kv_byte_size_in=*/average_kv_byte_size,
192 /*init_num_buckets_in=*/
193 PersistentHashMap::Options::kDefaultInitNumBuckets,
194 /*pre_mapping_fbv_in=*/pre_mapping_fbv)));
195 return std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>(
196 new PersistentHashMapKeyMapper<T, Formatter>(
197 std::move(persistent_hash_map)));
198 }
199
200 template <typename T, typename Formatter>
201 /* static */ libtextclassifier3::Status
Delete(const Filesystem & filesystem,const std::string & working_path)202 PersistentHashMapKeyMapper<T, Formatter>::Delete(
203 const Filesystem& filesystem, const std::string& working_path) {
204 return PersistentHashMap::Discard(filesystem, working_path);
205 }
206
207 } // namespace lib
208 } // namespace icing
209
210 #endif // ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
211