xref: /aosp_15_r20/external/icing/icing/store/persistent-hash-map-key-mapper.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
16 #define ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
17 
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/str_join.h"
#include "icing/file/filesystem.h"
#include "icing/file/persistent-hash-map.h"
#include "icing/store/key-mapper.h"
#include "icing/util/crc32.h"
#include "icing/util/status-macros.h"
32 
33 namespace icing {
34 namespace lib {
35 
36 // File-backed mapping between the string key and a trivially copyable value
37 // type.
38 template <typename T, typename Formatter = absl_ports::DefaultFormatter>
39 class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> {
40  public:
41   static constexpr int32_t kDefaultMaxNumEntries =
42       PersistentHashMap::Entry::kMaxNumEntries;
43   static constexpr int32_t kDefaultAverageKVByteSize =
44       PersistentHashMap::Options::kDefaultAverageKVByteSize;
45   static constexpr int32_t kDefaultMaxLoadFactorPercent =
46       PersistentHashMap::Options::kDefaultMaxLoadFactorPercent;
47 
48   // Returns an initialized instance of PersistentHashMapKeyMapper that can
49   // immediately handle read/write operations.
50   // Returns any encountered IO errors.
51   //
52   // filesystem: Object to make system level calls
53   // working_path: Working directory used to save all the files required to
54   //               persist PersistentHashMapKeyMapper. If this working_path was
55   //               previously used to create a PersistentHashMapKeyMapper, then
56   //               this existing data would be loaded. Otherwise, an empty
57   //               PersistentHashMapKeyMapper would be created. See
58   //               PersistentStorage for more details about the concept of
59   //               working_path.
60   // pre_mapping_fbv: flag indicating whether memory map max possible file size
61   //                  for underlying FileBackedVector before growing the actual
62   //                  file size.
63   // max_num_entries: max # of kvps. It will be used to compute 3 storages size.
64   // average_kv_byte_size: average byte size of a single key + serialized value.
65   //                       It will be used to compute kv_storage size.
66   // max_load_factor_percent: percentage of the max loading for the hash map.
67   //                          load_factor_percent = 100 * num_keys / num_buckets
68   //                          If load_factor_percent exceeds
69   //                          max_load_factor_percent, then rehash will be
70   //                          invoked (and # of buckets will be doubled).
71   //                          Note that load_factor_percent exceeding 100 is
72   //                          considered valid.
73   static libtextclassifier3::StatusOr<
74       std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
75   Create(const Filesystem& filesystem, std::string working_path,
76          bool pre_mapping_fbv, int32_t max_num_entries = kDefaultMaxNumEntries,
77          int32_t average_kv_byte_size = kDefaultAverageKVByteSize,
78          int32_t max_load_factor_percent = kDefaultMaxLoadFactorPercent);
79 
80   // Deletes working_path (and all the files under it recursively) associated
81   // with the PersistentHashMapKeyMapper.
82   //
83   // working_path: Working directory used to save all the files required to
84   //               persist PersistentHashMapKeyMapper. Should be the same as
85   //               passed into Create().
86   //
87   // Returns:
88   //   OK on success
89   //   INTERNAL_ERROR on I/O error
90   static libtextclassifier3::Status Delete(const Filesystem& filesystem,
91                                            const std::string& working_path);
92 
93   ~PersistentHashMapKeyMapper() override = default;
94 
Put(std::string_view key,T value)95   libtextclassifier3::Status Put(std::string_view key, T value) override {
96     return persistent_hash_map_->Put(key, &value);
97   }
98 
GetOrPut(std::string_view key,T next_value)99   libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key,
100                                            T next_value) override {
101     ICING_RETURN_IF_ERROR(persistent_hash_map_->GetOrPut(key, &next_value));
102     return next_value;
103   }
104 
Get(std::string_view key)105   libtextclassifier3::StatusOr<T> Get(std::string_view key) const override {
106     T value;
107     ICING_RETURN_IF_ERROR(persistent_hash_map_->Get(key, &value));
108     return value;
109   }
110 
Delete(std::string_view key)111   bool Delete(std::string_view key) override {
112     return persistent_hash_map_->Delete(key).ok();
113   }
114 
GetIterator()115   std::unique_ptr<typename KeyMapper<T, Formatter>::Iterator> GetIterator()
116       const override {
117     return std::make_unique<PersistentHashMapKeyMapper<T, Formatter>::Iterator>(
118         persistent_hash_map_.get());
119   }
120 
num_keys()121   int32_t num_keys() const override { return persistent_hash_map_->size(); }
122 
PersistToDisk()123   libtextclassifier3::Status PersistToDisk() override {
124     return persistent_hash_map_->PersistToDisk();
125   }
126 
GetDiskUsage()127   libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const override {
128     return persistent_hash_map_->GetDiskUsage();
129   }
130 
GetElementsSize()131   libtextclassifier3::StatusOr<int64_t> GetElementsSize() const override {
132     return persistent_hash_map_->GetElementsSize();
133   }
134 
UpdateChecksum()135   libtextclassifier3::StatusOr<Crc32> UpdateChecksum() override {
136     return persistent_hash_map_->UpdateChecksums();
137   }
138 
GetChecksum()139   libtextclassifier3::StatusOr<Crc32> GetChecksum() const override {
140     return persistent_hash_map_->GetChecksum();
141   }
142 
143  private:
144   class Iterator : public KeyMapper<T, Formatter>::Iterator {
145    public:
Iterator(const PersistentHashMap * persistent_hash_map)146     explicit Iterator(const PersistentHashMap* persistent_hash_map)
147         : itr_(persistent_hash_map->GetIterator()) {}
148 
149     ~Iterator() override = default;
150 
Advance()151     bool Advance() override { return itr_.Advance(); }
152 
GetKey()153     std::string_view GetKey() const override { return itr_.GetKey(); }
154 
GetValue()155     T GetValue() const override {
156       T value;
157       memcpy(&value, itr_.GetValue(), sizeof(T));
158       return value;
159     }
160 
161    private:
162     PersistentHashMap::Iterator itr_;
163   };
164 
165   // Use PersistentHashMapKeyMapper::Create() to instantiate.
PersistentHashMapKeyMapper(std::unique_ptr<PersistentHashMap> persistent_hash_map)166   explicit PersistentHashMapKeyMapper(
167       std::unique_ptr<PersistentHashMap> persistent_hash_map)
168       : persistent_hash_map_(std::move(persistent_hash_map)) {}
169 
170   std::unique_ptr<PersistentHashMap> persistent_hash_map_;
171 
172   static_assert(std::is_trivially_copyable<T>::value,
173                 "T must be trivially copyable");
174 };
175 
176 template <typename T, typename Formatter>
177 /* static */ libtextclassifier3::StatusOr<
178     std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>>
Create(const Filesystem & filesystem,std::string working_path,bool pre_mapping_fbv,int32_t max_num_entries,int32_t average_kv_byte_size,int32_t max_load_factor_percent)179 PersistentHashMapKeyMapper<T, Formatter>::Create(
180     const Filesystem& filesystem, std::string working_path,
181     bool pre_mapping_fbv, int32_t max_num_entries, int32_t average_kv_byte_size,
182     int32_t max_load_factor_percent) {
183   ICING_ASSIGN_OR_RETURN(
184       std::unique_ptr<PersistentHashMap> persistent_hash_map,
185       PersistentHashMap::Create(
186           filesystem, std::move(working_path),
187           PersistentHashMap::Options(
188               /*value_type_size_in=*/sizeof(T),
189               /*max_num_entries_in=*/max_num_entries,
190               /*max_load_factor_percent_in=*/max_load_factor_percent,
191               /*average_kv_byte_size_in=*/average_kv_byte_size,
192               /*init_num_buckets_in=*/
193               PersistentHashMap::Options::kDefaultInitNumBuckets,
194               /*pre_mapping_fbv_in=*/pre_mapping_fbv)));
195   return std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>(
196       new PersistentHashMapKeyMapper<T, Formatter>(
197           std::move(persistent_hash_map)));
198 }
199 
200 template <typename T, typename Formatter>
201 /* static */ libtextclassifier3::Status
Delete(const Filesystem & filesystem,const std::string & working_path)202 PersistentHashMapKeyMapper<T, Formatter>::Delete(
203     const Filesystem& filesystem, const std::string& working_path) {
204   return PersistentHashMap::Discard(filesystem, working_path);
205 }
206 
207 }  // namespace lib
208 }  // namespace icing
209 
210 #endif  // ICING_STORE_PERSISTENT_HASH_MAP_KEY_MAPPER_H_
211