1 // Copyright (C) 2022 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_ 16 #define ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_ 17 18 #include <cstdint> 19 #include <memory> 20 #include <string> 21 #include <string_view> 22 23 #include "icing/text_classifier/lib3/utils/base/status.h" 24 #include "icing/text_classifier/lib3/utils/base/statusor.h" 25 #include "icing/file/persistent-storage.h" 26 #include "icing/index/iterator/doc-hit-info-iterator.h" 27 #include "icing/schema/schema-store.h" 28 #include "icing/schema/section.h" 29 #include "icing/store/document-id.h" 30 #include "icing/store/document-store.h" 31 #include "icing/util/crc32.h" 32 33 namespace icing { 34 namespace lib { 35 36 template <typename T> 37 class NumericIndex : public PersistentStorage { 38 public: 39 using value_type = T; 40 41 // Editor class for batch adding new records into numeric index for a given 42 // property, DocumentId and SectionId. The caller should use BufferKey to 43 // buffer a key (calls several times for multiple keys) and finally call 44 // IndexAllBufferedKeys to batch add all buffered keys (with DocumentId + 45 // SectionId info, i.e. BasicHit) into numeric index. 46 // 47 // For example, there are values = [5, 1, 10, -100] in DocumentId = 5, 48 // SectionId = 1 (property "timestamp"). 49 // Then the client should call BufferKey(5), BufferKey(1), BufferKey(10), 50 // BufferKey(-100) first, and finally call IndexAllBufferedKeys once to batch 51 // add these records into numeric index. 52 class Editor { 53 public: Editor(std::string_view property_path,DocumentId document_id,SectionId section_id)54 explicit Editor(std::string_view property_path, DocumentId document_id, 55 SectionId section_id) 56 : property_path_(property_path), 57 document_id_(document_id), 58 section_id_(section_id) {} 59 60 virtual ~Editor() = default; 61 62 // Buffers a new key. 63 // 64 // Returns: 65 // - OK on success 66 // - Any other errors, depending on the actual implementation 67 virtual libtextclassifier3::Status BufferKey(T key) = 0; 68 69 // Adds all buffered keys into numeric index. 70 // 71 // Returns: 72 // - OK on success 73 // - Any other errors, depending on the actual implementation 74 virtual libtextclassifier3::Status IndexAllBufferedKeys() && = 0; 75 76 protected: 77 std::string property_path_; 78 DocumentId document_id_; 79 SectionId section_id_; 80 }; 81 82 // Iterator class for numeric index range query [key_lower, key_upper] 83 // (inclusive for both side) on a given property (see GetIterator). There are 84 // some basic requirements for implementation: 85 // - Iterates through all relevant doc hits. 86 // - Merges multiple SectionIds of doc hits with same DocumentId into a single 87 // SectionIdMask and constructs DocHitInfo. 88 // - Returns DocHitInfo in descending DocumentId order. 89 // 90 // For example, relevant doc hits (DocumentId, SectionId) are [(2, 0), (4, 3), 91 // (2, 1), (6, 2), (4, 2)]. Advance() and GetDocHitInfo() should return 92 // DocHitInfo(6, SectionIdMask(2)), DocHitInfo(4, SectionIdMask(2, 3)) and 93 // DocHitInfo(2, SectionIdMask(0, 1)). 94 class Iterator { 95 public: Iterator(T key_lower,T key_upper)96 explicit Iterator(T key_lower, T key_upper) 97 : key_lower_(key_lower), key_upper_(key_upper) {} 98 99 virtual ~Iterator() = default; 100 101 virtual libtextclassifier3::Status Advance() = 0; 102 103 virtual DocHitInfo GetDocHitInfo() const = 0; 104 105 virtual int32_t GetNumAdvanceCalls() const = 0; 106 107 virtual int32_t GetNumBlocksInspected() const = 0; 108 109 protected: 110 T key_lower_; 111 T key_upper_; 112 }; 113 114 virtual ~NumericIndex() = default; 115 116 // Returns an Editor instance for adding new records into numeric index for a 117 // given property, DocumentId and SectionId. See Editor for more details. 118 virtual std::unique_ptr<Editor> Edit(std::string_view property_path, 119 DocumentId document_id, 120 SectionId section_id) = 0; 121 122 // Returns a DocHitInfoIteratorNumeric (in DocHitInfoIterator interface type 123 // format) for iterating through all docs which have the specified (numeric) 124 // property contents in range [key_lower, key_upper]. 125 // 126 // In general, different numeric index implementations require different data 127 // iterator implementations, so class Iterator is an abstraction of the data 128 // iterator and DocHitInfoIteratorNumeric can work with any implementation of 129 // it. See Iterator and DocHitInfoIteratorNumeric for more details. 130 // 131 // Returns: 132 // - std::unique_ptr<DocHitInfoIterator> on success 133 // - NOT_FOUND_ERROR if there is no numeric index for property_path 134 // - INVALID_ARGUMENT_ERROR if key_lower > key_upper 135 // - Any other errors, depending on the actual implementation 136 virtual libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> 137 GetIterator(std::string_view property_path, T key_lower, T key_upper, 138 const DocumentStore& document_store, 139 const SchemaStore& schema_store, 140 int64_t current_time_ms) const = 0; 141 142 // Reduces internal file sizes by reclaiming space and ids of deleted 143 // documents. Numeric index will convert all data (hits) to the new document 144 // ids and regenerate all index files. If all data in a property path are 145 // completely deleted, then the underlying storage must be discarded as well. 146 // 147 // - document_id_old_to_new: a map for converting old document id to new 148 // document id. 149 // - new_last_added_document_id: will be used to update the last added 150 // document id in the numeric index. 151 // 152 // Returns: 153 // - OK on success 154 // - Any other errors, depending on the actual implementation 155 virtual libtextclassifier3::Status Optimize( 156 const std::vector<DocumentId>& document_id_old_to_new, 157 DocumentId new_last_added_document_id) = 0; 158 159 // Clears all data in the integer index and set last_added_document_id to 160 // kInvalidDocumentId. 161 // 162 // Returns: 163 // - OK on success 164 // - Any other errors, depending on the actual implementation 165 virtual libtextclassifier3::Status Clear() = 0; 166 167 // Returns the largest document_id added to the index. Note that DocumentIds 168 // are always inserted in increasing order. 169 virtual DocumentId last_added_document_id() const = 0; 170 171 // Sets last_added_document_id to document_id so long as document_id > 172 // last_added_document_id() or last_added_document_id() is invalid. 173 virtual void set_last_added_document_id(DocumentId document_id) = 0; 174 175 // The number of individual indices that the NumericIndex has created to 176 // search over all indexed properties thus far. 177 virtual int num_property_indices() const = 0; 178 179 protected: NumericIndex(const Filesystem & filesystem,std::string && working_path,PersistentStorage::WorkingPathType working_path_type)180 explicit NumericIndex(const Filesystem& filesystem, 181 std::string&& working_path, 182 PersistentStorage::WorkingPathType working_path_type) 183 : PersistentStorage(filesystem, std::move(working_path), 184 working_path_type) {} 185 186 virtual libtextclassifier3::Status PersistStoragesToDisk() override = 0; 187 188 virtual libtextclassifier3::Status PersistMetadataToDisk() override = 0; 189 190 virtual libtextclassifier3::Status WriteMetadata() override = 0; 191 192 virtual libtextclassifier3::StatusOr<Crc32> UpdateStoragesChecksum() 193 override = 0; 194 195 virtual libtextclassifier3::StatusOr<Crc32> GetInfoChecksum() 196 const override = 0; 197 198 virtual libtextclassifier3::StatusOr<Crc32> GetStoragesChecksum() 199 const override = 0; 200 201 virtual Crcs& crcs() override = 0; 202 virtual const Crcs& crcs() const override = 0; 203 }; 204 205 } // namespace lib 206 } // namespace icing 207 208 #endif // ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_ 209