xref: /aosp_15_r20/external/icing/icing/index/numeric/numeric-index.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
16 #define ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
17 
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/file/persistent-storage.h"
26 #include "icing/index/iterator/doc-hit-info-iterator.h"
27 #include "icing/schema/schema-store.h"
28 #include "icing/schema/section.h"
29 #include "icing/store/document-id.h"
30 #include "icing/store/document-store.h"
31 #include "icing/util/crc32.h"
32 
33 namespace icing {
34 namespace lib {
35 
36 template <typename T>
37 class NumericIndex : public PersistentStorage {
38  public:
39   using value_type = T;
40 
41   // Editor class for batch adding new records into numeric index for a given
42   // property, DocumentId and SectionId. The caller should use BufferKey to
43   // buffer a key (calls several times for multiple keys) and finally call
44   // IndexAllBufferedKeys to batch add all buffered keys (with DocumentId +
45   // SectionId info, i.e. BasicHit) into numeric index.
46   //
47   // For example, there are values = [5, 1, 10, -100] in DocumentId = 5,
48   // SectionId = 1 (property "timestamp").
49   // Then the client should call BufferKey(5), BufferKey(1), BufferKey(10),
50   // BufferKey(-100) first, and finally call IndexAllBufferedKeys once to batch
51   // add these records into numeric index.
52   class Editor {
53    public:
Editor(std::string_view property_path,DocumentId document_id,SectionId section_id)54     explicit Editor(std::string_view property_path, DocumentId document_id,
55                     SectionId section_id)
56         : property_path_(property_path),
57           document_id_(document_id),
58           section_id_(section_id) {}
59 
60     virtual ~Editor() = default;
61 
62     // Buffers a new key.
63     //
64     // Returns:
65     //   - OK on success
66     //   - Any other errors, depending on the actual implementation
67     virtual libtextclassifier3::Status BufferKey(T key) = 0;
68 
69     // Adds all buffered keys into numeric index.
70     //
71     // Returns:
72     //   - OK on success
73     //   - Any other errors, depending on the actual implementation
74     virtual libtextclassifier3::Status IndexAllBufferedKeys() && = 0;
75 
76    protected:
77     std::string property_path_;
78     DocumentId document_id_;
79     SectionId section_id_;
80   };
81 
82   // Iterator class for numeric index range query [key_lower, key_upper]
83   // (inclusive for both side) on a given property (see GetIterator). There are
84   // some basic requirements for implementation:
85   // - Iterates through all relevant doc hits.
86   // - Merges multiple SectionIds of doc hits with same DocumentId into a single
87   //   SectionIdMask and constructs DocHitInfo.
88   // - Returns DocHitInfo in descending DocumentId order.
89   //
90   // For example, relevant doc hits (DocumentId, SectionId) are [(2, 0), (4, 3),
91   // (2, 1), (6, 2), (4, 2)]. Advance() and GetDocHitInfo() should return
92   // DocHitInfo(6, SectionIdMask(2)), DocHitInfo(4, SectionIdMask(2, 3)) and
93   // DocHitInfo(2, SectionIdMask(0, 1)).
94   class Iterator {
95    public:
Iterator(T key_lower,T key_upper)96     explicit Iterator(T key_lower, T key_upper)
97         : key_lower_(key_lower), key_upper_(key_upper) {}
98 
99     virtual ~Iterator() = default;
100 
101     virtual libtextclassifier3::Status Advance() = 0;
102 
103     virtual DocHitInfo GetDocHitInfo() const = 0;
104 
105     virtual int32_t GetNumAdvanceCalls() const = 0;
106 
107     virtual int32_t GetNumBlocksInspected() const = 0;
108 
109    protected:
110     T key_lower_;
111     T key_upper_;
112   };
113 
114   virtual ~NumericIndex() = default;
115 
116   // Returns an Editor instance for adding new records into numeric index for a
117   // given property, DocumentId and SectionId. See Editor for more details.
118   virtual std::unique_ptr<Editor> Edit(std::string_view property_path,
119                                        DocumentId document_id,
120                                        SectionId section_id) = 0;
121 
122   // Returns a DocHitInfoIteratorNumeric (in DocHitInfoIterator interface type
123   // format) for iterating through all docs which have the specified (numeric)
124   // property contents in range [key_lower, key_upper].
125   //
126   // In general, different numeric index implementations require different data
127   // iterator implementations, so class Iterator is an abstraction of the data
128   // iterator and DocHitInfoIteratorNumeric can work with any implementation of
129   // it. See Iterator and DocHitInfoIteratorNumeric for more details.
130   //
131   // Returns:
132   //   - std::unique_ptr<DocHitInfoIterator> on success
133   //   - NOT_FOUND_ERROR if there is no numeric index for property_path
134   //   - INVALID_ARGUMENT_ERROR if key_lower > key_upper
135   //   - Any other errors, depending on the actual implementation
136   virtual libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
137   GetIterator(std::string_view property_path, T key_lower, T key_upper,
138               const DocumentStore& document_store,
139               const SchemaStore& schema_store,
140               int64_t current_time_ms) const = 0;
141 
142   // Reduces internal file sizes by reclaiming space and ids of deleted
143   // documents. Numeric index will convert all data (hits) to the new document
144   // ids and regenerate all index files. If all data in a property path are
145   // completely deleted, then the underlying storage must be discarded as well.
146   //
147   // - document_id_old_to_new: a map for converting old document id to new
148   //   document id.
149   // - new_last_added_document_id: will be used to update the last added
150   //                               document id in the numeric index.
151   //
152   // Returns:
153   //   - OK on success
154   //   - Any other errors, depending on the actual implementation
155   virtual libtextclassifier3::Status Optimize(
156       const std::vector<DocumentId>& document_id_old_to_new,
157       DocumentId new_last_added_document_id) = 0;
158 
159   // Clears all data in the integer index and set last_added_document_id to
160   // kInvalidDocumentId.
161   //
162   // Returns:
163   //   - OK on success
164   //   - Any other errors, depending on the actual implementation
165   virtual libtextclassifier3::Status Clear() = 0;
166 
167   // Returns the largest document_id added to the index. Note that DocumentIds
168   // are always inserted in increasing order.
169   virtual DocumentId last_added_document_id() const = 0;
170 
171   // Sets last_added_document_id to document_id so long as document_id >
172   // last_added_document_id() or last_added_document_id() is invalid.
173   virtual void set_last_added_document_id(DocumentId document_id) = 0;
174 
175   // The number of individual indices that the NumericIndex has created to
176   // search over all indexed properties thus far.
177   virtual int num_property_indices() const = 0;
178 
179  protected:
NumericIndex(const Filesystem & filesystem,std::string && working_path,PersistentStorage::WorkingPathType working_path_type)180   explicit NumericIndex(const Filesystem& filesystem,
181                         std::string&& working_path,
182                         PersistentStorage::WorkingPathType working_path_type)
183       : PersistentStorage(filesystem, std::move(working_path),
184                           working_path_type) {}
185 
186   virtual libtextclassifier3::Status PersistStoragesToDisk() override = 0;
187 
188   virtual libtextclassifier3::Status PersistMetadataToDisk() override = 0;
189 
190   virtual libtextclassifier3::Status WriteMetadata() override = 0;
191 
192   virtual libtextclassifier3::StatusOr<Crc32> UpdateStoragesChecksum()
193       override = 0;
194 
195   virtual libtextclassifier3::StatusOr<Crc32> GetInfoChecksum()
196       const override = 0;
197 
198   virtual libtextclassifier3::StatusOr<Crc32> GetStoragesChecksum()
199       const override = 0;
200 
201   virtual Crcs& crcs() override = 0;
202   virtual const Crcs& crcs() const override = 0;
203 };
204 
205 }  // namespace lib
206 }  // namespace icing
207 
208 #endif  // ICING_INDEX_NUMERIC_NUMERIC_INDEX_H_
209