// xref: /aosp_15_r20/external/icing/icing/join/qualified-id-join-index.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
// Copyright (C) 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
16 #define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
17 
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 #include <utility>
23 #include <vector>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/file/filesystem.h"
28 #include "icing/file/persistent-storage.h"
29 #include "icing/join/document-id-to-join-info.h"
30 #include "icing/join/document-join-id-pair.h"
31 #include "icing/schema/joinable-property.h"
32 #include "icing/store/document-filter-data.h"
33 #include "icing/store/document-id.h"
34 #include "icing/store/namespace-id-fingerprint.h"
35 #include "icing/store/namespace-id.h"
36 #include "icing/util/crc32.h"
37 
38 namespace icing {
39 namespace lib {
40 
41 // QualifiedIdJoinIndex: an abstract class to maintain data for qualified id
42 // joining.
43 class QualifiedIdJoinIndex : public PersistentStorage {
44  public:
45   class JoinDataIteratorBase {
46    public:
47     virtual ~JoinDataIteratorBase() = default;
48 
49     virtual libtextclassifier3::Status Advance() = 0;
50 
51     virtual const DocumentIdToJoinInfo<NamespaceIdFingerprint>& GetCurrent()
52         const = 0;
53   };
54 
55   enum class Version { kV1, kV2, kV3 };
56 
57   static constexpr WorkingPathType kWorkingPathType =
58       WorkingPathType::kDirectory;
59 
60   // Deletes QualifiedIdJoinIndex under working_path.
61   //
62   // Returns:
63   //   - OK on success
64   //   - INTERNAL_ERROR on I/O error
Discard(const Filesystem & filesystem,const std::string & working_path)65   static libtextclassifier3::Status Discard(const Filesystem& filesystem,
66                                             const std::string& working_path) {
67     return PersistentStorage::Discard(filesystem, working_path,
68                                       kWorkingPathType);
69   }
70 
71   virtual ~QualifiedIdJoinIndex() override = default;
72 
73   // (v1 only) Puts a new data into index: DocumentJoinIdPair (DocumentId,
74   // JoinablePropertyId) references to ref_qualified_id_str (the identifier of
75   // another document).
76   //
77   // REQUIRES: ref_qualified_id_str contains no '\0'.
78   //
79   // Returns:
80   //   - OK on success
81   //   - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
82   //   - Any KeyMapper errors
83   virtual libtextclassifier3::Status Put(
84       const DocumentJoinIdPair& document_join_id_pair,
85       std::string_view ref_qualified_id_str) = 0;
86 
87   // (v2 only) Puts a list of referenced NamespaceIdFingerprint into index,
88   // given the DocumentId, SchemaTypeId and JoinablePropertyId.
89   //
90   // Returns:
91   //   - OK on success
92   //   - INVALID_ARGUMENT_ERROR if schema_type_id, joinable_property_id, or
93   //     document_id is invalid
94   //   - Any KeyMapper/FlashIndexStorage errors
95   virtual libtextclassifier3::Status Put(
96       SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id,
97       DocumentId document_id,
98       std::vector<NamespaceIdFingerprint>&&
99           ref_namespace_id_uri_fingerprints) = 0;
100 
101   // (v3 only) Puts a new child document and its referenced parent documents
102   // into the join index.
103   //
104   // Returns:
105   //   - OK on success
106   //   - INVALID_ARGUMENT_ERROR if child_document_join_id_pair is invalid
107   //   - Any FileBackedVector errors
108   virtual libtextclassifier3::Status Put(
109       const DocumentJoinIdPair& child_document_join_id_pair,
110       std::vector<DocumentId>&& parent_document_ids) = 0;
111 
112   // (v1 only) Gets the referenced document's qualified id string by
113   // DocumentJoinIdPair.
114   //
115   // Returns:
116   //   - A qualified id string referenced by the given DocumentJoinIdPair
117   //     (DocumentId, JoinablePropertyId) on success
118   //   - INVALID_ARGUMENT_ERROR if doc_join_info is invalid
119   //   - NOT_FOUND_ERROR if doc_join_info doesn't exist
120   //   - Any KeyMapper errors
121   virtual libtextclassifier3::StatusOr<std::string_view> Get(
122       const DocumentJoinIdPair& document_join_id_pair) const = 0;
123 
124   // (v2 only) Returns a JoinDataIterator for iterating through all join data of
125   // the specified (schema_type_id, joinable_property_id).
126   //
127   // Returns:
128   //   - On success: a JoinDataIterator
129   //   - INVALID_ARGUMENT_ERROR if schema_type_id or joinable_property_id is
130   //     invalid
131   //   - Any KeyMapper/FlashIndexStorage errors
132   virtual libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>>
133   GetIterator(SchemaTypeId schema_type_id,
134               JoinablePropertyId joinable_property_id) const = 0;
135 
136   // (v3 only) Gets the list of joinable children for the given parent document
137   // id.
138   //
139   // Returns:
140   //   - A list of children's DocumentJoinIdPair on success
141   //   - Any FileBackedVector errors
142   virtual libtextclassifier3::StatusOr<std::vector<DocumentJoinIdPair>> Get(
143       DocumentId parent_document_id) const = 0;
144 
145   // Migrates existing join data for a parent document from old_document_id to
146   // new_document_id if necessary.
147   //
148   // Returns:
149   //   - OK on success
150   //   - INVALID_ARGUMENT_ERROR if any document id is invalid
151   //   - Any errors, depending on the implementation
152   virtual libtextclassifier3::Status MigrateParent(
153       DocumentId old_document_id, DocumentId new_document_id) = 0;
154 
155   // Reduces internal file sizes by reclaiming space and ids of deleted
156   // documents. Qualified id type joinable index will convert all entries to the
157   // new document ids.
158   //
159   // - document_id_old_to_new: a map for converting old document id to new
160   //   document id.
161   // - namespace_id_old_to_new: a map for converting old namespace id to new
162   //   namespace id.
163   // - new_last_added_document_id: will be used to update the last added
164   //                               document id in the qualified id type joinable
165   //                               index.
166   //
167   // Returns:
168   //   - OK on success
169   //   - INTERNAL_ERROR on I/O error. This could potentially leave the index in
170   //     an invalid state and the caller should handle it properly (e.g. discard
171   //     and rebuild)
172   virtual libtextclassifier3::Status Optimize(
173       const std::vector<DocumentId>& document_id_old_to_new,
174       const std::vector<NamespaceId>& namespace_id_old_to_new,
175       DocumentId new_last_added_document_id) = 0;
176 
177   // Clears all data and set last_added_document_id to kInvalidDocumentId.
178   //
179   // Returns:
180   //   - OK on success
181   //   - INTERNAL_ERROR on I/O error
182   virtual libtextclassifier3::Status Clear() = 0;
183 
184   virtual Version version() const = 0;
185 
186   virtual int32_t size() const = 0;
187 
188   virtual bool empty() const = 0;
189 
190   virtual DocumentId last_added_document_id() const = 0;
191 
192   virtual void set_last_added_document_id(DocumentId document_id) = 0;
193 
194  protected:
QualifiedIdJoinIndex(const Filesystem & filesystem,std::string && working_path)195   explicit QualifiedIdJoinIndex(const Filesystem& filesystem,
196                                 std::string&& working_path)
197       : PersistentStorage(filesystem, std::move(working_path),
198                           kWorkingPathType) {}
199 
200   virtual libtextclassifier3::Status PersistStoragesToDisk() override = 0;
201 
202   virtual libtextclassifier3::Status PersistMetadataToDisk() override = 0;
203 
204   virtual libtextclassifier3::StatusOr<Crc32> UpdateStoragesChecksum()
205       override = 0;
206 
207   virtual libtextclassifier3::StatusOr<Crc32> GetInfoChecksum()
208       const override = 0;
209 
210   virtual libtextclassifier3::StatusOr<Crc32> GetStoragesChecksum()
211       const override = 0;
212 
213   virtual Crcs& crcs() override = 0;
214   virtual const Crcs& crcs() const override = 0;
215 };
216 
217 }  // namespace lib
218 }  // namespace icing
219 
220 #endif  // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_
221