1 // Copyright (C) 2023 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_ 16 #define ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_ 17 18 #include <cstdint> 19 #include <memory> 20 #include <string> 21 #include <string_view> 22 #include <utility> 23 #include <vector> 24 25 #include "icing/text_classifier/lib3/utils/base/status.h" 26 #include "icing/text_classifier/lib3/utils/base/statusor.h" 27 #include "icing/file/filesystem.h" 28 #include "icing/file/persistent-storage.h" 29 #include "icing/join/document-id-to-join-info.h" 30 #include "icing/join/document-join-id-pair.h" 31 #include "icing/schema/joinable-property.h" 32 #include "icing/store/document-filter-data.h" 33 #include "icing/store/document-id.h" 34 #include "icing/store/namespace-id-fingerprint.h" 35 #include "icing/store/namespace-id.h" 36 #include "icing/util/crc32.h" 37 38 namespace icing { 39 namespace lib { 40 41 // QualifiedIdJoinIndex: an abstract class to maintain data for qualified id 42 // joining. 43 class QualifiedIdJoinIndex : public PersistentStorage { 44 public: 45 class JoinDataIteratorBase { 46 public: 47 virtual ~JoinDataIteratorBase() = default; 48 49 virtual libtextclassifier3::Status Advance() = 0; 50 51 virtual const DocumentIdToJoinInfo<NamespaceIdFingerprint>& GetCurrent() 52 const = 0; 53 }; 54 55 enum class Version { kV1, kV2, kV3 }; 56 57 static constexpr WorkingPathType kWorkingPathType = 58 WorkingPathType::kDirectory; 59 60 // Deletes QualifiedIdJoinIndex under working_path. 61 // 62 // Returns: 63 // - OK on success 64 // - INTERNAL_ERROR on I/O error Discard(const Filesystem & filesystem,const std::string & working_path)65 static libtextclassifier3::Status Discard(const Filesystem& filesystem, 66 const std::string& working_path) { 67 return PersistentStorage::Discard(filesystem, working_path, 68 kWorkingPathType); 69 } 70 71 virtual ~QualifiedIdJoinIndex() override = default; 72 73 // (v1 only) Puts a new data into index: DocumentJoinIdPair (DocumentId, 74 // JoinablePropertyId) references to ref_qualified_id_str (the identifier of 75 // another document). 76 // 77 // REQUIRES: ref_qualified_id_str contains no '\0'. 78 // 79 // Returns: 80 // - OK on success 81 // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid 82 // - Any KeyMapper errors 83 virtual libtextclassifier3::Status Put( 84 const DocumentJoinIdPair& document_join_id_pair, 85 std::string_view ref_qualified_id_str) = 0; 86 87 // (v2 only) Puts a list of referenced NamespaceIdFingerprint into index, 88 // given the DocumentId, SchemaTypeId and JoinablePropertyId. 89 // 90 // Returns: 91 // - OK on success 92 // - INVALID_ARGUMENT_ERROR if schema_type_id, joinable_property_id, or 93 // document_id is invalid 94 // - Any KeyMapper/FlashIndexStorage errors 95 virtual libtextclassifier3::Status Put( 96 SchemaTypeId schema_type_id, JoinablePropertyId joinable_property_id, 97 DocumentId document_id, 98 std::vector<NamespaceIdFingerprint>&& 99 ref_namespace_id_uri_fingerprints) = 0; 100 101 // (v3 only) Puts a new child document and its referenced parent documents 102 // into the join index. 103 // 104 // Returns: 105 // - OK on success 106 // - INVALID_ARGUMENT_ERROR if child_document_join_id_pair is invalid 107 // - Any FileBackedVector errors 108 virtual libtextclassifier3::Status Put( 109 const DocumentJoinIdPair& child_document_join_id_pair, 110 std::vector<DocumentId>&& parent_document_ids) = 0; 111 112 // (v1 only) Gets the referenced document's qualified id string by 113 // DocumentJoinIdPair. 114 // 115 // Returns: 116 // - A qualified id string referenced by the given DocumentJoinIdPair 117 // (DocumentId, JoinablePropertyId) on success 118 // - INVALID_ARGUMENT_ERROR if doc_join_info is invalid 119 // - NOT_FOUND_ERROR if doc_join_info doesn't exist 120 // - Any KeyMapper errors 121 virtual libtextclassifier3::StatusOr<std::string_view> Get( 122 const DocumentJoinIdPair& document_join_id_pair) const = 0; 123 124 // (v2 only) Returns a JoinDataIterator for iterating through all join data of 125 // the specified (schema_type_id, joinable_property_id). 126 // 127 // Returns: 128 // - On success: a JoinDataIterator 129 // - INVALID_ARGUMENT_ERROR if schema_type_id or joinable_property_id is 130 // invalid 131 // - Any KeyMapper/FlashIndexStorage errors 132 virtual libtextclassifier3::StatusOr<std::unique_ptr<JoinDataIteratorBase>> 133 GetIterator(SchemaTypeId schema_type_id, 134 JoinablePropertyId joinable_property_id) const = 0; 135 136 // (v3 only) Gets the list of joinable children for the given parent document 137 // id. 138 // 139 // Returns: 140 // - A list of children's DocumentJoinIdPair on success 141 // - Any FileBackedVector errors 142 virtual libtextclassifier3::StatusOr<std::vector<DocumentJoinIdPair>> Get( 143 DocumentId parent_document_id) const = 0; 144 145 // Migrates existing join data for a parent document from old_document_id to 146 // new_document_id if necessary. 147 // 148 // Returns: 149 // - OK on success 150 // - INVALID_ARGUMENT_ERROR if any document id is invalid 151 // - Any errors, depending on the implementation 152 virtual libtextclassifier3::Status MigrateParent( 153 DocumentId old_document_id, DocumentId new_document_id) = 0; 154 155 // Reduces internal file sizes by reclaiming space and ids of deleted 156 // documents. Qualified id type joinable index will convert all entries to the 157 // new document ids. 158 // 159 // - document_id_old_to_new: a map for converting old document id to new 160 // document id. 161 // - namespace_id_old_to_new: a map for converting old namespace id to new 162 // namespace id. 163 // - new_last_added_document_id: will be used to update the last added 164 // document id in the qualified id type joinable 165 // index. 166 // 167 // Returns: 168 // - OK on success 169 // - INTERNAL_ERROR on I/O error. This could potentially leave the index in 170 // an invalid state and the caller should handle it properly (e.g. discard 171 // and rebuild) 172 virtual libtextclassifier3::Status Optimize( 173 const std::vector<DocumentId>& document_id_old_to_new, 174 const std::vector<NamespaceId>& namespace_id_old_to_new, 175 DocumentId new_last_added_document_id) = 0; 176 177 // Clears all data and set last_added_document_id to kInvalidDocumentId. 178 // 179 // Returns: 180 // - OK on success 181 // - INTERNAL_ERROR on I/O error 182 virtual libtextclassifier3::Status Clear() = 0; 183 184 virtual Version version() const = 0; 185 186 virtual int32_t size() const = 0; 187 188 virtual bool empty() const = 0; 189 190 virtual DocumentId last_added_document_id() const = 0; 191 192 virtual void set_last_added_document_id(DocumentId document_id) = 0; 193 194 protected: QualifiedIdJoinIndex(const Filesystem & filesystem,std::string && working_path)195 explicit QualifiedIdJoinIndex(const Filesystem& filesystem, 196 std::string&& working_path) 197 : PersistentStorage(filesystem, std::move(working_path), 198 kWorkingPathType) {} 199 200 virtual libtextclassifier3::Status PersistStoragesToDisk() override = 0; 201 202 virtual libtextclassifier3::Status PersistMetadataToDisk() override = 0; 203 204 virtual libtextclassifier3::StatusOr<Crc32> UpdateStoragesChecksum() 205 override = 0; 206 207 virtual libtextclassifier3::StatusOr<Crc32> GetInfoChecksum() 208 const override = 0; 209 210 virtual libtextclassifier3::StatusOr<Crc32> GetStoragesChecksum() 211 const override = 0; 212 213 virtual Crcs& crcs() override = 0; 214 virtual const Crcs& crcs() const override = 0; 215 }; 216 217 } // namespace lib 218 } // namespace icing 219 220 #endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEX_H_ 221