1*8b6cd535SAndroid Build Coastguard Worker // Copyright (C) 2023 Google LLC
2*8b6cd535SAndroid Build Coastguard Worker //
3*8b6cd535SAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*8b6cd535SAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*8b6cd535SAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*8b6cd535SAndroid Build Coastguard Worker //
7*8b6cd535SAndroid Build Coastguard Worker // http://www.apache.org/licenses/LICENSE-2.0
8*8b6cd535SAndroid Build Coastguard Worker //
9*8b6cd535SAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*8b6cd535SAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*8b6cd535SAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*8b6cd535SAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*8b6cd535SAndroid Build Coastguard Worker // limitations under the License.
14*8b6cd535SAndroid Build Coastguard Worker
15*8b6cd535SAndroid Build Coastguard Worker #include "icing/join/qualified-id-join-indexing-handler.h"
16*8b6cd535SAndroid Build Coastguard Worker
17*8b6cd535SAndroid Build Coastguard Worker #include <cstdint>
18*8b6cd535SAndroid Build Coastguard Worker #include <limits>
19*8b6cd535SAndroid Build Coastguard Worker #include <memory>
20*8b6cd535SAndroid Build Coastguard Worker #include <optional>
21*8b6cd535SAndroid Build Coastguard Worker #include <string_view>
22*8b6cd535SAndroid Build Coastguard Worker #include <utility>
23*8b6cd535SAndroid Build Coastguard Worker #include <vector>
24*8b6cd535SAndroid Build Coastguard Worker
25*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/status.h"
26*8b6cd535SAndroid Build Coastguard Worker #include "icing/text_classifier/lib3/utils/base/statusor.h"
27*8b6cd535SAndroid Build Coastguard Worker #include "icing/absl_ports/canonical_errors.h"
28*8b6cd535SAndroid Build Coastguard Worker #include "icing/join/document-join-id-pair.h"
29*8b6cd535SAndroid Build Coastguard Worker #include "icing/join/qualified-id-join-index.h"
30*8b6cd535SAndroid Build Coastguard Worker #include "icing/join/qualified-id.h"
31*8b6cd535SAndroid Build Coastguard Worker #include "icing/legacy/core/icing-string-util.h"
32*8b6cd535SAndroid Build Coastguard Worker #include "icing/proto/logging.pb.h"
33*8b6cd535SAndroid Build Coastguard Worker #include "icing/schema/joinable-property.h"
34*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-filter-data.h"
35*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-id.h"
36*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/document-store.h"
37*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/namespace-id-fingerprint.h"
38*8b6cd535SAndroid Build Coastguard Worker #include "icing/store/namespace-id.h"
39*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/clock.h"
40*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/logging.h"
41*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/status-macros.h"
42*8b6cd535SAndroid Build Coastguard Worker #include "icing/util/tokenized-document.h"
43*8b6cd535SAndroid Build Coastguard Worker
44*8b6cd535SAndroid Build Coastguard Worker namespace icing {
45*8b6cd535SAndroid Build Coastguard Worker namespace lib {
46*8b6cd535SAndroid Build Coastguard Worker
47*8b6cd535SAndroid Build Coastguard Worker /* static */ libtextclassifier3::StatusOr<
48*8b6cd535SAndroid Build Coastguard Worker std::unique_ptr<QualifiedIdJoinIndexingHandler>>
Create(const Clock * clock,const DocumentStore * doc_store,QualifiedIdJoinIndex * qualified_id_join_index)49*8b6cd535SAndroid Build Coastguard Worker QualifiedIdJoinIndexingHandler::Create(
50*8b6cd535SAndroid Build Coastguard Worker const Clock* clock, const DocumentStore* doc_store,
51*8b6cd535SAndroid Build Coastguard Worker QualifiedIdJoinIndex* qualified_id_join_index) {
52*8b6cd535SAndroid Build Coastguard Worker ICING_RETURN_ERROR_IF_NULL(clock);
53*8b6cd535SAndroid Build Coastguard Worker ICING_RETURN_ERROR_IF_NULL(doc_store);
54*8b6cd535SAndroid Build Coastguard Worker ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index);
55*8b6cd535SAndroid Build Coastguard Worker
56*8b6cd535SAndroid Build Coastguard Worker return std::unique_ptr<QualifiedIdJoinIndexingHandler>(
57*8b6cd535SAndroid Build Coastguard Worker new QualifiedIdJoinIndexingHandler(clock, doc_store,
58*8b6cd535SAndroid Build Coastguard Worker qualified_id_join_index));
59*8b6cd535SAndroid Build Coastguard Worker }
60*8b6cd535SAndroid Build Coastguard Worker
Handle(const TokenizedDocument & tokenized_document,DocumentId document_id,DocumentId old_document_id,bool recovery_mode,PutDocumentStatsProto * put_document_stats)61*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
62*8b6cd535SAndroid Build Coastguard Worker const TokenizedDocument& tokenized_document, DocumentId document_id,
63*8b6cd535SAndroid Build Coastguard Worker DocumentId old_document_id, bool recovery_mode,
64*8b6cd535SAndroid Build Coastguard Worker PutDocumentStatsProto* put_document_stats) {
65*8b6cd535SAndroid Build Coastguard Worker std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
66*8b6cd535SAndroid Build Coastguard Worker
67*8b6cd535SAndroid Build Coastguard Worker if (!IsDocumentIdValid(document_id)) {
68*8b6cd535SAndroid Build Coastguard Worker return absl_ports::InvalidArgumentError(
69*8b6cd535SAndroid Build Coastguard Worker IcingStringUtil::StringPrintf("Invalid DocumentId %d", document_id));
70*8b6cd535SAndroid Build Coastguard Worker }
71*8b6cd535SAndroid Build Coastguard Worker
72*8b6cd535SAndroid Build Coastguard Worker if (qualified_id_join_index_.last_added_document_id() != kInvalidDocumentId &&
73*8b6cd535SAndroid Build Coastguard Worker document_id <= qualified_id_join_index_.last_added_document_id()) {
74*8b6cd535SAndroid Build Coastguard Worker if (recovery_mode) {
75*8b6cd535SAndroid Build Coastguard Worker // Skip the document if document_id <= last_added_document_id in recovery
76*8b6cd535SAndroid Build Coastguard Worker // mode without returning an error.
77*8b6cd535SAndroid Build Coastguard Worker return libtextclassifier3::Status::OK;
78*8b6cd535SAndroid Build Coastguard Worker }
79*8b6cd535SAndroid Build Coastguard Worker return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
80*8b6cd535SAndroid Build Coastguard Worker "DocumentId %d must be greater than last added document_id %d",
81*8b6cd535SAndroid Build Coastguard Worker document_id, qualified_id_join_index_.last_added_document_id()));
82*8b6cd535SAndroid Build Coastguard Worker }
83*8b6cd535SAndroid Build Coastguard Worker qualified_id_join_index_.set_last_added_document_id(document_id);
84*8b6cd535SAndroid Build Coastguard Worker
85*8b6cd535SAndroid Build Coastguard Worker switch (qualified_id_join_index_.version()) {
86*8b6cd535SAndroid Build Coastguard Worker case QualifiedIdJoinIndex::Version::kV1:
87*8b6cd535SAndroid Build Coastguard Worker ICING_RETURN_IF_ERROR(HandleV1(tokenized_document, document_id));
88*8b6cd535SAndroid Build Coastguard Worker break;
89*8b6cd535SAndroid Build Coastguard Worker case QualifiedIdJoinIndex::Version::kV2:
90*8b6cd535SAndroid Build Coastguard Worker ICING_RETURN_IF_ERROR(HandleV2(tokenized_document, document_id));
91*8b6cd535SAndroid Build Coastguard Worker break;
92*8b6cd535SAndroid Build Coastguard Worker case QualifiedIdJoinIndex::Version::kV3:
93*8b6cd535SAndroid Build Coastguard Worker ICING_RETURN_IF_ERROR(
94*8b6cd535SAndroid Build Coastguard Worker HandleV3(tokenized_document, document_id, old_document_id));
95*8b6cd535SAndroid Build Coastguard Worker break;
96*8b6cd535SAndroid Build Coastguard Worker }
97*8b6cd535SAndroid Build Coastguard Worker
98*8b6cd535SAndroid Build Coastguard Worker if (put_document_stats != nullptr) {
99*8b6cd535SAndroid Build Coastguard Worker put_document_stats->set_qualified_id_join_index_latency_ms(
100*8b6cd535SAndroid Build Coastguard Worker index_timer->GetElapsedMilliseconds());
101*8b6cd535SAndroid Build Coastguard Worker }
102*8b6cd535SAndroid Build Coastguard Worker
103*8b6cd535SAndroid Build Coastguard Worker return libtextclassifier3::Status::OK;
104*8b6cd535SAndroid Build Coastguard Worker }
105*8b6cd535SAndroid Build Coastguard Worker
HandleV1(const TokenizedDocument & tokenized_document,DocumentId document_id)106*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status QualifiedIdJoinIndexingHandler::HandleV1(
107*8b6cd535SAndroid Build Coastguard Worker const TokenizedDocument& tokenized_document, DocumentId document_id) {
108*8b6cd535SAndroid Build Coastguard Worker for (const JoinableProperty<std::string_view>& qualified_id_property :
109*8b6cd535SAndroid Build Coastguard Worker tokenized_document.qualified_id_join_properties()) {
110*8b6cd535SAndroid Build Coastguard Worker if (qualified_id_property.values.empty()) {
111*8b6cd535SAndroid Build Coastguard Worker continue;
112*8b6cd535SAndroid Build Coastguard Worker }
113*8b6cd535SAndroid Build Coastguard Worker
114*8b6cd535SAndroid Build Coastguard Worker DocumentJoinIdPair document_join_id_pair(document_id,
115*8b6cd535SAndroid Build Coastguard Worker qualified_id_property.metadata.id);
116*8b6cd535SAndroid Build Coastguard Worker // Currently we only support single (non-repeated) joinable value under a
117*8b6cd535SAndroid Build Coastguard Worker // property.
118*8b6cd535SAndroid Build Coastguard Worker std::string_view ref_qualified_id_str = qualified_id_property.values[0];
119*8b6cd535SAndroid Build Coastguard Worker
120*8b6cd535SAndroid Build Coastguard Worker // Attempt to parse qualified id string to make sure the format is
121*8b6cd535SAndroid Build Coastguard Worker // correct.
122*8b6cd535SAndroid Build Coastguard Worker if (!QualifiedId::Parse(ref_qualified_id_str).ok()) {
123*8b6cd535SAndroid Build Coastguard Worker // Skip incorrect format of qualified id string to save disk space.
124*8b6cd535SAndroid Build Coastguard Worker continue;
125*8b6cd535SAndroid Build Coastguard Worker }
126*8b6cd535SAndroid Build Coastguard Worker
127*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status status = qualified_id_join_index_.Put(
128*8b6cd535SAndroid Build Coastguard Worker document_join_id_pair, ref_qualified_id_str);
129*8b6cd535SAndroid Build Coastguard Worker if (!status.ok()) {
130*8b6cd535SAndroid Build Coastguard Worker ICING_LOG(WARNING)
131*8b6cd535SAndroid Build Coastguard Worker << "Failed to add data into qualified id join index due to: "
132*8b6cd535SAndroid Build Coastguard Worker << status.error_message();
133*8b6cd535SAndroid Build Coastguard Worker return status;
134*8b6cd535SAndroid Build Coastguard Worker }
135*8b6cd535SAndroid Build Coastguard Worker }
136*8b6cd535SAndroid Build Coastguard Worker return libtextclassifier3::Status::OK;
137*8b6cd535SAndroid Build Coastguard Worker }
138*8b6cd535SAndroid Build Coastguard Worker
HandleV2(const TokenizedDocument & tokenized_document,DocumentId document_id)139*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status QualifiedIdJoinIndexingHandler::HandleV2(
140*8b6cd535SAndroid Build Coastguard Worker const TokenizedDocument& tokenized_document, DocumentId document_id) {
141*8b6cd535SAndroid Build Coastguard Worker std::optional<DocumentFilterData> filter_data =
142*8b6cd535SAndroid Build Coastguard Worker doc_store_.GetAliveDocumentFilterData(
143*8b6cd535SAndroid Build Coastguard Worker document_id,
144*8b6cd535SAndroid Build Coastguard Worker /*current_time_ms=*/std::numeric_limits<int64_t>::min());
145*8b6cd535SAndroid Build Coastguard Worker if (!filter_data) {
146*8b6cd535SAndroid Build Coastguard Worker // This should not happen.
147*8b6cd535SAndroid Build Coastguard Worker return absl_ports::InternalError(
148*8b6cd535SAndroid Build Coastguard Worker "Failed to get alive document filter data when indexing");
149*8b6cd535SAndroid Build Coastguard Worker }
150*8b6cd535SAndroid Build Coastguard Worker
151*8b6cd535SAndroid Build Coastguard Worker for (const JoinableProperty<std::string_view>& qualified_id_property :
152*8b6cd535SAndroid Build Coastguard Worker tokenized_document.qualified_id_join_properties()) {
153*8b6cd535SAndroid Build Coastguard Worker // Parse all qualified id strings and convert them to
154*8b6cd535SAndroid Build Coastguard Worker // NamespaceIdFingerprint.
155*8b6cd535SAndroid Build Coastguard Worker std::vector<NamespaceIdFingerprint> ref_doc_nsid_uri_fingerprints;
156*8b6cd535SAndroid Build Coastguard Worker for (std::string_view ref_qualified_id_str : qualified_id_property.values) {
157*8b6cd535SAndroid Build Coastguard Worker // Attempt to parse qualified id string to make sure the format is
158*8b6cd535SAndroid Build Coastguard Worker // correct.
159*8b6cd535SAndroid Build Coastguard Worker auto ref_qualified_id_or = QualifiedId::Parse(ref_qualified_id_str);
160*8b6cd535SAndroid Build Coastguard Worker if (!ref_qualified_id_or.ok()) {
161*8b6cd535SAndroid Build Coastguard Worker // Skip incorrect format of qualified id string.
162*8b6cd535SAndroid Build Coastguard Worker continue;
163*8b6cd535SAndroid Build Coastguard Worker }
164*8b6cd535SAndroid Build Coastguard Worker
165*8b6cd535SAndroid Build Coastguard Worker QualifiedId ref_qualified_id =
166*8b6cd535SAndroid Build Coastguard Worker std::move(ref_qualified_id_or).ValueOrDie();
167*8b6cd535SAndroid Build Coastguard Worker auto ref_namespace_id_or =
168*8b6cd535SAndroid Build Coastguard Worker doc_store_.GetNamespaceId(ref_qualified_id.name_space());
169*8b6cd535SAndroid Build Coastguard Worker if (!ref_namespace_id_or.ok()) {
170*8b6cd535SAndroid Build Coastguard Worker // Skip invalid namespace id.
171*8b6cd535SAndroid Build Coastguard Worker continue;
172*8b6cd535SAndroid Build Coastguard Worker }
173*8b6cd535SAndroid Build Coastguard Worker NamespaceId ref_namespace_id =
174*8b6cd535SAndroid Build Coastguard Worker std::move(ref_namespace_id_or).ValueOrDie();
175*8b6cd535SAndroid Build Coastguard Worker
176*8b6cd535SAndroid Build Coastguard Worker ref_doc_nsid_uri_fingerprints.push_back(
177*8b6cd535SAndroid Build Coastguard Worker NamespaceIdFingerprint(ref_namespace_id, ref_qualified_id.uri()));
178*8b6cd535SAndroid Build Coastguard Worker }
179*8b6cd535SAndroid Build Coastguard Worker
180*8b6cd535SAndroid Build Coastguard Worker // Batch add all join data of this (schema_type_id, joinable_property_id)
181*8b6cd535SAndroid Build Coastguard Worker // into to the index.
182*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status status = qualified_id_join_index_.Put(
183*8b6cd535SAndroid Build Coastguard Worker filter_data->schema_type_id(), qualified_id_property.metadata.id,
184*8b6cd535SAndroid Build Coastguard Worker document_id, std::move(ref_doc_nsid_uri_fingerprints));
185*8b6cd535SAndroid Build Coastguard Worker if (!status.ok()) {
186*8b6cd535SAndroid Build Coastguard Worker ICING_LOG(WARNING)
187*8b6cd535SAndroid Build Coastguard Worker << "Failed to add data into qualified id join index v2 due to: "
188*8b6cd535SAndroid Build Coastguard Worker << status.error_message();
189*8b6cd535SAndroid Build Coastguard Worker return status;
190*8b6cd535SAndroid Build Coastguard Worker }
191*8b6cd535SAndroid Build Coastguard Worker }
192*8b6cd535SAndroid Build Coastguard Worker return libtextclassifier3::Status::OK;
193*8b6cd535SAndroid Build Coastguard Worker }
194*8b6cd535SAndroid Build Coastguard Worker
HandleV3(const TokenizedDocument & tokenized_document,DocumentId document_id,DocumentId old_document_id)195*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status QualifiedIdJoinIndexingHandler::HandleV3(
196*8b6cd535SAndroid Build Coastguard Worker const TokenizedDocument& tokenized_document, DocumentId document_id,
197*8b6cd535SAndroid Build Coastguard Worker DocumentId old_document_id) {
198*8b6cd535SAndroid Build Coastguard Worker // (Parent perspective)
199*8b6cd535SAndroid Build Coastguard Worker // When replacement, if there were any existing child documents joining to it,
200*8b6cd535SAndroid Build Coastguard Worker // then we need to migrate the old document id to the new document id.
201*8b6cd535SAndroid Build Coastguard Worker if (IsDocumentIdValid(old_document_id)) {
202*8b6cd535SAndroid Build Coastguard Worker ICING_RETURN_IF_ERROR(
203*8b6cd535SAndroid Build Coastguard Worker qualified_id_join_index_.MigrateParent(old_document_id, document_id));
204*8b6cd535SAndroid Build Coastguard Worker }
205*8b6cd535SAndroid Build Coastguard Worker
206*8b6cd535SAndroid Build Coastguard Worker // (Child perspective)
207*8b6cd535SAndroid Build Coastguard Worker // Add child join data.
208*8b6cd535SAndroid Build Coastguard Worker for (const JoinableProperty<std::string_view>& qualified_id_property :
209*8b6cd535SAndroid Build Coastguard Worker tokenized_document.qualified_id_join_properties()) {
210*8b6cd535SAndroid Build Coastguard Worker if (qualified_id_property.values.empty()) {
211*8b6cd535SAndroid Build Coastguard Worker continue;
212*8b6cd535SAndroid Build Coastguard Worker }
213*8b6cd535SAndroid Build Coastguard Worker
214*8b6cd535SAndroid Build Coastguard Worker DocumentJoinIdPair child_doc_join_id_pair(
215*8b6cd535SAndroid Build Coastguard Worker document_id, qualified_id_property.metadata.id);
216*8b6cd535SAndroid Build Coastguard Worker
217*8b6cd535SAndroid Build Coastguard Worker // Extract parent qualified ids and lookup their corresponding document ids.
218*8b6cd535SAndroid Build Coastguard Worker std::vector<DocumentId> parent_doc_ids;
219*8b6cd535SAndroid Build Coastguard Worker parent_doc_ids.reserve(qualified_id_property.values.size());
220*8b6cd535SAndroid Build Coastguard Worker for (std::string_view parent_qualified_id_str :
221*8b6cd535SAndroid Build Coastguard Worker qualified_id_property.values) {
222*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<QualifiedId> parent_qualified_id_or =
223*8b6cd535SAndroid Build Coastguard Worker QualifiedId::Parse(parent_qualified_id_str);
224*8b6cd535SAndroid Build Coastguard Worker if (!parent_qualified_id_or.ok()) {
225*8b6cd535SAndroid Build Coastguard Worker // Skip incorrect format of qualified id string.
226*8b6cd535SAndroid Build Coastguard Worker continue;
227*8b6cd535SAndroid Build Coastguard Worker }
228*8b6cd535SAndroid Build Coastguard Worker QualifiedId parent_qualified_id =
229*8b6cd535SAndroid Build Coastguard Worker std::move(parent_qualified_id_or).ValueOrDie();
230*8b6cd535SAndroid Build Coastguard Worker
231*8b6cd535SAndroid Build Coastguard Worker // Lookup document store to get the parent document id.
232*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::StatusOr<DocumentId> parent_doc_id_or =
233*8b6cd535SAndroid Build Coastguard Worker doc_store_.GetDocumentId(parent_qualified_id.name_space(),
234*8b6cd535SAndroid Build Coastguard Worker parent_qualified_id.uri());
235*8b6cd535SAndroid Build Coastguard Worker if (!parent_doc_id_or.ok() ||
236*8b6cd535SAndroid Build Coastguard Worker parent_doc_id_or.ValueOrDie() == kInvalidDocumentId) {
237*8b6cd535SAndroid Build Coastguard Worker // Skip invalid parent document id or parent document does not exist.
238*8b6cd535SAndroid Build Coastguard Worker continue;
239*8b6cd535SAndroid Build Coastguard Worker }
240*8b6cd535SAndroid Build Coastguard Worker parent_doc_ids.push_back(parent_doc_id_or.ValueOrDie());
241*8b6cd535SAndroid Build Coastguard Worker }
242*8b6cd535SAndroid Build Coastguard Worker
243*8b6cd535SAndroid Build Coastguard Worker // Add all parent document ids to the index.
244*8b6cd535SAndroid Build Coastguard Worker libtextclassifier3::Status status = qualified_id_join_index_.Put(
245*8b6cd535SAndroid Build Coastguard Worker child_doc_join_id_pair, std::move(parent_doc_ids));
246*8b6cd535SAndroid Build Coastguard Worker if (!status.ok()) {
247*8b6cd535SAndroid Build Coastguard Worker ICING_LOG(WARNING)
248*8b6cd535SAndroid Build Coastguard Worker << "Failed to add data into qualified id join index due to: "
249*8b6cd535SAndroid Build Coastguard Worker << status.error_message();
250*8b6cd535SAndroid Build Coastguard Worker return status;
251*8b6cd535SAndroid Build Coastguard Worker }
252*8b6cd535SAndroid Build Coastguard Worker }
253*8b6cd535SAndroid Build Coastguard Worker return libtextclassifier3::Status::OK;
254*8b6cd535SAndroid Build Coastguard Worker }
255*8b6cd535SAndroid Build Coastguard Worker
256*8b6cd535SAndroid Build Coastguard Worker } // namespace lib
257*8b6cd535SAndroid Build Coastguard Worker } // namespace icing
258