xref: /aosp_15_r20/external/icing/icing/index/term-indexing-handler.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/term-indexing-handler.h"
16 
17 #include <memory>
18 #include <string>
19 #include <utility>
20 
21 #include "icing/text_classifier/lib3/utils/base/status.h"
22 #include "icing/text_classifier/lib3/utils/base/statusor.h"
23 #include "icing/absl_ports/canonical_errors.h"
24 #include "icing/index/index.h"
25 #include "icing/index/property-existence-indexing-handler.h"
26 #include "icing/index/string-section-indexing-handler.h"
27 #include "icing/legacy/core/icing-string-util.h"
28 #include "icing/proto/logging.pb.h"
29 #include "icing/store/document-id.h"
30 #include "icing/transform/normalizer.h"
31 #include "icing/util/clock.h"
32 #include "icing/util/logging.h"
33 #include "icing/util/status-macros.h"
34 #include "icing/util/tokenized-document.h"
35 
36 namespace icing {
37 namespace lib {
38 
39 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<TermIndexingHandler>>
Create(const Clock * clock,const Normalizer * normalizer,Index * index,bool build_property_existence_metadata_hits)40 TermIndexingHandler::Create(const Clock* clock, const Normalizer* normalizer,
41                             Index* index,
42                             bool build_property_existence_metadata_hits) {
43   ICING_RETURN_ERROR_IF_NULL(clock);
44   ICING_RETURN_ERROR_IF_NULL(normalizer);
45   ICING_RETURN_ERROR_IF_NULL(index);
46 
47   // Property existence index handler
48   std::unique_ptr<PropertyExistenceIndexingHandler>
49       property_existence_indexing_handler = nullptr;
50   if (build_property_existence_metadata_hits) {
51     ICING_ASSIGN_OR_RETURN(
52         property_existence_indexing_handler,
53         PropertyExistenceIndexingHandler::Create(clock, index));
54   }
55   // String section index handler
56   ICING_ASSIGN_OR_RETURN(
57       std::unique_ptr<StringSectionIndexingHandler>
58           string_section_indexing_handler,
59       StringSectionIndexingHandler::Create(normalizer, index));
60 
61   return std::unique_ptr<TermIndexingHandler>(new TermIndexingHandler(
62       clock, index, std::move(property_existence_indexing_handler),
63       std::move(string_section_indexing_handler)));
64 }
65 
Handle(const TokenizedDocument & tokenized_document,DocumentId document_id,DocumentId old_document_id,bool recovery_mode,PutDocumentStatsProto * put_document_stats)66 libtextclassifier3::Status TermIndexingHandler::Handle(
67     const TokenizedDocument& tokenized_document, DocumentId document_id,
68     DocumentId old_document_id, bool recovery_mode,
69     PutDocumentStatsProto* put_document_stats) {
70   std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
71 
72   if (index_.last_added_document_id() != kInvalidDocumentId &&
73       document_id <= index_.last_added_document_id()) {
74     if (recovery_mode) {
75       // Skip the document if document_id <= last_added_document_id in recovery
76       // mode without returning an error.
77       return libtextclassifier3::Status::OK;
78     }
79     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
80         "DocumentId %d must be greater than last added document_id %d",
81         document_id, index_.last_added_document_id()));
82   }
83   index_.set_last_added_document_id(document_id);
84 
85   libtextclassifier3::Status status = libtextclassifier3::Status::OK;
86   if (property_existence_indexing_handler_ != nullptr) {
87     status = property_existence_indexing_handler_->Handle(
88         tokenized_document, document_id, old_document_id, put_document_stats);
89   }
90   if (status.ok()) {
91     status = string_section_indexing_handler_->Handle(
92         tokenized_document, document_id, old_document_id, put_document_stats);
93   }
94 
95   if (put_document_stats != nullptr) {
96     put_document_stats->set_term_index_latency_ms(
97         index_timer->GetElapsedMilliseconds());
98   }
99 
100   // Check if we should merge when we're either successful or we've hit resource
101   // exhausted.
102   bool should_merge =
103       (status.ok() || absl_ports::IsResourceExhausted(status)) &&
104       index_.WantsMerge();
105 
106   // Check and sort the LiteIndex HitBuffer if we don't need to merge.
107   if (!should_merge && index_.LiteIndexNeedSort()) {
108     std::unique_ptr<Timer> sort_timer = clock_.GetNewTimer();
109     index_.SortLiteIndex();
110 
111     if (put_document_stats != nullptr) {
112       put_document_stats->set_lite_index_sort_latency_ms(
113           sort_timer->GetElapsedMilliseconds());
114     }
115   }
116 
117   // Attempt index merge if needed.
118   if (should_merge) {
119     ICING_LOG(INFO) << "Merging the index at docid " << document_id << ".";
120 
121     std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
122     libtextclassifier3::Status merge_status = index_.Merge();
123 
124     if (!merge_status.ok()) {
125       ICING_LOG(ERROR) << "Index merging failed. Clearing index.";
126       if (!index_.Reset().ok()) {
127         return absl_ports::InternalError(IcingStringUtil::StringPrintf(
128             "Unable to reset to clear index after merge failure. Merge "
129             "failure=%d:%s",
130             merge_status.error_code(), merge_status.error_message().c_str()));
131       } else {
132         return absl_ports::DataLossError(IcingStringUtil::StringPrintf(
133             "Forced to reset index after merge failure. Merge failure=%d:%s",
134             merge_status.error_code(), merge_status.error_message().c_str()));
135       }
136     }
137 
138     if (put_document_stats != nullptr) {
139       put_document_stats->set_index_merge_latency_ms(
140           merge_timer->GetElapsedMilliseconds());
141     }
142   }
143   return status;
144 }
145 
146 }  // namespace lib
147 }  // namespace icing
148