1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/term-indexing-handler.h"
16
17 #include <memory>
18 #include <string>
19 #include <utility>
20
21 #include "icing/text_classifier/lib3/utils/base/status.h"
22 #include "icing/text_classifier/lib3/utils/base/statusor.h"
23 #include "icing/absl_ports/canonical_errors.h"
24 #include "icing/index/index.h"
25 #include "icing/index/property-existence-indexing-handler.h"
26 #include "icing/index/string-section-indexing-handler.h"
27 #include "icing/legacy/core/icing-string-util.h"
28 #include "icing/proto/logging.pb.h"
29 #include "icing/store/document-id.h"
30 #include "icing/transform/normalizer.h"
31 #include "icing/util/clock.h"
32 #include "icing/util/logging.h"
33 #include "icing/util/status-macros.h"
34 #include "icing/util/tokenized-document.h"
35
36 namespace icing {
37 namespace lib {
38
39 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<TermIndexingHandler>>
Create(const Clock * clock,const Normalizer * normalizer,Index * index,bool build_property_existence_metadata_hits)40 TermIndexingHandler::Create(const Clock* clock, const Normalizer* normalizer,
41 Index* index,
42 bool build_property_existence_metadata_hits) {
43 ICING_RETURN_ERROR_IF_NULL(clock);
44 ICING_RETURN_ERROR_IF_NULL(normalizer);
45 ICING_RETURN_ERROR_IF_NULL(index);
46
47 // Property existence index handler
48 std::unique_ptr<PropertyExistenceIndexingHandler>
49 property_existence_indexing_handler = nullptr;
50 if (build_property_existence_metadata_hits) {
51 ICING_ASSIGN_OR_RETURN(
52 property_existence_indexing_handler,
53 PropertyExistenceIndexingHandler::Create(clock, index));
54 }
55 // String section index handler
56 ICING_ASSIGN_OR_RETURN(
57 std::unique_ptr<StringSectionIndexingHandler>
58 string_section_indexing_handler,
59 StringSectionIndexingHandler::Create(normalizer, index));
60
61 return std::unique_ptr<TermIndexingHandler>(new TermIndexingHandler(
62 clock, index, std::move(property_existence_indexing_handler),
63 std::move(string_section_indexing_handler)));
64 }
65
Handle(const TokenizedDocument & tokenized_document,DocumentId document_id,DocumentId old_document_id,bool recovery_mode,PutDocumentStatsProto * put_document_stats)66 libtextclassifier3::Status TermIndexingHandler::Handle(
67 const TokenizedDocument& tokenized_document, DocumentId document_id,
68 DocumentId old_document_id, bool recovery_mode,
69 PutDocumentStatsProto* put_document_stats) {
70 std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
71
72 if (index_.last_added_document_id() != kInvalidDocumentId &&
73 document_id <= index_.last_added_document_id()) {
74 if (recovery_mode) {
75 // Skip the document if document_id <= last_added_document_id in recovery
76 // mode without returning an error.
77 return libtextclassifier3::Status::OK;
78 }
79 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
80 "DocumentId %d must be greater than last added document_id %d",
81 document_id, index_.last_added_document_id()));
82 }
83 index_.set_last_added_document_id(document_id);
84
85 libtextclassifier3::Status status = libtextclassifier3::Status::OK;
86 if (property_existence_indexing_handler_ != nullptr) {
87 status = property_existence_indexing_handler_->Handle(
88 tokenized_document, document_id, old_document_id, put_document_stats);
89 }
90 if (status.ok()) {
91 status = string_section_indexing_handler_->Handle(
92 tokenized_document, document_id, old_document_id, put_document_stats);
93 }
94
95 if (put_document_stats != nullptr) {
96 put_document_stats->set_term_index_latency_ms(
97 index_timer->GetElapsedMilliseconds());
98 }
99
100 // Check if we should merge when we're either successful or we've hit resource
101 // exhausted.
102 bool should_merge =
103 (status.ok() || absl_ports::IsResourceExhausted(status)) &&
104 index_.WantsMerge();
105
106 // Check and sort the LiteIndex HitBuffer if we don't need to merge.
107 if (!should_merge && index_.LiteIndexNeedSort()) {
108 std::unique_ptr<Timer> sort_timer = clock_.GetNewTimer();
109 index_.SortLiteIndex();
110
111 if (put_document_stats != nullptr) {
112 put_document_stats->set_lite_index_sort_latency_ms(
113 sort_timer->GetElapsedMilliseconds());
114 }
115 }
116
117 // Attempt index merge if needed.
118 if (should_merge) {
119 ICING_LOG(INFO) << "Merging the index at docid " << document_id << ".";
120
121 std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
122 libtextclassifier3::Status merge_status = index_.Merge();
123
124 if (!merge_status.ok()) {
125 ICING_LOG(ERROR) << "Index merging failed. Clearing index.";
126 if (!index_.Reset().ok()) {
127 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
128 "Unable to reset to clear index after merge failure. Merge "
129 "failure=%d:%s",
130 merge_status.error_code(), merge_status.error_message().c_str()));
131 } else {
132 return absl_ports::DataLossError(IcingStringUtil::StringPrintf(
133 "Forced to reset index after merge failure. Merge failure=%d:%s",
134 merge_status.error_code(), merge_status.error_message().c_str()));
135 }
136 }
137
138 if (put_document_stats != nullptr) {
139 put_document_stats->set_index_merge_latency_ms(
140 merge_timer->GetElapsedMilliseconds());
141 }
142 }
143 return status;
144 }
145
146 } // namespace lib
147 } // namespace icing
148