1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/iterator/doc-hit-info-iterator-and.h"
16
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include <vector>
23
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/absl_ports/canonical_errors.h"
26 #include "icing/absl_ports/str_cat.h"
27 #include "icing/index/hit/doc-hit-info.h"
28 #include "icing/index/iterator/doc-hit-info-iterator.h"
29 #include "icing/schema/section.h"
30 #include "icing/store/document-id.h"
31 #include "icing/util/status-macros.h"
32
33 namespace icing {
34 namespace lib {
35
36 namespace {
37
38 // When combining ANDed iterators, n-ary operator has better performance when
39 // number of operands > 3 according to benchmark cl/243720660
40 inline constexpr int kBinaryAndIteratorPerformanceThreshold = 3;
41
42 // The minimum number of iterators needed to construct a And iterator. The And
43 // constructor currently takes 2 iterators.
44 inline constexpr int kMinBinaryIterators = 2;
45
46 } // namespace
47
CreateAndIterator(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)48 std::unique_ptr<DocHitInfoIterator> CreateAndIterator(
49 std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
50 if (iterators.size() == 1) {
51 return std::move(iterators.at(0));
52 }
53
54 std::unique_ptr<DocHitInfoIterator> iterator;
55 if (iterators.size() <= kBinaryAndIteratorPerformanceThreshold &&
56 iterators.size() >= kMinBinaryIterators) {
57 // Accumulate the iterators that need to be ANDed together.
58 iterator = std::move(iterators.at(iterators.size() - 1));
59 for (int i = iterators.size() - 2; i >= 0; --i) {
60 std::unique_ptr<DocHitInfoIterator> temp_iterator = std::move(iterator);
61 iterator = std::make_unique<DocHitInfoIteratorAnd>(
62 /*short_it=*/std::move(iterators[i]),
63 /*long_it=*/std::move(temp_iterator));
64 }
65 } else {
66 // If the vector is too small, the AndNary iterator can handle it and return
67 // an error on the Advance call
68 iterator =
69 std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
70 }
71
72 return iterator;
73 }
74
DocHitInfoIteratorAnd(std::unique_ptr<DocHitInfoIterator> short_it,std::unique_ptr<DocHitInfoIterator> long_it)75 DocHitInfoIteratorAnd::DocHitInfoIteratorAnd(
76 std::unique_ptr<DocHitInfoIterator> short_it,
77 std::unique_ptr<DocHitInfoIterator> long_it)
78 : short_(std::move(short_it)), long_(std::move(long_it)) {}
79
Advance()80 libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
81 // Advance on short first
82 if (!short_->Advance().ok()) {
83 // Didn't find anything for the first iterator, reset to invalid values and
84 // return.
85 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
86 return absl_ports::ResourceExhaustedError(
87 "No more DocHitInfos in iterator");
88 }
89 DocumentId short_doc_id = short_->doc_hit_info().document_id();
90
91 // Then AdvanceTo on long
92 ICING_ASSIGN_OR_RETURN(DocumentId long_doc_id,
93 AdvanceTo(long_.get(), short_doc_id));
94
95 // Now try to align DocHitInfos by moving one or the other.
96 while (short_doc_id != long_doc_id) {
97 if (short_doc_id > long_doc_id) {
98 ICING_ASSIGN_OR_RETURN(short_doc_id,
99 AdvanceTo(short_.get(), long_doc_id));
100 } else {
101 ICING_ASSIGN_OR_RETURN(long_doc_id, AdvanceTo(long_.get(), short_doc_id));
102 }
103 }
104
105 // Guaranteed that short_doc_id and long_doc_id match now
106 doc_hit_info_ = short_->doc_hit_info();
107 doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info().hit_section_ids_mask());
108 return libtextclassifier3::Status::OK;
109 }
110
111 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()112 DocHitInfoIteratorAnd::TrimRightMostNode() && {
113 ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_long,
114 std::move(*long_).TrimRightMostNode());
115 if (trimmed_long.iterator_ == nullptr) {
116 trimmed_long.iterator_ = std::move(short_);
117 } else {
118 trimmed_long.iterator_ = std::make_unique<DocHitInfoIteratorAnd>(
119 std::move(short_), std::move(trimmed_long.iterator_));
120 }
121 return trimmed_long;
122 }
123
ToString() const124 std::string DocHitInfoIteratorAnd::ToString() const {
125 return absl_ports::StrCat("(", short_->ToString(), " AND ", long_->ToString(),
126 ")");
127 }
128
DocHitInfoIteratorAndNary(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)129 DocHitInfoIteratorAndNary::DocHitInfoIteratorAndNary(
130 std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
131 : iterators_(std::move(iterators)) {}
132
Advance()133 libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
134 if (iterators_.size() < 2) {
135 return absl_ports::InvalidArgumentError(
136 "Not enough iterators to AND together");
137 }
138
139 // Advance on the first iterator to get a potential hit
140 if (!iterators_.at(0)->Advance().ok()) {
141 // Didn't find anything for the first iterator, reset to invalid values and
142 // return
143 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
144 return absl_ports::ResourceExhaustedError(
145 "No more DocHitInfos in iterator");
146 }
147 DocumentId potential_document_id =
148 iterators_.at(0)->doc_hit_info().document_id();
149
150 // Our goal is to find the next document_id that exists on all the iterators
151 // by advancing the iterators one by one. We start with some
152 // "potential_document_id", check if it actually matches the above goal. If
153 // yes, return. If not, find the next best "potential" and repeat till we hit
154 // the end.
155
156 // Has the current potential_document_id been found in all the iterators?
157 bool found_document_id = false;
158 while (!found_document_id) {
159 for (auto& iterator : iterators_) {
160 if (iterator->doc_hit_info().document_id() > potential_document_id) {
161 // Advance the current iterator until it's equal to or smaller than the
162 // potential hit doc id
163 DocumentId unused;
164 ICING_ASSIGN_OR_RETURN(
165 unused, AdvanceTo(iterator.get(), potential_document_id));
166 (void)unused; // Silence unused warning.
167 }
168
169 if (iterator->doc_hit_info().document_id() == potential_document_id) {
170 // The potential hit got matched on the iterators so far
171 found_document_id = true;
172 continue;
173 } else if (iterator->doc_hit_info().document_id() <
174 potential_document_id) {
175 // This iterator doesn't have potential_document_id as we've gone past
176 // it already. Use the current document_id as the new
177 // "potential_document_id" and start checking all iterators again.
178 found_document_id = false;
179 potential_document_id = iterator->doc_hit_info().document_id();
180 break;
181 }
182 }
183 }
184
185 // Found a DocumentId which exists in all the iterators
186 doc_hit_info_ = iterators_.at(0)->doc_hit_info();
187
188 for (size_t i = 1; i < iterators_.size(); i++) {
189 doc_hit_info_.MergeSectionsFrom(
190 iterators_.at(i)->doc_hit_info().hit_section_ids_mask());
191 }
192 return libtextclassifier3::Status::OK;
193 }
194
195 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()196 DocHitInfoIteratorAndNary::TrimRightMostNode() && {
197 ICING_ASSIGN_OR_RETURN(
198 TrimmedNode trimmed_right,
199 std::move(*iterators_.rbegin()->get()).TrimRightMostNode());
200 if (trimmed_right.iterator_ == nullptr) {
201 if (iterators_.size() > 2) {
202 iterators_.pop_back();
203 trimmed_right.iterator_ =
204 std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
205 } else if (iterators_.size() == 2) {
206 trimmed_right.iterator_ = std::move(iterators_.at(0));
207 }
208 } else {
209 iterators_.at(iterators_.size() - 1) = std::move(trimmed_right.iterator_);
210 trimmed_right.iterator_ =
211 std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
212 }
213 return trimmed_right;
214 }
215
GetCallStats() const216 DocHitInfoIterator::CallStats DocHitInfoIteratorAndNary::GetCallStats() const {
217 CallStats call_stats;
218 for (const auto& iter : iterators_) {
219 call_stats += iter->GetCallStats();
220 }
221 return call_stats;
222 }
223
ToString() const224 std::string DocHitInfoIteratorAndNary::ToString() const {
225 std::string ret = "(";
226
227 for (int i = 0; i < iterators_.size(); ++i) {
228 if (i == iterators_.size() - 1) {
229 // Last element in vector
230 absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), ")");
231 } else {
232 absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), " AND ");
233 }
234 }
235
236 return ret;
237 }
238
239 } // namespace lib
240 } // namespace icing
241