xref: /aosp_15_r20/external/icing/icing/index/iterator/doc-hit-info-iterator-and.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/iterator/doc-hit-info-iterator-and.h"
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/absl_ports/canonical_errors.h"
26 #include "icing/absl_ports/str_cat.h"
27 #include "icing/index/hit/doc-hit-info.h"
28 #include "icing/index/iterator/doc-hit-info-iterator.h"
29 #include "icing/schema/section.h"
30 #include "icing/store/document-id.h"
31 #include "icing/util/status-macros.h"
32 
33 namespace icing {
34 namespace lib {
35 
36 namespace {
37 
38 // When combining ANDed iterators, n-ary operator has better performance when
39 // number of operands > 3 according to benchmark cl/243720660
40 inline constexpr int kBinaryAndIteratorPerformanceThreshold = 3;
41 
42 // The minimum number of iterators needed to construct a And iterator. The And
43 // constructor currently takes 2 iterators.
44 inline constexpr int kMinBinaryIterators = 2;
45 
46 }  // namespace
47 
CreateAndIterator(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)48 std::unique_ptr<DocHitInfoIterator> CreateAndIterator(
49     std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
50   if (iterators.size() == 1) {
51     return std::move(iterators.at(0));
52   }
53 
54   std::unique_ptr<DocHitInfoIterator> iterator;
55   if (iterators.size() <= kBinaryAndIteratorPerformanceThreshold &&
56       iterators.size() >= kMinBinaryIterators) {
57     // Accumulate the iterators that need to be ANDed together.
58     iterator = std::move(iterators.at(iterators.size() - 1));
59     for (int i = iterators.size() - 2; i >= 0; --i) {
60       std::unique_ptr<DocHitInfoIterator> temp_iterator = std::move(iterator);
61       iterator = std::make_unique<DocHitInfoIteratorAnd>(
62           /*short_it=*/std::move(iterators[i]),
63           /*long_it=*/std::move(temp_iterator));
64     }
65   } else {
66     // If the vector is too small, the AndNary iterator can handle it and return
67     // an error on the Advance call
68     iterator =
69         std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
70   }
71 
72   return iterator;
73 }
74 
DocHitInfoIteratorAnd(std::unique_ptr<DocHitInfoIterator> short_it,std::unique_ptr<DocHitInfoIterator> long_it)75 DocHitInfoIteratorAnd::DocHitInfoIteratorAnd(
76     std::unique_ptr<DocHitInfoIterator> short_it,
77     std::unique_ptr<DocHitInfoIterator> long_it)
78     : short_(std::move(short_it)), long_(std::move(long_it)) {}
79 
Advance()80 libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
81   // Advance on short first
82   if (!short_->Advance().ok()) {
83     // Didn't find anything for the first iterator, reset to invalid values and
84     // return.
85     doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
86     return absl_ports::ResourceExhaustedError(
87         "No more DocHitInfos in iterator");
88   }
89   DocumentId short_doc_id = short_->doc_hit_info().document_id();
90 
91   // Then AdvanceTo on long
92   ICING_ASSIGN_OR_RETURN(DocumentId long_doc_id,
93                          AdvanceTo(long_.get(), short_doc_id));
94 
95   // Now try to align DocHitInfos by moving one or the other.
96   while (short_doc_id != long_doc_id) {
97     if (short_doc_id > long_doc_id) {
98       ICING_ASSIGN_OR_RETURN(short_doc_id,
99                              AdvanceTo(short_.get(), long_doc_id));
100     } else {
101       ICING_ASSIGN_OR_RETURN(long_doc_id, AdvanceTo(long_.get(), short_doc_id));
102     }
103   }
104 
105   // Guaranteed that short_doc_id and long_doc_id match now
106   doc_hit_info_ = short_->doc_hit_info();
107   doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info().hit_section_ids_mask());
108   return libtextclassifier3::Status::OK;
109 }
110 
111 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()112 DocHitInfoIteratorAnd::TrimRightMostNode() && {
113   ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_long,
114                          std::move(*long_).TrimRightMostNode());
115   if (trimmed_long.iterator_ == nullptr) {
116     trimmed_long.iterator_ = std::move(short_);
117   } else {
118     trimmed_long.iterator_ = std::make_unique<DocHitInfoIteratorAnd>(
119         std::move(short_), std::move(trimmed_long.iterator_));
120   }
121   return trimmed_long;
122 }
123 
ToString() const124 std::string DocHitInfoIteratorAnd::ToString() const {
125   return absl_ports::StrCat("(", short_->ToString(), " AND ", long_->ToString(),
126                             ")");
127 }
128 
DocHitInfoIteratorAndNary(std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)129 DocHitInfoIteratorAndNary::DocHitInfoIteratorAndNary(
130     std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
131     : iterators_(std::move(iterators)) {}
132 
Advance()133 libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
134   if (iterators_.size() < 2) {
135     return absl_ports::InvalidArgumentError(
136         "Not enough iterators to AND together");
137   }
138 
139   // Advance on the first iterator to get a potential hit
140   if (!iterators_.at(0)->Advance().ok()) {
141     // Didn't find anything for the first iterator, reset to invalid values and
142     // return
143     doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
144     return absl_ports::ResourceExhaustedError(
145         "No more DocHitInfos in iterator");
146   }
147   DocumentId potential_document_id =
148       iterators_.at(0)->doc_hit_info().document_id();
149 
150   // Our goal is to find the next document_id that exists on all the iterators
151   // by advancing the iterators one by one. We start with some
152   // "potential_document_id", check if it actually matches the above goal. If
153   // yes, return. If not, find the next best "potential" and repeat till we hit
154   // the end.
155 
156   // Has the current potential_document_id been found in all the iterators?
157   bool found_document_id = false;
158   while (!found_document_id) {
159     for (auto& iterator : iterators_) {
160       if (iterator->doc_hit_info().document_id() > potential_document_id) {
161         // Advance the current iterator until it's equal to or smaller than the
162         // potential hit doc id
163         DocumentId unused;
164         ICING_ASSIGN_OR_RETURN(
165             unused, AdvanceTo(iterator.get(), potential_document_id));
166         (void)unused;  // Silence unused warning.
167       }
168 
169       if (iterator->doc_hit_info().document_id() == potential_document_id) {
170         // The potential hit got matched on the iterators so far
171         found_document_id = true;
172         continue;
173       } else if (iterator->doc_hit_info().document_id() <
174                  potential_document_id) {
175         // This iterator doesn't have potential_document_id as we've gone past
176         // it already. Use the current document_id as the new
177         // "potential_document_id" and start checking all iterators again.
178         found_document_id = false;
179         potential_document_id = iterator->doc_hit_info().document_id();
180         break;
181       }
182     }
183   }
184 
185   // Found a DocumentId which exists in all the iterators
186   doc_hit_info_ = iterators_.at(0)->doc_hit_info();
187 
188   for (size_t i = 1; i < iterators_.size(); i++) {
189     doc_hit_info_.MergeSectionsFrom(
190         iterators_.at(i)->doc_hit_info().hit_section_ids_mask());
191   }
192   return libtextclassifier3::Status::OK;
193 }
194 
195 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()196 DocHitInfoIteratorAndNary::TrimRightMostNode() && {
197   ICING_ASSIGN_OR_RETURN(
198       TrimmedNode trimmed_right,
199       std::move(*iterators_.rbegin()->get()).TrimRightMostNode());
200   if (trimmed_right.iterator_ == nullptr) {
201     if (iterators_.size() > 2) {
202       iterators_.pop_back();
203       trimmed_right.iterator_ =
204           std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
205     } else if (iterators_.size() == 2) {
206       trimmed_right.iterator_ = std::move(iterators_.at(0));
207     }
208   } else {
209     iterators_.at(iterators_.size() - 1) = std::move(trimmed_right.iterator_);
210     trimmed_right.iterator_ =
211         std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators_));
212   }
213   return trimmed_right;
214 }
215 
GetCallStats() const216 DocHitInfoIterator::CallStats DocHitInfoIteratorAndNary::GetCallStats() const {
217   CallStats call_stats;
218   for (const auto& iter : iterators_) {
219     call_stats += iter->GetCallStats();
220   }
221   return call_stats;
222 }
223 
ToString() const224 std::string DocHitInfoIteratorAndNary::ToString() const {
225   std::string ret = "(";
226 
227   for (int i = 0; i < iterators_.size(); ++i) {
228     if (i == iterators_.size() - 1) {
229       // Last element in vector
230       absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), ")");
231     } else {
232       absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), " AND ");
233     }
234   }
235 
236   return ret;
237 }
238 
239 }  // namespace lib
240 }  // namespace icing
241