xref: /aosp_15_r20/external/icing/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
16 
17 #include <cstdint>
18 #include <memory>
19 #include <set>
20 #include <string>
21 #include <string_view>
22 #include <unordered_map>
23 #include <utility>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "icing/absl_ports/canonical_errors.h"
29 #include "icing/absl_ports/str_cat.h"
30 #include "icing/absl_ports/str_join.h"
31 #include "icing/index/hit/doc-hit-info.h"
32 #include "icing/index/iterator/doc-hit-info-iterator.h"
33 #include "icing/index/iterator/section-restrict-data.h"
34 #include "icing/proto/search.pb.h"
35 #include "icing/schema/schema-store.h"
36 #include "icing/schema/section.h"
37 #include "icing/store/document-filter-data.h"
38 #include "icing/store/document-id.h"
39 #include "icing/store/document-store.h"
40 #include "icing/util/status-macros.h"
41 
42 namespace icing {
43 namespace lib {
44 
45 // An iterator that simply takes ownership of SectionRestrictData.
46 class SectionRestrictDataHolderIterator : public DocHitInfoIterator {
47  public:
SectionRestrictDataHolderIterator(std::unique_ptr<DocHitInfoIterator> delegate,std::unique_ptr<SectionRestrictData> data)48   explicit SectionRestrictDataHolderIterator(
49       std::unique_ptr<DocHitInfoIterator> delegate,
50       std::unique_ptr<SectionRestrictData> data)
51       : delegate_(std::move(delegate)), data_(std::move(data)) {}
52 
Advance()53   libtextclassifier3::Status Advance() override {
54     auto result = delegate_->Advance();
55     doc_hit_info_ = delegate_->doc_hit_info();
56     return result;
57   }
58 
TrimRightMostNode()59   libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
60     ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
61                            std::move(*delegate_).TrimRightMostNode());
62     if (trimmed_delegate.iterator_ != nullptr) {
63       trimmed_delegate.iterator_ =
64           std::make_unique<SectionRestrictDataHolderIterator>(
65               std::move(trimmed_delegate.iterator_), std::move(data_));
66     }
67     return trimmed_delegate;
68   }
69 
MapChildren(const ChildrenMapper & mapper)70   void MapChildren(const ChildrenMapper& mapper) override {
71     delegate_ = mapper(std::move(delegate_));
72   }
73 
GetCallStats() const74   CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
75 
ToString() const76   std::string ToString() const override { return delegate_->ToString(); }
77 
PopulateMatchedTermsStats(std::vector<TermMatchInfo> * matched_terms_stats,SectionIdMask filtering_section_mask) const78   void PopulateMatchedTermsStats(
79       std::vector<TermMatchInfo>* matched_terms_stats,
80       SectionIdMask filtering_section_mask) const override {
81     return delegate_->PopulateMatchedTermsStats(matched_terms_stats,
82                                                 filtering_section_mask);
83   }
84 
85  private:
86   std::unique_ptr<DocHitInfoIterator> delegate_;
87   std::unique_ptr<SectionRestrictData> data_;
88 };
89 
DocHitInfoIteratorSectionRestrict(std::unique_ptr<DocHitInfoIterator> delegate,SectionRestrictData * data)90 DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
91     std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data)
92     : delegate_(std::move(delegate)), data_(data) {}
93 
94 std::unique_ptr<DocHitInfoIterator>
ApplyRestrictions(std::unique_ptr<DocHitInfoIterator> iterator,const DocumentStore * document_store,const SchemaStore * schema_store,std::set<std::string> target_sections,int64_t current_time_ms)95 DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
96     std::unique_ptr<DocHitInfoIterator> iterator,
97     const DocumentStore* document_store, const SchemaStore* schema_store,
98     std::set<std::string> target_sections, int64_t current_time_ms) {
99   std::unordered_map<std::string, std::set<std::string>> type_property_filters;
100   type_property_filters[std::string(SchemaStore::kSchemaTypeWildcard)] =
101       std::move(target_sections);
102   auto data = std::make_unique<SectionRestrictData>(
103       document_store, schema_store, current_time_ms, type_property_filters);
104   std::unique_ptr<DocHitInfoIterator> result =
105       ApplyRestrictions(std::move(iterator), data.get());
106   return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
107                                                              std::move(data));
108 }
109 
110 std::unique_ptr<DocHitInfoIterator>
ApplyRestrictions(std::unique_ptr<DocHitInfoIterator> iterator,const DocumentStore * document_store,const SchemaStore * schema_store,const SearchSpecProto & search_spec,int64_t current_time_ms)111 DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
112     std::unique_ptr<DocHitInfoIterator> iterator,
113     const DocumentStore* document_store, const SchemaStore* schema_store,
114     const SearchSpecProto& search_spec, int64_t current_time_ms) {
115   std::unordered_map<std::string, std::set<std::string>> type_property_filters;
116   for (const SchemaStore::ExpandedTypePropertyMask& type_property_mask :
117        schema_store->ExpandTypePropertyMasks(
118            search_spec.type_property_filters())) {
119     type_property_filters[type_property_mask.schema_type] =
120         std::set<std::string>(type_property_mask.paths.begin(),
121                               type_property_mask.paths.end());
122   }
123   auto data = std::make_unique<SectionRestrictData>(
124       document_store, schema_store, current_time_ms, type_property_filters);
125   std::unique_ptr<DocHitInfoIterator> result =
126       ApplyRestrictions(std::move(iterator), data.get());
127   return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
128                                                              std::move(data));
129 }
130 
131 std::unique_ptr<DocHitInfoIterator>
ApplyRestrictions(std::unique_ptr<DocHitInfoIterator> iterator,SectionRestrictData * data)132 DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
133     std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data) {
134   ChildrenMapper mapper;
135   mapper = [&data, &mapper](std::unique_ptr<DocHitInfoIterator> iterator)
136       -> std::unique_ptr<DocHitInfoIterator> {
137     if (iterator->HandleSectionRestriction(data)) {
138       return iterator;
139     } else if (iterator->is_leaf()) {
140       return std::make_unique<DocHitInfoIteratorSectionRestrict>(
141           std::move(iterator), data);
142     } else {
143       iterator->MapChildren(mapper);
144       return iterator;
145     }
146   };
147   return mapper(std::move(iterator));
148 }
149 
Advance()150 libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
151   doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
152   while (delegate_->Advance().ok()) {
153     DocumentId document_id = delegate_->doc_hit_info().document_id();
154     SectionIdMask allowed_sections_mask =
155         data_->ComputeAllowedSectionsMask(document_id);
156 
157     // A hit can be in multiple sections at once, need to check which of the
158     // section ids match the sections allowed by type_property_masks_. This can
159     // be done by doing a bitwise and of the section_id_mask in the doc hit and
160     // the allowed_sections_mask.
161     SectionIdMask section_id_mask =
162         delegate_->doc_hit_info().hit_section_ids_mask() &
163         allowed_sections_mask;
164 
165     // Return this document if:
166     //  - the sectionIdMask is not empty after applying property filters, or
167     //  - no property filters apply for its schema type (allowed_sections_mask
168     //    == kSectionIdMaskAll). This is needed to ensure that in case of empty
169     //    query (which uses doc-hit-info-iterator-all-document-id), where
170     //    section_id_mask is kSectionIdMaskNone, doc hits with no property
171     //    restrictions don't get filtered out. Doc hits for schema types for
172     //    whom property filters are specified will still get filtered out.
173     if (allowed_sections_mask == kSectionIdMaskAll ||
174         section_id_mask != kSectionIdMaskNone) {
175       doc_hit_info_ = delegate_->doc_hit_info();
176       doc_hit_info_.set_hit_section_ids_mask(section_id_mask);
177       return libtextclassifier3::Status::OK;
178     }
179     // Didn't find a matching section name for this hit. Continue.
180   }
181 
182   // Didn't find anything on the delegate iterator.
183   return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
184 }
185 
186 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()187 DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
188   ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
189                          std::move(*delegate_).TrimRightMostNode());
190   // TrimRightMostNode is only used by suggestion processor to process query
191   // expression, so an entry for wildcard should always be present in
192   // type_property_filters_ when code flow reaches here. If the InternalError
193   // below is returned, that means TrimRightMostNode hasn't been called in the
194   // right context.
195   const auto it = data_->type_property_filters().find("*");
196   if (it == data_->type_property_filters().end()) {
197     return absl_ports::InternalError(
198         "A wildcard entry should always be present in type property filters "
199         "whenever TrimRightMostNode() is called for "
200         "DocHitInfoIteratorSectionRestrict");
201   }
202   const std::set<std::string>& target_sections = it->second;
203   if (target_sections.empty()) {
204     return absl_ports::InternalError(
205         "Target sections should not be empty whenever TrimRightMostNode() is "
206         "called for DocHitInfoIteratorSectionRestrict");
207   }
208   if (trimmed_delegate.iterator_ == nullptr) {
209     // TODO(b/228240987): Update TrimmedNode and downstream code to handle
210     // multiple section restricts.
211     trimmed_delegate.target_section_ = std::move(*target_sections.begin());
212     return trimmed_delegate;
213   }
214   trimmed_delegate.iterator_ =
215       std::unique_ptr<DocHitInfoIteratorSectionRestrict>(
216           new DocHitInfoIteratorSectionRestrict(
217               std::move(trimmed_delegate.iterator_), std::move(data_)));
218   return std::move(trimmed_delegate);
219 }
220 
ToString() const221 std::string DocHitInfoIteratorSectionRestrict::ToString() const {
222   std::string output = "";
223   for (auto it = data_->type_property_filters().cbegin();
224        it != data_->type_property_filters().cend(); it++) {
225     std::string paths = absl_ports::StrJoin(it->second, ",");
226     output += (it->first) + ":" + (paths) + "; ";
227   }
228   std::string result = "{" + output.substr(0, output.size() - 2) + "}: ";
229   return absl_ports::StrCat(result, delegate_->ToString());
230 }
231 
232 }  // namespace lib
233 }  // namespace icing
234