1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <set>
20 #include <string>
21 #include <string_view>
22 #include <unordered_map>
23 #include <utility>
24 #include <vector>
25
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "icing/absl_ports/canonical_errors.h"
29 #include "icing/absl_ports/str_cat.h"
30 #include "icing/absl_ports/str_join.h"
31 #include "icing/index/hit/doc-hit-info.h"
32 #include "icing/index/iterator/doc-hit-info-iterator.h"
33 #include "icing/index/iterator/section-restrict-data.h"
34 #include "icing/proto/search.pb.h"
35 #include "icing/schema/schema-store.h"
36 #include "icing/schema/section.h"
37 #include "icing/store/document-filter-data.h"
38 #include "icing/store/document-id.h"
39 #include "icing/store/document-store.h"
40 #include "icing/util/status-macros.h"
41
42 namespace icing {
43 namespace lib {
44
45 // An iterator that simply takes ownership of SectionRestrictData.
46 class SectionRestrictDataHolderIterator : public DocHitInfoIterator {
47 public:
SectionRestrictDataHolderIterator(std::unique_ptr<DocHitInfoIterator> delegate,std::unique_ptr<SectionRestrictData> data)48 explicit SectionRestrictDataHolderIterator(
49 std::unique_ptr<DocHitInfoIterator> delegate,
50 std::unique_ptr<SectionRestrictData> data)
51 : delegate_(std::move(delegate)), data_(std::move(data)) {}
52
Advance()53 libtextclassifier3::Status Advance() override {
54 auto result = delegate_->Advance();
55 doc_hit_info_ = delegate_->doc_hit_info();
56 return result;
57 }
58
TrimRightMostNode()59 libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override {
60 ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
61 std::move(*delegate_).TrimRightMostNode());
62 if (trimmed_delegate.iterator_ != nullptr) {
63 trimmed_delegate.iterator_ =
64 std::make_unique<SectionRestrictDataHolderIterator>(
65 std::move(trimmed_delegate.iterator_), std::move(data_));
66 }
67 return trimmed_delegate;
68 }
69
MapChildren(const ChildrenMapper & mapper)70 void MapChildren(const ChildrenMapper& mapper) override {
71 delegate_ = mapper(std::move(delegate_));
72 }
73
GetCallStats() const74 CallStats GetCallStats() const override { return delegate_->GetCallStats(); }
75
ToString() const76 std::string ToString() const override { return delegate_->ToString(); }
77
PopulateMatchedTermsStats(std::vector<TermMatchInfo> * matched_terms_stats,SectionIdMask filtering_section_mask) const78 void PopulateMatchedTermsStats(
79 std::vector<TermMatchInfo>* matched_terms_stats,
80 SectionIdMask filtering_section_mask) const override {
81 return delegate_->PopulateMatchedTermsStats(matched_terms_stats,
82 filtering_section_mask);
83 }
84
85 private:
86 std::unique_ptr<DocHitInfoIterator> delegate_;
87 std::unique_ptr<SectionRestrictData> data_;
88 };
89
DocHitInfoIteratorSectionRestrict(std::unique_ptr<DocHitInfoIterator> delegate,SectionRestrictData * data)90 DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
91 std::unique_ptr<DocHitInfoIterator> delegate, SectionRestrictData* data)
92 : delegate_(std::move(delegate)), data_(data) {}
93
94 std::unique_ptr<DocHitInfoIterator>
ApplyRestrictions(std::unique_ptr<DocHitInfoIterator> iterator,const DocumentStore * document_store,const SchemaStore * schema_store,std::set<std::string> target_sections,int64_t current_time_ms)95 DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
96 std::unique_ptr<DocHitInfoIterator> iterator,
97 const DocumentStore* document_store, const SchemaStore* schema_store,
98 std::set<std::string> target_sections, int64_t current_time_ms) {
99 std::unordered_map<std::string, std::set<std::string>> type_property_filters;
100 type_property_filters[std::string(SchemaStore::kSchemaTypeWildcard)] =
101 std::move(target_sections);
102 auto data = std::make_unique<SectionRestrictData>(
103 document_store, schema_store, current_time_ms, type_property_filters);
104 std::unique_ptr<DocHitInfoIterator> result =
105 ApplyRestrictions(std::move(iterator), data.get());
106 return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
107 std::move(data));
108 }
109
110 std::unique_ptr<DocHitInfoIterator>
ApplyRestrictions(std::unique_ptr<DocHitInfoIterator> iterator,const DocumentStore * document_store,const SchemaStore * schema_store,const SearchSpecProto & search_spec,int64_t current_time_ms)111 DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
112 std::unique_ptr<DocHitInfoIterator> iterator,
113 const DocumentStore* document_store, const SchemaStore* schema_store,
114 const SearchSpecProto& search_spec, int64_t current_time_ms) {
115 std::unordered_map<std::string, std::set<std::string>> type_property_filters;
116 for (const SchemaStore::ExpandedTypePropertyMask& type_property_mask :
117 schema_store->ExpandTypePropertyMasks(
118 search_spec.type_property_filters())) {
119 type_property_filters[type_property_mask.schema_type] =
120 std::set<std::string>(type_property_mask.paths.begin(),
121 type_property_mask.paths.end());
122 }
123 auto data = std::make_unique<SectionRestrictData>(
124 document_store, schema_store, current_time_ms, type_property_filters);
125 std::unique_ptr<DocHitInfoIterator> result =
126 ApplyRestrictions(std::move(iterator), data.get());
127 return std::make_unique<SectionRestrictDataHolderIterator>(std::move(result),
128 std::move(data));
129 }
130
131 std::unique_ptr<DocHitInfoIterator>
ApplyRestrictions(std::unique_ptr<DocHitInfoIterator> iterator,SectionRestrictData * data)132 DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
133 std::unique_ptr<DocHitInfoIterator> iterator, SectionRestrictData* data) {
134 ChildrenMapper mapper;
135 mapper = [&data, &mapper](std::unique_ptr<DocHitInfoIterator> iterator)
136 -> std::unique_ptr<DocHitInfoIterator> {
137 if (iterator->HandleSectionRestriction(data)) {
138 return iterator;
139 } else if (iterator->is_leaf()) {
140 return std::make_unique<DocHitInfoIteratorSectionRestrict>(
141 std::move(iterator), data);
142 } else {
143 iterator->MapChildren(mapper);
144 return iterator;
145 }
146 };
147 return mapper(std::move(iterator));
148 }
149
Advance()150 libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
151 doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
152 while (delegate_->Advance().ok()) {
153 DocumentId document_id = delegate_->doc_hit_info().document_id();
154 SectionIdMask allowed_sections_mask =
155 data_->ComputeAllowedSectionsMask(document_id);
156
157 // A hit can be in multiple sections at once, need to check which of the
158 // section ids match the sections allowed by type_property_masks_. This can
159 // be done by doing a bitwise and of the section_id_mask in the doc hit and
160 // the allowed_sections_mask.
161 SectionIdMask section_id_mask =
162 delegate_->doc_hit_info().hit_section_ids_mask() &
163 allowed_sections_mask;
164
165 // Return this document if:
166 // - the sectionIdMask is not empty after applying property filters, or
167 // - no property filters apply for its schema type (allowed_sections_mask
168 // == kSectionIdMaskAll). This is needed to ensure that in case of empty
169 // query (which uses doc-hit-info-iterator-all-document-id), where
170 // section_id_mask is kSectionIdMaskNone, doc hits with no property
171 // restrictions don't get filtered out. Doc hits for schema types for
172 // whom property filters are specified will still get filtered out.
173 if (allowed_sections_mask == kSectionIdMaskAll ||
174 section_id_mask != kSectionIdMaskNone) {
175 doc_hit_info_ = delegate_->doc_hit_info();
176 doc_hit_info_.set_hit_section_ids_mask(section_id_mask);
177 return libtextclassifier3::Status::OK;
178 }
179 // Didn't find a matching section name for this hit. Continue.
180 }
181
182 // Didn't find anything on the delegate iterator.
183 return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
184 }
185
186 libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
TrimRightMostNode()187 DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
188 ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
189 std::move(*delegate_).TrimRightMostNode());
190 // TrimRightMostNode is only used by suggestion processor to process query
191 // expression, so an entry for wildcard should always be present in
192 // type_property_filters_ when code flow reaches here. If the InternalError
193 // below is returned, that means TrimRightMostNode hasn't been called in the
194 // right context.
195 const auto it = data_->type_property_filters().find("*");
196 if (it == data_->type_property_filters().end()) {
197 return absl_ports::InternalError(
198 "A wildcard entry should always be present in type property filters "
199 "whenever TrimRightMostNode() is called for "
200 "DocHitInfoIteratorSectionRestrict");
201 }
202 const std::set<std::string>& target_sections = it->second;
203 if (target_sections.empty()) {
204 return absl_ports::InternalError(
205 "Target sections should not be empty whenever TrimRightMostNode() is "
206 "called for DocHitInfoIteratorSectionRestrict");
207 }
208 if (trimmed_delegate.iterator_ == nullptr) {
209 // TODO(b/228240987): Update TrimmedNode and downstream code to handle
210 // multiple section restricts.
211 trimmed_delegate.target_section_ = std::move(*target_sections.begin());
212 return trimmed_delegate;
213 }
214 trimmed_delegate.iterator_ =
215 std::unique_ptr<DocHitInfoIteratorSectionRestrict>(
216 new DocHitInfoIteratorSectionRestrict(
217 std::move(trimmed_delegate.iterator_), std::move(data_)));
218 return std::move(trimmed_delegate);
219 }
220
ToString() const221 std::string DocHitInfoIteratorSectionRestrict::ToString() const {
222 std::string output = "";
223 for (auto it = data_->type_property_filters().cbegin();
224 it != data_->type_property_filters().cend(); it++) {
225 std::string paths = absl_ports::StrJoin(it->second, ",");
226 output += (it->first) + ":" + (paths) + "; ";
227 }
228 std::string result = "{" + output.substr(0, output.size() - 2) + "}: ";
229 return absl_ports::StrCat(result, delegate_->ToString());
230 }
231
232 } // namespace lib
233 } // namespace icing
234