1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/query/query-processor.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <unordered_set>
20 #include <utility>
21 #include <vector>
22
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/feature-flags.h"
27 #include "icing/index/embed/embedding-index.h"
28 #include "icing/index/index.h"
29 #include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
30 #include "icing/index/iterator/doc-hit-info-iterator-and.h"
31 #include "icing/index/iterator/doc-hit-info-iterator-by-uri.h"
32 #include "icing/index/iterator/doc-hit-info-iterator-filter.h"
33 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
34 #include "icing/index/iterator/doc-hit-info-iterator.h"
35 #include "icing/index/numeric/numeric-index.h"
36 #include "icing/join/join-children-fetcher.h"
37 #include "icing/proto/logging.pb.h"
38 #include "icing/proto/search.pb.h"
39 #include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
40 #include "icing/query/advanced_query_parser/lexer.h"
41 #include "icing/query/advanced_query_parser/parser.h"
42 #include "icing/query/advanced_query_parser/query-visitor.h"
43 #include "icing/query/query-features.h"
44 #include "icing/query/query-results.h"
45 #include "icing/query/query-utils.h"
46 #include "icing/schema/schema-store.h"
47 #include "icing/store/document-store.h"
48 #include "icing/tokenization/language-segmenter.h"
49 #include "icing/tokenization/tokenizer-factory.h"
50 #include "icing/tokenization/tokenizer.h"
51 #include "icing/transform/normalizer.h"
52 #include "icing/util/clock.h"
53 #include "icing/util/status-macros.h"
54
55 namespace icing {
56 namespace lib {
57
58 libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>>
Create(Index * index,const NumericIndex<int64_t> * numeric_index,const EmbeddingIndex * embedding_index,const LanguageSegmenter * language_segmenter,const Normalizer * normalizer,const DocumentStore * document_store,const SchemaStore * schema_store,const JoinChildrenFetcher * join_children_fetcher,const Clock * clock,const FeatureFlags * feature_flags)59 QueryProcessor::Create(Index* index, const NumericIndex<int64_t>* numeric_index,
60 const EmbeddingIndex* embedding_index,
61 const LanguageSegmenter* language_segmenter,
62 const Normalizer* normalizer,
63 const DocumentStore* document_store,
64 const SchemaStore* schema_store,
65 const JoinChildrenFetcher* join_children_fetcher,
66 const Clock* clock, const FeatureFlags* feature_flags) {
67 ICING_RETURN_ERROR_IF_NULL(index);
68 ICING_RETURN_ERROR_IF_NULL(numeric_index);
69 ICING_RETURN_ERROR_IF_NULL(embedding_index);
70 ICING_RETURN_ERROR_IF_NULL(language_segmenter);
71 ICING_RETURN_ERROR_IF_NULL(normalizer);
72 ICING_RETURN_ERROR_IF_NULL(document_store);
73 ICING_RETURN_ERROR_IF_NULL(schema_store);
74 ICING_RETURN_ERROR_IF_NULL(clock);
75 ICING_RETURN_ERROR_IF_NULL(feature_flags);
76
77 return std::unique_ptr<QueryProcessor>(new QueryProcessor(
78 index, numeric_index, embedding_index, language_segmenter, normalizer,
79 document_store, schema_store, join_children_fetcher, clock,
80 feature_flags));
81 }
82
QueryProcessor(Index * index,const NumericIndex<int64_t> * numeric_index,const EmbeddingIndex * embedding_index,const LanguageSegmenter * language_segmenter,const Normalizer * normalizer,const DocumentStore * document_store,const SchemaStore * schema_store,const JoinChildrenFetcher * join_children_fetcher,const Clock * clock,const FeatureFlags * feature_flags)83 QueryProcessor::QueryProcessor(
84 Index* index, const NumericIndex<int64_t>* numeric_index,
85 const EmbeddingIndex* embedding_index,
86 const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
87 const DocumentStore* document_store, const SchemaStore* schema_store,
88 const JoinChildrenFetcher* join_children_fetcher, const Clock* clock,
89 const FeatureFlags* feature_flags)
90 : index_(*index),
91 numeric_index_(*numeric_index),
92 embedding_index_(*embedding_index),
93 language_segmenter_(*language_segmenter),
94 normalizer_(*normalizer),
95 document_store_(*document_store),
96 schema_store_(*schema_store),
97 join_children_fetcher_(join_children_fetcher),
98 clock_(*clock),
99 feature_flags_(*feature_flags) {}
100
ParseSearch(const SearchSpecProto & search_spec,ScoringSpecProto::RankingStrategy::Code ranking_strategy,int64_t current_time_ms,QueryStatsProto::SearchStats * search_stats)101 libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
102 const SearchSpecProto& search_spec,
103 ScoringSpecProto::RankingStrategy::Code ranking_strategy,
104 int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) {
105 ICING_ASSIGN_OR_RETURN(QueryResults results,
106 ParseAdvancedQuery(search_spec, ranking_strategy,
107 current_time_ms, search_stats));
108
109 // Check that all new features used in the search have been enabled in the
110 // SearchSpec.
111 const std::unordered_set<Feature> enabled_features(
112 search_spec.enabled_features().begin(),
113 search_spec.enabled_features().end());
114 for (const Feature feature : results.features_in_use) {
115 if (enabled_features.find(feature) == enabled_features.end()) {
116 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
117 "Attempted use of unenabled feature ", feature,
118 ". Please make sure that you have explicitly set all advanced query "
119 "features used in this query as enabled in the SearchSpec."));
120 }
121 }
122
123 std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
124 if (search_spec.document_uri_filters_size() > 0) {
125 ICING_ASSIGN_OR_RETURN(
126 std::unique_ptr<DocHitInfoIteratorByUri> uri_iterator,
127 DocHitInfoIteratorByUri::Create(&document_store_, search_spec));
128 iterators.push_back(std::move(uri_iterator));
129 }
130 if (results.root_iterator != nullptr) {
131 iterators.push_back(std::move(results.root_iterator));
132 }
133 if (iterators.empty()) {
134 iterators.push_back(std::make_unique<DocHitInfoIteratorAllDocumentId>(
135 document_store_.last_added_document_id()));
136 }
137 results.root_iterator = CreateAndIterator(std::move(iterators));
138
139 DocHitInfoIteratorFilter::Options options =
140 GetFilterOptions(search_spec, document_store_, schema_store_);
141 results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
142 std::move(results.root_iterator), &document_store_, &schema_store_,
143 options, current_time_ms);
144 if (!search_spec.type_property_filters().empty()) {
145 results.root_iterator =
146 DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
147 std::move(results.root_iterator), &document_store_, &schema_store_,
148 search_spec, current_time_ms);
149 }
150 return results;
151 }
152
ParseAdvancedQuery(const SearchSpecProto & search_spec,ScoringSpecProto::RankingStrategy::Code ranking_strategy,int64_t current_time_ms,QueryStatsProto::SearchStats * search_stats) const153 libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
154 const SearchSpecProto& search_spec,
155 ScoringSpecProto::RankingStrategy::Code ranking_strategy,
156 int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) const {
157 std::unique_ptr<Timer> lexer_timer = clock_.GetNewTimer();
158 Lexer lexer(search_spec.query(), Lexer::Language::QUERY);
159 ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
160 std::move(lexer).ExtractTokens());
161 if (search_stats != nullptr) {
162 search_stats->set_query_processor_lexer_extract_token_latency_ms(
163 lexer_timer->GetElapsedMilliseconds());
164 }
165
166 std::unique_ptr<Timer> parser_timer = clock_.GetNewTimer();
167 Parser parser = Parser::Create(std::move(lexer_tokens));
168 ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
169 parser.ConsumeQuery());
170 if (search_stats != nullptr) {
171 search_stats->set_query_processor_parser_consume_query_latency_ms(
172 parser_timer->GetElapsedMilliseconds());
173 }
174
175 if (tree_root == nullptr) {
176 return QueryResults{/*root_iterator=*/nullptr};
177 }
178 ICING_ASSIGN_OR_RETURN(
179 std::unique_ptr<Tokenizer> plain_tokenizer,
180 tokenizer_factory::CreateIndexingTokenizer(
181 StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
182 DocHitInfoIteratorFilter::Options options =
183 GetFilterOptions(search_spec, document_store_, schema_store_);
184 bool needs_term_frequency_info =
185 ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
186
187 std::unique_ptr<Timer> query_visitor_timer = clock_.GetNewTimer();
188 QueryVisitor query_visitor(
189 &index_, &numeric_index_, &embedding_index_, &document_store_,
190 &schema_store_, &normalizer_, plain_tokenizer.get(),
191 join_children_fetcher_, search_spec, std::move(options),
192 needs_term_frequency_info, &feature_flags_, current_time_ms);
193 tree_root->Accept(&query_visitor);
194 ICING_ASSIGN_OR_RETURN(QueryResults results,
195 std::move(query_visitor).ConsumeResults());
196 if (search_stats != nullptr) {
197 search_stats->set_query_processor_query_visitor_latency_ms(
198 query_visitor_timer->GetElapsedMilliseconds());
199 }
200
201 return results;
202 }
203
204 } // namespace lib
205 } // namespace icing
206