xref: /aosp_15_r20/external/icing/icing/query/query-processor.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/query/query-processor.h"
16 
17 #include <cstdint>
18 #include <memory>
19 #include <unordered_set>
20 #include <utility>
21 #include <vector>
22 
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/feature-flags.h"
27 #include "icing/index/embed/embedding-index.h"
28 #include "icing/index/index.h"
29 #include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
30 #include "icing/index/iterator/doc-hit-info-iterator-and.h"
31 #include "icing/index/iterator/doc-hit-info-iterator-by-uri.h"
32 #include "icing/index/iterator/doc-hit-info-iterator-filter.h"
33 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
34 #include "icing/index/iterator/doc-hit-info-iterator.h"
35 #include "icing/index/numeric/numeric-index.h"
36 #include "icing/join/join-children-fetcher.h"
37 #include "icing/proto/logging.pb.h"
38 #include "icing/proto/search.pb.h"
39 #include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
40 #include "icing/query/advanced_query_parser/lexer.h"
41 #include "icing/query/advanced_query_parser/parser.h"
42 #include "icing/query/advanced_query_parser/query-visitor.h"
43 #include "icing/query/query-features.h"
44 #include "icing/query/query-results.h"
45 #include "icing/query/query-utils.h"
46 #include "icing/schema/schema-store.h"
47 #include "icing/store/document-store.h"
48 #include "icing/tokenization/language-segmenter.h"
49 #include "icing/tokenization/tokenizer-factory.h"
50 #include "icing/tokenization/tokenizer.h"
51 #include "icing/transform/normalizer.h"
52 #include "icing/util/clock.h"
53 #include "icing/util/status-macros.h"
54 
55 namespace icing {
56 namespace lib {
57 
58 libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>>
Create(Index * index,const NumericIndex<int64_t> * numeric_index,const EmbeddingIndex * embedding_index,const LanguageSegmenter * language_segmenter,const Normalizer * normalizer,const DocumentStore * document_store,const SchemaStore * schema_store,const JoinChildrenFetcher * join_children_fetcher,const Clock * clock,const FeatureFlags * feature_flags)59 QueryProcessor::Create(Index* index, const NumericIndex<int64_t>* numeric_index,
60                        const EmbeddingIndex* embedding_index,
61                        const LanguageSegmenter* language_segmenter,
62                        const Normalizer* normalizer,
63                        const DocumentStore* document_store,
64                        const SchemaStore* schema_store,
65                        const JoinChildrenFetcher* join_children_fetcher,
66                        const Clock* clock, const FeatureFlags* feature_flags) {
67   ICING_RETURN_ERROR_IF_NULL(index);
68   ICING_RETURN_ERROR_IF_NULL(numeric_index);
69   ICING_RETURN_ERROR_IF_NULL(embedding_index);
70   ICING_RETURN_ERROR_IF_NULL(language_segmenter);
71   ICING_RETURN_ERROR_IF_NULL(normalizer);
72   ICING_RETURN_ERROR_IF_NULL(document_store);
73   ICING_RETURN_ERROR_IF_NULL(schema_store);
74   ICING_RETURN_ERROR_IF_NULL(clock);
75   ICING_RETURN_ERROR_IF_NULL(feature_flags);
76 
77   return std::unique_ptr<QueryProcessor>(new QueryProcessor(
78       index, numeric_index, embedding_index, language_segmenter, normalizer,
79       document_store, schema_store, join_children_fetcher, clock,
80       feature_flags));
81 }
82 
QueryProcessor(Index * index,const NumericIndex<int64_t> * numeric_index,const EmbeddingIndex * embedding_index,const LanguageSegmenter * language_segmenter,const Normalizer * normalizer,const DocumentStore * document_store,const SchemaStore * schema_store,const JoinChildrenFetcher * join_children_fetcher,const Clock * clock,const FeatureFlags * feature_flags)83 QueryProcessor::QueryProcessor(
84     Index* index, const NumericIndex<int64_t>* numeric_index,
85     const EmbeddingIndex* embedding_index,
86     const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
87     const DocumentStore* document_store, const SchemaStore* schema_store,
88     const JoinChildrenFetcher* join_children_fetcher, const Clock* clock,
89     const FeatureFlags* feature_flags)
90     : index_(*index),
91       numeric_index_(*numeric_index),
92       embedding_index_(*embedding_index),
93       language_segmenter_(*language_segmenter),
94       normalizer_(*normalizer),
95       document_store_(*document_store),
96       schema_store_(*schema_store),
97       join_children_fetcher_(join_children_fetcher),
98       clock_(*clock),
99       feature_flags_(*feature_flags) {}
100 
ParseSearch(const SearchSpecProto & search_spec,ScoringSpecProto::RankingStrategy::Code ranking_strategy,int64_t current_time_ms,QueryStatsProto::SearchStats * search_stats)101 libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
102     const SearchSpecProto& search_spec,
103     ScoringSpecProto::RankingStrategy::Code ranking_strategy,
104     int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) {
105   ICING_ASSIGN_OR_RETURN(QueryResults results,
106                          ParseAdvancedQuery(search_spec, ranking_strategy,
107                                             current_time_ms, search_stats));
108 
109   // Check that all new features used in the search have been enabled in the
110   // SearchSpec.
111   const std::unordered_set<Feature> enabled_features(
112       search_spec.enabled_features().begin(),
113       search_spec.enabled_features().end());
114   for (const Feature feature : results.features_in_use) {
115     if (enabled_features.find(feature) == enabled_features.end()) {
116       return absl_ports::InvalidArgumentError(absl_ports::StrCat(
117           "Attempted use of unenabled feature ", feature,
118           ". Please make sure that you have explicitly set all advanced query "
119           "features used in this query as enabled in the SearchSpec."));
120     }
121   }
122 
123   std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
124   if (search_spec.document_uri_filters_size() > 0) {
125     ICING_ASSIGN_OR_RETURN(
126         std::unique_ptr<DocHitInfoIteratorByUri> uri_iterator,
127         DocHitInfoIteratorByUri::Create(&document_store_, search_spec));
128     iterators.push_back(std::move(uri_iterator));
129   }
130   if (results.root_iterator != nullptr) {
131     iterators.push_back(std::move(results.root_iterator));
132   }
133   if (iterators.empty()) {
134     iterators.push_back(std::make_unique<DocHitInfoIteratorAllDocumentId>(
135         document_store_.last_added_document_id()));
136   }
137   results.root_iterator = CreateAndIterator(std::move(iterators));
138 
139   DocHitInfoIteratorFilter::Options options =
140       GetFilterOptions(search_spec, document_store_, schema_store_);
141   results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
142       std::move(results.root_iterator), &document_store_, &schema_store_,
143       options, current_time_ms);
144   if (!search_spec.type_property_filters().empty()) {
145     results.root_iterator =
146         DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
147             std::move(results.root_iterator), &document_store_, &schema_store_,
148             search_spec, current_time_ms);
149   }
150   return results;
151 }
152 
ParseAdvancedQuery(const SearchSpecProto & search_spec,ScoringSpecProto::RankingStrategy::Code ranking_strategy,int64_t current_time_ms,QueryStatsProto::SearchStats * search_stats) const153 libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
154     const SearchSpecProto& search_spec,
155     ScoringSpecProto::RankingStrategy::Code ranking_strategy,
156     int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) const {
157   std::unique_ptr<Timer> lexer_timer = clock_.GetNewTimer();
158   Lexer lexer(search_spec.query(), Lexer::Language::QUERY);
159   ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
160                          std::move(lexer).ExtractTokens());
161   if (search_stats != nullptr) {
162     search_stats->set_query_processor_lexer_extract_token_latency_ms(
163         lexer_timer->GetElapsedMilliseconds());
164   }
165 
166   std::unique_ptr<Timer> parser_timer = clock_.GetNewTimer();
167   Parser parser = Parser::Create(std::move(lexer_tokens));
168   ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
169                          parser.ConsumeQuery());
170   if (search_stats != nullptr) {
171     search_stats->set_query_processor_parser_consume_query_latency_ms(
172         parser_timer->GetElapsedMilliseconds());
173   }
174 
175   if (tree_root == nullptr) {
176     return QueryResults{/*root_iterator=*/nullptr};
177   }
178   ICING_ASSIGN_OR_RETURN(
179       std::unique_ptr<Tokenizer> plain_tokenizer,
180       tokenizer_factory::CreateIndexingTokenizer(
181           StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
182   DocHitInfoIteratorFilter::Options options =
183       GetFilterOptions(search_spec, document_store_, schema_store_);
184   bool needs_term_frequency_info =
185       ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
186 
187   std::unique_ptr<Timer> query_visitor_timer = clock_.GetNewTimer();
188   QueryVisitor query_visitor(
189       &index_, &numeric_index_, &embedding_index_, &document_store_,
190       &schema_store_, &normalizer_, plain_tokenizer.get(),
191       join_children_fetcher_, search_spec, std::move(options),
192       needs_term_frequency_info, &feature_flags_, current_time_ms);
193   tree_root->Accept(&query_visitor);
194   ICING_ASSIGN_OR_RETURN(QueryResults results,
195                          std::move(query_visitor).ConsumeResults());
196   if (search_stats != nullptr) {
197     search_stats->set_query_processor_query_visitor_latency_ms(
198         query_visitor_timer->GetElapsedMilliseconds());
199   }
200 
201   return results;
202 }
203 
204 }  // namespace lib
205 }  // namespace icing
206