xref: /aosp_15_r20/external/icing/icing/query/query-processor.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_QUERY_QUERY_PROCESSOR_H_
16 #define ICING_QUERY_QUERY_PROCESSOR_H_
17 
18 #include <cstdint>
19 #include <memory>
20 
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "icing/feature-flags.h"
23 #include "icing/index/embed/embedding-index.h"
24 #include "icing/index/index.h"
25 #include "icing/index/numeric/numeric-index.h"
26 #include "icing/join/join-children-fetcher.h"
27 #include "icing/proto/logging.pb.h"
28 #include "icing/proto/search.pb.h"
29 #include "icing/query/query-results.h"
30 #include "icing/schema/schema-store.h"
31 #include "icing/store/document-store.h"
32 #include "icing/tokenization/language-segmenter.h"
33 #include "icing/transform/normalizer.h"
34 #include "icing/util/clock.h"
35 
36 namespace icing {
37 namespace lib {
38 
39 // Processes SearchSpecProtos and retrieves the DocHitInfos that satisfy the
40 // query and its restrictions. This does not perform any scoring, and returns
41 // matched documents in descending DocumentId order.
42 class QueryProcessor {
43  public:
44   // Factory function to create a QueryProcessor which does not take ownership
45   // of any input components; all non-null pointers must outlive the created
46   // QueryProcessor instance. NOTE(review): join_children_fetcher_ is marked
47   // nullable below, so it is presumably exempt from the null check; confirm.
48   // Returns:
49   //   A QueryProcessor on success
50   //   FAILED_PRECONDITION if any of the required pointers is null.
51   static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create(
52       Index* index, const NumericIndex<int64_t>* numeric_index,
53       const EmbeddingIndex* embedding_index,
54       const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
55       const DocumentStore* document_store, const SchemaStore* schema_store,
56       const JoinChildrenFetcher* join_children_fetcher, const Clock* clock,
57       const FeatureFlags* feature_flags);
58 
59   // Parse the search configurations (including the query, any additional
60   // filters, etc.) in the SearchSpecProto into one DocHitInfoIterator.
61   //
62   // When ranking_strategy == RELEVANCE_SCORE, the root_iterator and the
63   // query_term_iterators returned will keep term frequency information
64   // internally, so that term frequency stats will be collected when calling
65   // PopulateMatchedTermsStats on the iterators. search_stats may be omitted
66   // (it defaults to nullptr).
67   // Returns:
68   //   On success,
69   //     - One iterator that represents the entire query
70   //     - A map representing the query terms and any section restrictions
71   //   INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
72   //   INTERNAL_ERROR on all other errors
73   libtextclassifier3::StatusOr<QueryResults> ParseSearch(
74       const SearchSpecProto& search_spec,
75       ScoringSpecProto::RankingStrategy::Code ranking_strategy,
76       int64_t current_time_ms,
77       QueryStatsProto::SearchStats* search_stats = nullptr);
78 
79  private:
80   explicit QueryProcessor(
81       Index* index, const NumericIndex<int64_t>* numeric_index,
82       const EmbeddingIndex* embedding_index,
83       const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
84       const DocumentStore* document_store, const SchemaStore* schema_store,
85       const JoinChildrenFetcher* join_children_fetcher, const Clock* clock,
86       const FeatureFlags* feature_flags);
87 
88   // Parse the query into a QueryResults object, which holds a
89   // DocHitInfoIterator that represents the root of a query tree in our new
90   // Advanced Query Language.
91   //
92   // Returns:
93   //   On success,
94   //     - A QueryResults instance. If the query is empty, the
95   //       DocHitInfoIterator that it holds will be nullptr.
96   //   INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
97   libtextclassifier3::StatusOr<QueryResults> ParseAdvancedQuery(
98       const SearchSpecProto& search_spec,
99       ScoringSpecProto::RankingStrategy::Code ranking_strategy,
100       int64_t current_time_ms,
101       QueryStatsProto::SearchStats* search_stats) const;
102 
103   // Parse the query into one DocHitInfoIterator that represents the root of a
104   // query tree.
105   //
106   // Returns:
107   //   On success,
108   //     - One iterator that represents the entire query
109   //     - A map representing the query terms and any section restrictions
110   //   INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
111   //   INTERNAL_ERROR on all other errors
112   libtextclassifier3::StatusOr<QueryResults> ParseRawQuery(
113       const SearchSpecProto& search_spec,
114       ScoringSpecProto::RankingStrategy::Code ranking_strategy,
115       int64_t current_time_ms);
116 
117   // index_ is not const because we could modify/sort the hit buffer in the
118   // lite index at query time.
119   Index& index_;                                 // Does not own.
120   const NumericIndex<int64_t>& numeric_index_;   // Does not own.
121   const EmbeddingIndex& embedding_index_;        // Does not own.
122   const LanguageSegmenter& language_segmenter_;  // Does not own.
123   const Normalizer& normalizer_;                 // Does not own.
124   const DocumentStore& document_store_;          // Does not own.
125   const SchemaStore& schema_store_;              // Does not own.
126   // Nullable. A non-null join_children_fetcher_ indicates that this is the
127   // parent query for a join query, in which case child scores are available.
128   const JoinChildrenFetcher* join_children_fetcher_;  // Does not own.
129   const Clock& clock_;                                // Does not own.
130   const FeatureFlags& feature_flags_;                 // Does not own.
131 };
132 
133 }  // namespace lib
134 }  // namespace icing
135 
136 #endif  // ICING_QUERY_QUERY_PROCESSOR_H_
137