1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_QUERY_QUERY_PROCESSOR_H_ 16 #define ICING_QUERY_QUERY_PROCESSOR_H_ 17 18 #include <cstdint> 19 #include <memory> 20 21 #include "icing/text_classifier/lib3/utils/base/statusor.h" 22 #include "icing/feature-flags.h" 23 #include "icing/index/embed/embedding-index.h" 24 #include "icing/index/index.h" 25 #include "icing/index/numeric/numeric-index.h" 26 #include "icing/join/join-children-fetcher.h" 27 #include "icing/proto/logging.pb.h" 28 #include "icing/proto/search.pb.h" 29 #include "icing/query/query-results.h" 30 #include "icing/schema/schema-store.h" 31 #include "icing/store/document-store.h" 32 #include "icing/tokenization/language-segmenter.h" 33 #include "icing/transform/normalizer.h" 34 #include "icing/util/clock.h" 35 36 namespace icing { 37 namespace lib { 38 39 // Processes SearchSpecProtos and retrieves the specified DocHitInfos that 40 // satisfies the query and its restrictions. This does not perform any scoring, 41 // and returns matched documents in a descending DocumentId order. 42 class QueryProcessor { 43 public: 44 // Factory function to create a QueryProcessor which does not take ownership 45 // of any input components, and all pointers must refer to valid objects that 46 // outlive the created QueryProcessor instance. 47 // 48 // Returns: 49 // An QueryProcessor on success 50 // FAILED_PRECONDITION if any of the pointers is null. 51 static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create( 52 Index* index, const NumericIndex<int64_t>* numeric_index, 53 const EmbeddingIndex* embedding_index, 54 const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, 55 const DocumentStore* document_store, const SchemaStore* schema_store, 56 const JoinChildrenFetcher* join_children_fetcher, const Clock* clock, 57 const FeatureFlags* feature_flags); 58 59 // Parse the search configurations (including the query, any additional 60 // filters, etc.) in the SearchSpecProto into one DocHitInfoIterator. 61 // 62 // When ranking_strategy == RELEVANCE_SCORE, the root_iterator and the 63 // query_term_iterators returned will keep term frequency information 64 // internally, so that term frequency stats will be collected when calling 65 // PopulateMatchedTermsStats to the iterators. 66 // 67 // Returns: 68 // On success, 69 // - One iterator that represents the entire query 70 // - A map representing the query terms and any section restrictions 71 // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized 72 // INTERNAL_ERROR on all other errors 73 libtextclassifier3::StatusOr<QueryResults> ParseSearch( 74 const SearchSpecProto& search_spec, 75 ScoringSpecProto::RankingStrategy::Code ranking_strategy, 76 int64_t current_time_ms, 77 QueryStatsProto::SearchStats* search_stats = nullptr); 78 79 private: 80 explicit QueryProcessor( 81 Index* index, const NumericIndex<int64_t>* numeric_index, 82 const EmbeddingIndex* embedding_index, 83 const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, 84 const DocumentStore* document_store, const SchemaStore* schema_store, 85 const JoinChildrenFetcher* join_children_fetcher, const Clock* clock, 86 const FeatureFlags* feature_flags); 87 88 // Parse the query into a QueryResults object, which holds a 89 // DocHitInfoIterator that represents the root of a query tree in our new 90 // Advanced Query Language. 91 // 92 // Returns: 93 // On success, 94 // - A QueryResults instance. If the query is empty, the 95 // DocHitInfoIterator that it holds will be nullptr. 96 // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized 97 libtextclassifier3::StatusOr<QueryResults> ParseAdvancedQuery( 98 const SearchSpecProto& search_spec, 99 ScoringSpecProto::RankingStrategy::Code ranking_strategy, 100 int64_t current_time_ms, 101 QueryStatsProto::SearchStats* search_stats) const; 102 103 // Parse the query into a one DocHitInfoIterator that represents the root of a 104 // query tree. 105 // 106 // Returns: 107 // On success, 108 // - One iterator that represents the entire query 109 // - A map representing the query terms and any section restrictions 110 // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized 111 // INTERNAL_ERROR on all other errors 112 libtextclassifier3::StatusOr<QueryResults> ParseRawQuery( 113 const SearchSpecProto& search_spec, 114 ScoringSpecProto::RankingStrategy::Code ranking_strategy, 115 int64_t current_time_ms); 116 117 // Not const because we could modify/sort the hit buffer in the lite index at 118 // query time. 119 Index& index_; // Does not own. 120 const NumericIndex<int64_t>& numeric_index_; // Does not own. 121 const EmbeddingIndex& embedding_index_; // Does not own. 122 const LanguageSegmenter& language_segmenter_; // Does not own. 123 const Normalizer& normalizer_; // Does not own. 124 const DocumentStore& document_store_; // Does not own. 125 const SchemaStore& schema_store_; // Does not own. 126 // Nullable. A non-null join_children_fetcher_ indicates that this is the 127 // parent query for a join query, in which case child scores are available. 128 const JoinChildrenFetcher* join_children_fetcher_; // Does not own. 129 const Clock& clock_; // Does not own. 130 const FeatureFlags& feature_flags_; // Does not own. 131 }; 132 133 } // namespace lib 134 } // namespace icing 135 136 #endif // ICING_QUERY_QUERY_PROCESSOR_H_ 137