1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_RESULT_RESULT_STATE_MANAGER_H_ 16 #define ICING_RESULT_RESULT_STATE_MANAGER_H_ 17 18 #include <atomic> 19 #include <memory> 20 #include <queue> 21 #include <random> 22 #include <unordered_map> 23 #include <unordered_set> 24 25 #include "icing/text_classifier/lib3/utils/base/statusor.h" 26 #include "icing/absl_ports/mutex.h" 27 #include "icing/proto/search.pb.h" 28 #include "icing/result/page-result.h" 29 #include "icing/result/result-adjustment-info.h" 30 #include "icing/result/result-retriever-v2.h" 31 #include "icing/result/result-state-v2.h" 32 #include "icing/scoring/scored-document-hits-ranker.h" 33 #include "icing/util/clock.h" 34 35 namespace icing { 36 namespace lib { 37 38 // This should be the same as the default value of 39 // SearchResultProto.next_page_token. 40 inline constexpr uint64_t kInvalidNextPageToken = 0; 41 42 // 1 hr as the default ttl for a ResultState after being pushed into 43 // token_queue_. 44 inline constexpr int64_t kDefaultResultStateTtlInMs = 1LL * 60 * 60 * 1000; 45 46 // Used to store and manage ResultState. 47 class ResultStateManager { 48 public: 49 explicit ResultStateManager(int max_total_hits, 50 const DocumentStore& document_store); 51 52 ResultStateManager(const ResultStateManager&) = delete; 53 ResultStateManager& operator=(const ResultStateManager&) = delete; 54 55 // Creates a new result state, retrieves and returns PageResult for the first 56 // page. Also caches the new result state and returns a next_page_token which 57 // can be used to fetch more pages from the same result state later. Before 58 // caching the result state, adjusts (truncate) the size and evicts some old 59 // result states if exceeding the cache size limit. next_page_token will be 60 // set to a default value kInvalidNextPageToken if there're no more pages. 61 // 62 // NOTE: parent_adjustment_info and child_adjustment_info can be nullptr if 63 // there is no requirement to apply adjustment (snippet, projection) to 64 // them. 65 // 66 // NOTE: it is possible to have empty result for the first page even if the 67 // ranker was not empty before the retrieval, since GroupResultLimiter 68 // may filter out all docs. In this case, the first page is also the 69 // last page and next_page_token will be set to kInvalidNextPageToken. 70 // 71 // Returns: 72 // A token and PageResult wrapped by std::pair on success 73 // INVALID_ARGUMENT if the input ranker is null or contains no results 74 libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>> 75 CacheAndRetrieveFirstPage( 76 std::unique_ptr<ScoredDocumentHitsRanker> ranker, 77 std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info, 78 std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info, 79 const ResultSpecProto& result_spec, const DocumentStore& document_store, 80 const ResultRetrieverV2& result_retriever, int64_t current_time_ms) 81 ICING_LOCKS_EXCLUDED(mutex_); 82 83 // Retrieves and returns PageResult for the next page. 84 // The returned results won't exist in ResultStateManager anymore. If the 85 // query has no more pages after this retrieval, the input token will be 86 // invalidated. 87 // 88 // NOTE: it is possible to have empty result for the last page even if the 89 // ranker was not empty before the retrieval, since GroupResultLimiter 90 // may filtered out all remaining docs. 91 // 92 // Returns: 93 // A token and PageResult wrapped by std::pair on success 94 // NOT_FOUND if failed to find any more results 95 libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>> GetNextPage( 96 uint64_t next_page_token, const ResultRetrieverV2& result_retriever, 97 int64_t current_time_ms) ICING_LOCKS_EXCLUDED(mutex_); 98 99 // Invalidates the result state associated with the given next-page token. 100 void InvalidateResultState(uint64_t next_page_token) 101 ICING_LOCKS_EXCLUDED(mutex_); 102 103 // Invalidates all result states / tokens currently in ResultStateManager. 104 void InvalidateAllResultStates() ICING_LOCKS_EXCLUDED(mutex_); 105 num_total_hits()106 int num_total_hits() const { return num_total_hits_; } 107 108 private: 109 absl_ports::shared_mutex mutex_; 110 111 const DocumentStore& document_store_; 112 113 // The maximum number of scored document hits that all result states may 114 // have. When a new result state is added such that num_total_hits_ would 115 // exceed max_total_hits_, the oldest result states are evicted until 116 // num_total_hits_ is below max_total_hits. 117 const int max_total_hits_; 118 119 // The number of scored document hits that all result states currently held by 120 // the result state manager have. 121 std::atomic<int> num_total_hits_; 122 123 // A hash map of (next-page token -> result state) 124 std::unordered_map<uint64_t, std::shared_ptr<ResultStateV2>> result_state_map_ 125 ICING_GUARDED_BY(mutex_); 126 127 // A queue used to track the insertion order of tokens with pushed timestamps. 128 std::queue<std::pair<uint64_t, int64_t>> token_queue_ 129 ICING_GUARDED_BY(mutex_); 130 131 // A set to temporarily store the invalidated tokens before they're finally 132 // removed from token_queue_. We store the invalidated tokens to ensure the 133 // uniqueness of new generated tokens. 134 std::unordered_set<uint64_t> invalidated_token_set_ ICING_GUARDED_BY(mutex_); 135 136 // A random 64-bit number generator 137 std::mt19937_64 random_generator_ ICING_GUARDED_BY(mutex_); 138 139 // Puts a new result state into the internal storage and returns a next-page 140 // token associated with it. The token is guaranteed to be unique among all 141 // currently valid tokens. When the maximum number of result states is 142 // reached, the oldest / firstly added result state will be removed to make 143 // room for the new state. 144 uint64_t Add(std::shared_ptr<ResultStateV2> result_state, 145 int64_t current_time_ms) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 146 147 // Helper method to generate a next-page token that is unique among all 148 // existing tokens in token_queue_. 149 uint64_t GetUniqueToken() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 150 151 // Helper method to remove old states to make room for incoming states with 152 // size num_hits_to_add. 153 void RemoveStatesIfNeeded(int num_hits_to_add) 154 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 155 156 // Helper method to remove a result state from result_state_map_, the token 157 // will then be temporarily kept in invalidated_token_set_ until it's finally 158 // removed from token_queue_. 159 void InternalInvalidateResultState(uint64_t token) 160 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 161 162 // Internal method to invalidate all result states / tokens currently in 163 // ResultStateManager. We need this separate method so that other public 164 // methods don't need to call InvalidateAllResultStates(). Public methods 165 // calling each other may cause deadlock issues. 166 void InternalInvalidateAllResultStates() 167 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 168 169 // Internal method to invalidate and remove expired result states / tokens 170 // currently in ResultStateManager that were created before 171 // current_time - result_state_ttl. 172 void InternalInvalidateExpiredResultStates(int64_t result_state_ttl, 173 int64_t current_time_ms) 174 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 175 }; 176 177 } // namespace lib 178 } // namespace icing 179 180 #endif // ICING_RESULT_RESULT_STATE_MANAGER_H_ 181