xref: /aosp_15_r20/external/icing/icing/result/result-state-manager.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_RESULT_RESULT_STATE_MANAGER_H_
16 #define ICING_RESULT_RESULT_STATE_MANAGER_H_
17 
18 #include <atomic>
19 #include <memory>
20 #include <queue>
21 #include <random>
22 #include <unordered_map>
23 #include <unordered_set>
24 
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/mutex.h"
27 #include "icing/proto/search.pb.h"
28 #include "icing/result/page-result.h"
29 #include "icing/result/result-adjustment-info.h"
30 #include "icing/result/result-retriever-v2.h"
31 #include "icing/result/result-state-v2.h"
32 #include "icing/scoring/scored-document-hits-ranker.h"
33 #include "icing/util/clock.h"
34 
35 namespace icing {
36 namespace lib {
37 
38 // This should be the same as the default value of
39 // SearchResultProto.next_page_token.
40 inline constexpr uint64_t kInvalidNextPageToken = 0;
41 
42 // 1 hr as the default ttl for a ResultState after being pushed into
43 // token_queue_.
44 inline constexpr int64_t kDefaultResultStateTtlInMs = 1LL * 60 * 60 * 1000;
45 
46 // Used to store and manage ResultState.
47 class ResultStateManager {
48  public:
49   explicit ResultStateManager(int max_total_hits,
50                               const DocumentStore& document_store);
51 
52   ResultStateManager(const ResultStateManager&) = delete;
53   ResultStateManager& operator=(const ResultStateManager&) = delete;
54 
55   // Creates a new result state, retrieves and returns PageResult for the first
56   // page. Also caches the new result state and returns a next_page_token which
57   // can be used to fetch more pages from the same result state later. Before
58   // caching the result state, adjusts (truncate) the size and evicts some old
59   // result states if exceeding the cache size limit. next_page_token will be
60   // set to a default value kInvalidNextPageToken if there're no more pages.
61   //
62   // NOTE: parent_adjustment_info and child_adjustment_info can be nullptr if
63   //       there is no requirement to apply adjustment (snippet, projection) to
64   //       them.
65   //
66   // NOTE: it is possible to have empty result for the first page even if the
67   //       ranker was not empty before the retrieval, since GroupResultLimiter
68   //       may filter out all docs. In this case, the first page is also the
69   //       last page and next_page_token will be set to kInvalidNextPageToken.
70   //
71   // Returns:
72   //   A token and PageResult wrapped by std::pair on success
73   //   INVALID_ARGUMENT if the input ranker is null or contains no results
74   libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
75   CacheAndRetrieveFirstPage(
76       std::unique_ptr<ScoredDocumentHitsRanker> ranker,
77       std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info,
78       std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info,
79       const ResultSpecProto& result_spec, const DocumentStore& document_store,
80       const ResultRetrieverV2& result_retriever, int64_t current_time_ms)
81       ICING_LOCKS_EXCLUDED(mutex_);
82 
83   // Retrieves and returns PageResult for the next page.
84   // The returned results won't exist in ResultStateManager anymore. If the
85   // query has no more pages after this retrieval, the input token will be
86   // invalidated.
87   //
88   // NOTE: it is possible to have empty result for the last page even if the
89   //       ranker was not empty before the retrieval, since GroupResultLimiter
90   //       may filtered out all remaining docs.
91   //
92   // Returns:
93   //   A token and PageResult wrapped by std::pair on success
94   //   NOT_FOUND if failed to find any more results
95   libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>> GetNextPage(
96       uint64_t next_page_token, const ResultRetrieverV2& result_retriever,
97       int64_t current_time_ms) ICING_LOCKS_EXCLUDED(mutex_);
98 
99   // Invalidates the result state associated with the given next-page token.
100   void InvalidateResultState(uint64_t next_page_token)
101       ICING_LOCKS_EXCLUDED(mutex_);
102 
103   // Invalidates all result states / tokens currently in ResultStateManager.
104   void InvalidateAllResultStates() ICING_LOCKS_EXCLUDED(mutex_);
105 
num_total_hits()106   int num_total_hits() const { return num_total_hits_; }
107 
108  private:
109   absl_ports::shared_mutex mutex_;
110 
111   const DocumentStore& document_store_;
112 
113   // The maximum number of scored document hits that all result states may
114   // have. When a new result state is added such that num_total_hits_ would
115   // exceed max_total_hits_, the oldest result states are evicted until
116   // num_total_hits_ is below max_total_hits.
117   const int max_total_hits_;
118 
119   // The number of scored document hits that all result states currently held by
120   // the result state manager have.
121   std::atomic<int> num_total_hits_;
122 
123   // A hash map of (next-page token -> result state)
124   std::unordered_map<uint64_t, std::shared_ptr<ResultStateV2>> result_state_map_
125       ICING_GUARDED_BY(mutex_);
126 
127   // A queue used to track the insertion order of tokens with pushed timestamps.
128   std::queue<std::pair<uint64_t, int64_t>> token_queue_
129       ICING_GUARDED_BY(mutex_);
130 
131   // A set to temporarily store the invalidated tokens before they're finally
132   // removed from token_queue_. We store the invalidated tokens to ensure the
133   // uniqueness of new generated tokens.
134   std::unordered_set<uint64_t> invalidated_token_set_ ICING_GUARDED_BY(mutex_);
135 
136   // A random 64-bit number generator
137   std::mt19937_64 random_generator_ ICING_GUARDED_BY(mutex_);
138 
139   // Puts a new result state into the internal storage and returns a next-page
140   // token associated with it. The token is guaranteed to be unique among all
141   // currently valid tokens. When the maximum number of result states is
142   // reached, the oldest / firstly added result state will be removed to make
143   // room for the new state.
144   uint64_t Add(std::shared_ptr<ResultStateV2> result_state,
145                int64_t current_time_ms) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
146 
147   // Helper method to generate a next-page token that is unique among all
148   // existing tokens in token_queue_.
149   uint64_t GetUniqueToken() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
150 
151   // Helper method to remove old states to make room for incoming states with
152   // size num_hits_to_add.
153   void RemoveStatesIfNeeded(int num_hits_to_add)
154       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
155 
156   // Helper method to remove a result state from result_state_map_, the token
157   // will then be temporarily kept in invalidated_token_set_ until it's finally
158   // removed from token_queue_.
159   void InternalInvalidateResultState(uint64_t token)
160       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
161 
162   // Internal method to invalidate all result states / tokens currently in
163   // ResultStateManager. We need this separate method so that other public
164   // methods don't need to call InvalidateAllResultStates(). Public methods
165   // calling each other may cause deadlock issues.
166   void InternalInvalidateAllResultStates()
167       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
168 
169   // Internal method to invalidate and remove expired result states / tokens
170   // currently in ResultStateManager that were created before
171   // current_time - result_state_ttl.
172   void InternalInvalidateExpiredResultStates(int64_t result_state_ttl,
173                                              int64_t current_time_ms)
174       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
175 };
176 
177 }  // namespace lib
178 }  // namespace icing
179 
180 #endif  // ICING_RESULT_RESULT_STATE_MANAGER_H_
181