xref: /aosp_15_r20/external/icing/icing/join/posting-list-join-data-accessor.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
16 #define ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
17 
18 #include <cstdint>
19 #include <memory>
20 #include <utility>
21 #include <vector>
22 
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/absl_ports/canonical_errors.h"
26 #include "icing/file/posting_list/flash-index-storage.h"
27 #include "icing/file/posting_list/index-block.h"
28 #include "icing/file/posting_list/posting-list-accessor.h"
29 #include "icing/file/posting_list/posting-list-common.h"
30 #include "icing/file/posting_list/posting-list-identifier.h"
31 #include "icing/file/posting_list/posting-list-used.h"
32 #include "icing/join/posting-list-join-data-serializer.h"
33 #include "icing/legacy/index/icing-bit-util.h"
34 #include "icing/util/status-macros.h"
35 
36 namespace icing {
37 namespace lib {
38 
39 // This class is used to provide a simple abstraction for adding join data to
40 // posting lists. PostingListJoinDataAccessor handles:
41 // 1) selection of properly-sized posting lists for the accumulated join index
42 //    data during Finalize()
43 // 2) chaining of max-sized posting lists.
44 template <typename JoinDataType>
45 class PostingListJoinDataAccessor : public PostingListAccessor {
46  public:
47   // Creates an empty PostingListJoinDataAccessor.
48   //
49   // RETURNS:
50   //   - On success, a valid instance of PostingListJoinDataAccessor
51   //   - INVALID_ARGUMENT error if storage has an invalid block_size.
52   static libtextclassifier3::StatusOr<
53       std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
54   Create(FlashIndexStorage* storage,
55          PostingListJoinDataSerializer<JoinDataType>* serializer);
56 
57   // Creates a PostingListJoinDataAccessor with an existing posting list
58   // identified by existing_posting_list_id.
59   //
60   // RETURNS:
61   //   - On success, a valid instance of PostingListJoinDataAccessor
62   //   - INVALID_ARGUMENT if storage has an invalid block_size.
63   static libtextclassifier3::StatusOr<
64       std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
65   CreateFromExisting(FlashIndexStorage* storage,
66                      PostingListJoinDataSerializer<JoinDataType>* serializer,
67                      PostingListIdentifier existing_posting_list_id);
68 
GetSerializer()69   PostingListSerializer* GetSerializer() override { return serializer_; }
70 
71   // Retrieves the next batch of data in the posting list chain.
72   //
73   // RETURNS:
74   //   - On success, a vector of join data in the posting list chain
75   //   - FAILED_PRECONDITION_ERROR if called on an instance that was created via
76   //     Create.
77   //   - INTERNAL_ERROR if unable to read the next posting list in the chain or
78   //     if the posting list has been corrupted somehow.
79   libtextclassifier3::StatusOr<std::vector<JoinDataType>> GetNextDataBatch();
80 
81   // Prepends one data. This may result in flushing the posting list to disk (if
82   // the PostingListJoinDataAccessor holds a max-sized posting list that is
83   // full) or freeing a pre-existing posting list if it is too small to fit all
84   // data necessary.
85   //
86   // RETURNS:
87   //   - OK, on success
88   //   - INVALID_ARGUMENT if !data.is_valid() or if data is greater than the
89   //     previously added data.
90   //   - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
91   //     posting list.
92   libtextclassifier3::Status PrependData(const JoinDataType& data);
93 
94  private:
PostingListJoinDataAccessor(FlashIndexStorage * storage,PostingListUsed in_memory_posting_list,PostingListJoinDataSerializer<JoinDataType> * serializer)95   explicit PostingListJoinDataAccessor(
96       FlashIndexStorage* storage, PostingListUsed in_memory_posting_list,
97       PostingListJoinDataSerializer<JoinDataType>* serializer)
98       : PostingListAccessor(storage, std::move(in_memory_posting_list)),
99         serializer_(serializer) {}
100 
101   PostingListJoinDataSerializer<JoinDataType>* serializer_;  // Does not own.
102 };
103 
104 template <typename JoinDataType>
105 /* static */ libtextclassifier3::StatusOr<
106     std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
Create(FlashIndexStorage * storage,PostingListJoinDataSerializer<JoinDataType> * serializer)107 PostingListJoinDataAccessor<JoinDataType>::Create(
108     FlashIndexStorage* storage,
109     PostingListJoinDataSerializer<JoinDataType>* serializer) {
110   uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
111       storage->block_size(), serializer->GetDataTypeBytes());
112   ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
113                          PostingListUsed::CreateFromUnitializedRegion(
114                              serializer, max_posting_list_bytes));
115   return std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>(
116       new PostingListJoinDataAccessor<JoinDataType>(
117           storage, std::move(in_memory_posting_list), serializer));
118 }
119 
120 template <typename JoinDataType>
121 /* static */ libtextclassifier3::StatusOr<
122     std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>>>
CreateFromExisting(FlashIndexStorage * storage,PostingListJoinDataSerializer<JoinDataType> * serializer,PostingListIdentifier existing_posting_list_id)123 PostingListJoinDataAccessor<JoinDataType>::CreateFromExisting(
124     FlashIndexStorage* storage,
125     PostingListJoinDataSerializer<JoinDataType>* serializer,
126     PostingListIdentifier existing_posting_list_id) {
127   ICING_ASSIGN_OR_RETURN(
128       std::unique_ptr<PostingListJoinDataAccessor<JoinDataType>> pl_accessor,
129       Create(storage, serializer));
130   ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
131                          storage->GetPostingList(existing_posting_list_id));
132   pl_accessor->preexisting_posting_list_ =
133       std::make_unique<PostingListHolder>(std::move(holder));
134   return pl_accessor;
135 }
136 
137 // Returns the next batch of join data for the provided posting list.
138 template <typename JoinDataType>
139 libtextclassifier3::StatusOr<std::vector<JoinDataType>>
GetNextDataBatch()140 PostingListJoinDataAccessor<JoinDataType>::GetNextDataBatch() {
141   if (preexisting_posting_list_ == nullptr) {
142     if (has_reached_posting_list_chain_end_) {
143       return std::vector<JoinDataType>();
144     }
145     return absl_ports::FailedPreconditionError(
146         "Cannot retrieve data from a PostingListJoinDataAccessor that was not "
147         "created from a preexisting posting list.");
148   }
149   ICING_ASSIGN_OR_RETURN(
150       std::vector<JoinDataType> batch,
151       serializer_->GetData(&preexisting_posting_list_->posting_list));
152   uint32_t next_block_index = kInvalidBlockIndex;
153   // Posting lists will only be chained when they are max-sized, in which case
154   // next_block_index will point to the next block for the next posting list.
155   // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
156   // to the next free list block, which is not relevant here.
157   if (preexisting_posting_list_->posting_list.size_in_bytes() ==
158       storage_->max_posting_list_bytes()) {
159     next_block_index = preexisting_posting_list_->next_block_index;
160   }
161 
162   if (next_block_index != kInvalidBlockIndex) {
163     // Since we only have to deal with next block for max-sized posting list
164     // block, max_num_posting_lists is 1 and posting_list_index_bits is
165     // BitsToStore(1).
166     PostingListIdentifier next_posting_list_id(
167         next_block_index, /*posting_list_index=*/0,
168         /*posting_list_index_bits=*/BitsToStore(1));
169     ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
170                            storage_->GetPostingList(next_posting_list_id));
171     preexisting_posting_list_ =
172         std::make_unique<PostingListHolder>(std::move(holder));
173   } else {
174     has_reached_posting_list_chain_end_ = true;
175     preexisting_posting_list_.reset();
176   }
177   return batch;
178 }
179 
180 template <typename JoinDataType>
181 libtextclassifier3::Status
PrependData(const JoinDataType & data)182 PostingListJoinDataAccessor<JoinDataType>::PrependData(
183     const JoinDataType& data) {
184   PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
185                                    ? preexisting_posting_list_->posting_list
186                                    : in_memory_posting_list_;
187   libtextclassifier3::Status status =
188       serializer_->PrependData(&active_pl, data);
189   if (!absl_ports::IsResourceExhausted(status)) {
190     return status;
191   }
192   // There is no more room to add data to this current posting list! Therefore,
193   // we need to either move those data to a larger posting list or flush this
194   // posting list and create another max-sized posting list in the chain.
195   if (preexisting_posting_list_ != nullptr) {
196     ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
197   } else {
198     ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
199   }
200 
201   // Re-add data. Should always fit since we just cleared
202   // in_memory_posting_list_. It's fine to explicitly reference
203   // in_memory_posting_list_ here because there's no way of reaching this line
204   // while preexisting_posting_list_ is still in use.
205   return serializer_->PrependData(&in_memory_posting_list_, data);
206 }
207 
208 }  // namespace lib
209 }  // namespace icing
210 
211 #endif  // ICING_JOIN_POSTING_LIST_JOIN_DATA_ACCESSOR_H_
212