xref: /aosp_15_r20/external/icing/icing/index/numeric/posting-list-integer-index-accessor.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "icing/index/numeric/posting-list-integer-index-accessor.h"

#include <cstdint>
#include <iterator>
#include <memory>
#include <utility>
#include <vector>

#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/numeric/integer-index-data.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include "icing/util/status-macros.h"
31 
32 namespace icing {
33 namespace lib {
34 
35 /* static */ libtextclassifier3::StatusOr<
36     std::unique_ptr<PostingListIntegerIndexAccessor>>
Create(FlashIndexStorage * storage,PostingListIntegerIndexSerializer * serializer)37 PostingListIntegerIndexAccessor::Create(
38     FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer) {
39   uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
40       storage->block_size(), serializer->GetDataTypeBytes());
41   ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
42                          PostingListUsed::CreateFromUnitializedRegion(
43                              serializer, max_posting_list_bytes));
44   return std::unique_ptr<PostingListIntegerIndexAccessor>(
45       new PostingListIntegerIndexAccessor(
46           storage, std::move(in_memory_posting_list), serializer));
47 }
48 
49 /* static */ libtextclassifier3::StatusOr<
50     std::unique_ptr<PostingListIntegerIndexAccessor>>
CreateFromExisting(FlashIndexStorage * storage,PostingListIntegerIndexSerializer * serializer,PostingListIdentifier existing_posting_list_id)51 PostingListIntegerIndexAccessor::CreateFromExisting(
52     FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer,
53     PostingListIdentifier existing_posting_list_id) {
54   ICING_ASSIGN_OR_RETURN(
55       std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
56       Create(storage, serializer));
57   ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
58                          storage->GetPostingList(existing_posting_list_id));
59   pl_accessor->preexisting_posting_list_ =
60       std::make_unique<PostingListHolder>(std::move(holder));
61   return pl_accessor;
62 }
63 
64 // Returns the next batch of integer index data for the provided posting list.
65 libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
GetNextDataBatch()66 PostingListIntegerIndexAccessor::GetNextDataBatch() {
67   return GetNextDataBatchImpl(/*free_posting_list=*/false);
68 }
69 
70 libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
GetAllDataAndFree()71 PostingListIntegerIndexAccessor::GetAllDataAndFree() {
72   if (preexisting_posting_list_ == nullptr) {
73     return absl_ports::FailedPreconditionError(
74         "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
75         "was not created from a preexisting posting list.");
76   }
77 
78   std::vector<IntegerIndexData> all_data;
79   while (true) {
80     ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> batch,
81                            GetNextDataBatchImpl(/*free_posting_list=*/true));
82     if (batch.empty()) {
83       break;
84     }
85     std::move(batch.begin(), batch.end(), std::back_inserter(all_data));
86   }
87 
88   return all_data;
89 }
90 
PrependData(const IntegerIndexData & data)91 libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
92     const IntegerIndexData& data) {
93   PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
94                                    ? preexisting_posting_list_->posting_list
95                                    : in_memory_posting_list_;
96   libtextclassifier3::Status status =
97       serializer_->PrependData(&active_pl, data);
98   if (!absl_ports::IsResourceExhausted(status)) {
99     return status;
100   }
101   // There is no more room to add data to this current posting list! Therefore,
102   // we need to either move those data to a larger posting list or flush this
103   // posting list and create another max-sized posting list in the chain.
104   if (preexisting_posting_list_ != nullptr) {
105     ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
106   } else {
107     ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
108   }
109 
110   // Re-add data. Should always fit since we just cleared
111   // in_memory_posting_list_. It's fine to explicitly reference
112   // in_memory_posting_list_ here because there's no way of reaching this line
113   // while preexisting_posting_list_ is still in use.
114   return serializer_->PrependData(&in_memory_posting_list_, data);
115 }
116 
117 libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
GetNextDataBatchImpl(bool free_posting_list)118 PostingListIntegerIndexAccessor::GetNextDataBatchImpl(bool free_posting_list) {
119   if (preexisting_posting_list_ == nullptr) {
120     if (has_reached_posting_list_chain_end_) {
121       return std::vector<IntegerIndexData>();
122     }
123     return absl_ports::FailedPreconditionError(
124         "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
125         "was not created from a preexisting posting list.");
126   }
127   ICING_ASSIGN_OR_RETURN(
128       std::vector<IntegerIndexData> batch,
129       serializer_->GetData(&preexisting_posting_list_->posting_list));
130   uint32_t next_block_index = kInvalidBlockIndex;
131   // Posting lists will only be chained when they are max-sized, in which case
132   // next_block_index will point to the next block for the next posting list.
133   // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
134   // to the next free list block, which is not relevant here.
135   if (preexisting_posting_list_->posting_list.size_in_bytes() ==
136       storage_->max_posting_list_bytes()) {
137     next_block_index = preexisting_posting_list_->next_block_index;
138   }
139 
140   if (free_posting_list) {
141     ICING_RETURN_IF_ERROR(
142         storage_->FreePostingList(std::move(*preexisting_posting_list_)));
143   }
144 
145   if (next_block_index != kInvalidBlockIndex) {
146     // Since we only have to deal with next block for max-sized posting list
147     // block, max_num_posting_lists is 1 and posting_list_index_bits is
148     // BitsToStore(1).
149     PostingListIdentifier next_posting_list_id(
150         next_block_index, /*posting_list_index=*/0,
151         /*posting_list_index_bits=*/BitsToStore(1));
152     ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
153                            storage_->GetPostingList(next_posting_list_id));
154     preexisting_posting_list_ =
155         std::make_unique<PostingListHolder>(std::move(holder));
156   } else {
157     has_reached_posting_list_chain_end_ = true;
158     preexisting_posting_list_.reset();
159   }
160   return batch;
161 }
162 
163 }  // namespace lib
164 }  // namespace icing
165