1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/numeric/posting-list-integer-index-accessor.h"
16
#include <cstdint>
#include <iterator>
#include <memory>
#include <utility>
#include <vector>
20
21 #include "icing/text_classifier/lib3/utils/base/status.h"
22 #include "icing/text_classifier/lib3/utils/base/statusor.h"
23 #include "icing/absl_ports/canonical_errors.h"
24 #include "icing/file/posting_list/flash-index-storage.h"
25 #include "icing/file/posting_list/index-block.h"
26 #include "icing/file/posting_list/posting-list-identifier.h"
27 #include "icing/file/posting_list/posting-list-used.h"
28 #include "icing/index/numeric/integer-index-data.h"
29 #include "icing/index/numeric/posting-list-integer-index-serializer.h"
30 #include "icing/util/status-macros.h"
31
32 namespace icing {
33 namespace lib {
34
35 /* static */ libtextclassifier3::StatusOr<
36 std::unique_ptr<PostingListIntegerIndexAccessor>>
Create(FlashIndexStorage * storage,PostingListIntegerIndexSerializer * serializer)37 PostingListIntegerIndexAccessor::Create(
38 FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer) {
39 uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
40 storage->block_size(), serializer->GetDataTypeBytes());
41 ICING_ASSIGN_OR_RETURN(PostingListUsed in_memory_posting_list,
42 PostingListUsed::CreateFromUnitializedRegion(
43 serializer, max_posting_list_bytes));
44 return std::unique_ptr<PostingListIntegerIndexAccessor>(
45 new PostingListIntegerIndexAccessor(
46 storage, std::move(in_memory_posting_list), serializer));
47 }
48
49 /* static */ libtextclassifier3::StatusOr<
50 std::unique_ptr<PostingListIntegerIndexAccessor>>
CreateFromExisting(FlashIndexStorage * storage,PostingListIntegerIndexSerializer * serializer,PostingListIdentifier existing_posting_list_id)51 PostingListIntegerIndexAccessor::CreateFromExisting(
52 FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer,
53 PostingListIdentifier existing_posting_list_id) {
54 ICING_ASSIGN_OR_RETURN(
55 std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
56 Create(storage, serializer));
57 ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
58 storage->GetPostingList(existing_posting_list_id));
59 pl_accessor->preexisting_posting_list_ =
60 std::make_unique<PostingListHolder>(std::move(holder));
61 return pl_accessor;
62 }
63
64 // Returns the next batch of integer index data for the provided posting list.
65 libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
GetNextDataBatch()66 PostingListIntegerIndexAccessor::GetNextDataBatch() {
67 return GetNextDataBatchImpl(/*free_posting_list=*/false);
68 }
69
70 libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
GetAllDataAndFree()71 PostingListIntegerIndexAccessor::GetAllDataAndFree() {
72 if (preexisting_posting_list_ == nullptr) {
73 return absl_ports::FailedPreconditionError(
74 "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
75 "was not created from a preexisting posting list.");
76 }
77
78 std::vector<IntegerIndexData> all_data;
79 while (true) {
80 ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> batch,
81 GetNextDataBatchImpl(/*free_posting_list=*/true));
82 if (batch.empty()) {
83 break;
84 }
85 std::move(batch.begin(), batch.end(), std::back_inserter(all_data));
86 }
87
88 return all_data;
89 }
90
PrependData(const IntegerIndexData & data)91 libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
92 const IntegerIndexData& data) {
93 PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
94 ? preexisting_posting_list_->posting_list
95 : in_memory_posting_list_;
96 libtextclassifier3::Status status =
97 serializer_->PrependData(&active_pl, data);
98 if (!absl_ports::IsResourceExhausted(status)) {
99 return status;
100 }
101 // There is no more room to add data to this current posting list! Therefore,
102 // we need to either move those data to a larger posting list or flush this
103 // posting list and create another max-sized posting list in the chain.
104 if (preexisting_posting_list_ != nullptr) {
105 ICING_RETURN_IF_ERROR(FlushPreexistingPostingList());
106 } else {
107 ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
108 }
109
110 // Re-add data. Should always fit since we just cleared
111 // in_memory_posting_list_. It's fine to explicitly reference
112 // in_memory_posting_list_ here because there's no way of reaching this line
113 // while preexisting_posting_list_ is still in use.
114 return serializer_->PrependData(&in_memory_posting_list_, data);
115 }
116
117 libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
GetNextDataBatchImpl(bool free_posting_list)118 PostingListIntegerIndexAccessor::GetNextDataBatchImpl(bool free_posting_list) {
119 if (preexisting_posting_list_ == nullptr) {
120 if (has_reached_posting_list_chain_end_) {
121 return std::vector<IntegerIndexData>();
122 }
123 return absl_ports::FailedPreconditionError(
124 "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
125 "was not created from a preexisting posting list.");
126 }
127 ICING_ASSIGN_OR_RETURN(
128 std::vector<IntegerIndexData> batch,
129 serializer_->GetData(&preexisting_posting_list_->posting_list));
130 uint32_t next_block_index = kInvalidBlockIndex;
131 // Posting lists will only be chained when they are max-sized, in which case
132 // next_block_index will point to the next block for the next posting list.
133 // Otherwise, next_block_index can be kInvalidBlockIndex or be used to point
134 // to the next free list block, which is not relevant here.
135 if (preexisting_posting_list_->posting_list.size_in_bytes() ==
136 storage_->max_posting_list_bytes()) {
137 next_block_index = preexisting_posting_list_->next_block_index;
138 }
139
140 if (free_posting_list) {
141 ICING_RETURN_IF_ERROR(
142 storage_->FreePostingList(std::move(*preexisting_posting_list_)));
143 }
144
145 if (next_block_index != kInvalidBlockIndex) {
146 // Since we only have to deal with next block for max-sized posting list
147 // block, max_num_posting_lists is 1 and posting_list_index_bits is
148 // BitsToStore(1).
149 PostingListIdentifier next_posting_list_id(
150 next_block_index, /*posting_list_index=*/0,
151 /*posting_list_index_bits=*/BitsToStore(1));
152 ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
153 storage_->GetPostingList(next_posting_list_id));
154 preexisting_posting_list_ =
155 std::make_unique<PostingListHolder>(std::move(holder));
156 } else {
157 has_reached_posting_list_chain_end_ = true;
158 preexisting_posting_list_.reset();
159 }
160 return batch;
161 }
162
163 } // namespace lib
164 } // namespace icing
165