1 // Copyright (C) 2022 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_ACCESSOR_H_ 16 #define ICING_FILE_POSTING_LIST_POSTING_LIST_ACCESSOR_H_ 17 18 #include <cstdint> 19 #include <memory> 20 21 #include "icing/text_classifier/lib3/utils/base/status.h" 22 #include "icing/file/posting_list/flash-index-storage.h" 23 #include "icing/file/posting_list/posting-list-identifier.h" 24 #include "icing/file/posting_list/posting-list-used.h" 25 26 namespace icing { 27 namespace lib { 28 29 // This class serves to: 30 // 1. Expose PostingListUseds to clients of FlashIndexStorage 31 // 2. Handles flushing posting list properly, including choosing the most 32 // efficient size of PL, chaining max-sized PL correctly, etc. 33 // 3. Ensure that PostingListUseds can only be freed by calling methods which 34 // will also properly maintain the FlashIndexStorage free list and prevent 35 // callers from modifying the Posting List after freeing. 36 class PostingListAccessor { 37 public: 38 virtual ~PostingListAccessor() = default; 39 40 struct FinalizeResult { 41 // - OK on success 42 // - INVALID_ARGUMENT if there was no pre-existing posting list and no 43 // data were added 44 // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a 45 // new posting list. 46 libtextclassifier3::Status status; 47 // Id of the posting list chain that was finalized. Guaranteed to be valid 48 // if status is OK. May be valid if status is non-OK, but previous blocks 49 // were written. 50 PostingListIdentifier id; 51 FinalizeResultFinalizeResult52 explicit FinalizeResult(libtextclassifier3::Status status_in, 53 PostingListIdentifier id_in) 54 : status(std::move(status_in)), id(std::move(id_in)) {} 55 }; 56 // Write all accumulated data to storage. 57 // 58 // If accessor points to a posting list chain with multiple posting lists in 59 // the chain and unable to write the last posting list in the chain, Finalize 60 // will return the error and also populate id with the id of the 61 // second-to-last posting list. 62 FinalizeResult Finalize() &&; 63 64 virtual PostingListSerializer* GetSerializer() = 0; 65 66 protected: PostingListAccessor(FlashIndexStorage * storage,PostingListUsed in_memory_posting_list)67 explicit PostingListAccessor(FlashIndexStorage* storage, 68 PostingListUsed in_memory_posting_list) 69 : storage_(storage), 70 prev_block_identifier_(PostingListIdentifier::kInvalid), 71 in_memory_posting_list_(std::move(in_memory_posting_list)), 72 has_reached_posting_list_chain_end_(false) {} 73 74 // Flushes preexisting_posting_list_ to disk if it's a max-sized posting list 75 // and populates prev_block_identifier. 76 // If it's not a max-sized posting list, moves the contents of 77 // preexisting_posting_list_ to in_memory_posting_list_ and frees 78 // preexisting_posting_list_. 79 // Sets preexisting_posting_list_ to nullptr. 80 libtextclassifier3::Status FlushPreexistingPostingList(); 81 82 // Flushes in_memory_posting_list_ to a max-sized posting list on disk, chains 83 // the newly allocated max-size posting list block by setting its next pointer 84 // to prev_block_identifier_, and updates prev_block_identifier_ to point to 85 // the newly allocated posting list. 86 libtextclassifier3::Status FlushInMemoryPostingList(); 87 88 // Frees all posting lists in the posting list chain starting at 89 // prev_block_identifier_. 90 libtextclassifier3::Status FreePostingListChain(); 91 92 FlashIndexStorage* storage_; // Does not own. 93 94 // The PostingListIdentifier of the first max-sized posting list in the 95 // posting list chain or PostingListIdentifier::kInvalid if there is no 96 // posting list chain. 97 PostingListIdentifier prev_block_identifier_; 98 99 // An editor to an existing posting list on disk. If available (non-NULL), 100 // we'll try to add all data to this posting list. Once this posting list 101 // fills up, we'll either 1) chain it (if a max-sized posting list) and put 102 // future data in in_memory_posting_list_ or 2) copy all of its data into 103 // in_memory_posting_list_ and free this pl (if not a max-sized posting list). 104 // TODO(tjbarron) provide a benchmark to demonstrate the effects that re-using 105 // existing posting lists has on latency. 106 std::unique_ptr<PostingListHolder> preexisting_posting_list_; 107 108 // In-memory posting list used to buffer data before writing them to the 109 // smallest on-disk posting list that will fit them. 110 PostingListUsed in_memory_posting_list_; 111 112 bool has_reached_posting_list_chain_end_; 113 }; 114 115 } // namespace lib 116 } // namespace icing 117 118 #endif // ICING_FILE_POSTING_LIST_POSTING_LIST_ACCESSOR_H_ 119