1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_ 16 #define ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_ 17 18 #include <cstdint> 19 #include <memory> 20 21 #include "icing/text_classifier/lib3/utils/base/status.h" 22 #include "icing/text_classifier/lib3/utils/base/statusor.h" 23 24 namespace icing { 25 namespace lib { 26 27 class PostingListUsed; 28 29 // Interface for PostingListUsed data serialization and deserialization. 30 // - It contains several common methods used by lower level of posting list 31 // management related classes (e.g. FlashIndexStorage, IndexBlock, 32 // PostingListUsed, etc). 33 // - Higher level classes (e.g. MainIndex) create their desired serializers 34 // according to the data type they're dealing with, and pass the instance down 35 // to all posting list management related classes. 36 // - Data specific methods can also be implemented in each serializer. They 37 // won't be used by posting list management related classes, but higher level 38 // classes are able to call it and deal with the specific data type. 39 // 40 // E.g. main index stores 'Hit' data into posting lists. 41 // - MainIndex creates PostingListUsedHitSerializer instance and uses hit data 42 // related methods to serialize/deserialize Hit data to/from posting lists. 43 // - FlashIndexStorage, IndexBlock, PostingListUsed use the serializer created 44 // by MainIndex, but hold the reference/pointer in the interface format 45 // (PostingListSerializer) and only use common interface methods to manage 46 // posting list. 47 class PostingListSerializer { 48 public: 49 // Special data is either a DataType instance or data_start_offset. 50 template <typename DataType> 51 union SpecialData { SpecialData(const DataType & data)52 explicit SpecialData(const DataType& data) : data_(data) {} 53 SpecialData(uint32_t data_start_offset)54 explicit SpecialData(uint32_t data_start_offset) 55 : data_start_offset_(data_start_offset) {} 56 data()57 const DataType& data() const { return data_; } 58 data_start_offset()59 uint32_t data_start_offset() const { return data_start_offset_; } set_data_start_offset(uint32_t data_start_offset)60 void set_data_start_offset(uint32_t data_start_offset) { 61 data_start_offset_ = data_start_offset; 62 } 63 64 private: 65 DataType data_; 66 uint32_t data_start_offset_; 67 } __attribute__((packed)); 68 69 static constexpr uint32_t kNumSpecialData = 2; 70 71 virtual ~PostingListSerializer() = default; 72 73 // Returns byte size of the data type. 74 virtual uint32_t GetDataTypeBytes() const = 0; 75 76 // Returns minimum posting list size allowed. 77 // 78 // Note that min posting list size should also be large enough to store a 79 // single PostingListIndex (for posting list management usage), so we have to 80 // add static_assert in each serializer implementation. 81 // E.g. 82 // static constexpr uint32_t kMinPostingListSize = kSpecialHitsSize; 83 // static_assert(sizeof(PostingListIndex) <= kMinPostingListSize, ""); 84 virtual uint32_t GetMinPostingListSize() const = 0; 85 86 // Returns minimum size of posting list that can fit these used bytes 87 // (see MoveFrom). 88 virtual uint32_t GetMinPostingListSizeToFit( 89 const PostingListUsed* posting_list_used) const = 0; 90 91 // Returns bytes used by actual data. 92 virtual uint32_t GetBytesUsed( 93 const PostingListUsed* posting_list_used) const = 0; 94 95 // Clears the posting list. It is usually used for initializing a newly 96 // allocated (or reclaimed from free posting list chain) posting list. 97 virtual void Clear(PostingListUsed* posting_list_used) const = 0; 98 99 // Moves contents from posting list 'src' to 'dst'. Clears 'src'. 100 // 101 // RETURNS: 102 // - OK on success 103 // - INVALID_ARGUMENT if 'src' is not valid or 'src' is too large to fit in 104 // 'dst'. 105 // - FAILED_PRECONDITION if 'dst' posting list is in a corrupted state. 106 virtual libtextclassifier3::Status MoveFrom(PostingListUsed* dst, 107 PostingListUsed* src) const = 0; 108 }; 109 110 // A posting list with in-memory data. The caller should sync it to disk via 111 // FlashIndexStorage. Layout depends on the serializer. 112 class PostingListUsed { 113 public: 114 // Creates a PostingListUsed that takes over the ownership of 115 // posting_list_buffer with size_in_bytes bytes. 'Preexisting' means that 116 // the data in posting_list_buffer was previously modified by another instance 117 // of PostingListUsed, and the caller should read the data from disk to 118 // posting_list_buffer. 119 // 120 // RETURNS: 121 // - A valid PostingListUsed if successful 122 // - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check 123 // fails 124 // - FAILED_PRECONDITION if serializer or posting_list_buffer is null 125 static libtextclassifier3::StatusOr<PostingListUsed> 126 CreateFromPreexistingPostingListUsedRegion( 127 PostingListSerializer* serializer, 128 std::unique_ptr<uint8_t[]> posting_list_buffer, uint32_t size_in_bytes); 129 130 // Creates a PostingListUsed that owns a buffer of size_in_bytes bytes and 131 // initializes the content of the buffer so that the returned PostingListUsed 132 // is empty. 133 // 134 // RETURNS: 135 // - A valid PostingListUsed if successful 136 // - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check 137 // fails 138 // - FAILED_PRECONDITION if serializer is null 139 static libtextclassifier3::StatusOr<PostingListUsed> 140 CreateFromUnitializedRegion(PostingListSerializer* serializer, 141 uint32_t size_in_bytes); 142 posting_list_buffer()143 uint8_t* posting_list_buffer() { 144 is_dirty_ = true; 145 return posting_list_buffer_.get(); 146 } 147 posting_list_buffer()148 const uint8_t* posting_list_buffer() const { 149 return posting_list_buffer_.get(); 150 } 151 size_in_bytes()152 uint32_t size_in_bytes() const { return size_in_bytes_; } 153 is_dirty()154 bool is_dirty() const { return is_dirty_; } 155 156 private: PostingListUsed(std::unique_ptr<uint8_t[]> posting_list_buffer,uint32_t size_in_bytes)157 explicit PostingListUsed(std::unique_ptr<uint8_t[]> posting_list_buffer, 158 uint32_t size_in_bytes) 159 : posting_list_buffer_(std::move(posting_list_buffer)), 160 size_in_bytes_(size_in_bytes), 161 is_dirty_(false) {} 162 163 // A byte array of size size_in_bytes_ containing encoded data for this 164 // posting list. 165 std::unique_ptr<uint8_t[]> posting_list_buffer_; 166 uint32_t size_in_bytes_; 167 168 bool is_dirty_; 169 }; 170 171 } // namespace lib 172 } // namespace icing 173 174 #endif // ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_ 175