xref: /aosp_15_r20/external/icing/icing/file/posting_list/posting-list-used.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_
16 #define ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_
17 
#include <cstdint>
#include <memory>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
23 
24 namespace icing {
25 namespace lib {
26 
27 class PostingListUsed;
28 
29 // Interface for PostingListUsed data serialization and deserialization.
30 // - It contains several common methods used by lower level of posting list
31 //   management related classes (e.g. FlashIndexStorage, IndexBlock,
32 //   PostingListUsed, etc).
33 // - Higher level classes (e.g. MainIndex) create their desired serializers
34 //   according to the data type they're dealing with, and pass the instance down
35 //   to all posting list management related classes.
36 // - Data specific methods can also be implemented in each serializer. They
37 //   won't be used by posting list management related classes, but higher level
38 //   classes are able to call it and deal with the specific data type.
39 //
40 // E.g. main index stores 'Hit' data into posting lists.
41 // - MainIndex creates PostingListUsedHitSerializer instance and uses hit data
42 //   related methods to serialize/deserialize Hit data to/from posting lists.
43 // - FlashIndexStorage, IndexBlock, PostingListUsed use the serializer created
44 //   by MainIndex, but hold the reference/pointer in the interface format
45 //   (PostingListSerializer) and only use common interface methods to manage
46 //   posting list.
47 class PostingListSerializer {
48  public:
49   // Special data is either a DataType instance or data_start_offset.
50   template <typename DataType>
51   union SpecialData {
SpecialData(const DataType & data)52     explicit SpecialData(const DataType& data) : data_(data) {}
53 
SpecialData(uint32_t data_start_offset)54     explicit SpecialData(uint32_t data_start_offset)
55         : data_start_offset_(data_start_offset) {}
56 
data()57     const DataType& data() const { return data_; }
58 
data_start_offset()59     uint32_t data_start_offset() const { return data_start_offset_; }
set_data_start_offset(uint32_t data_start_offset)60     void set_data_start_offset(uint32_t data_start_offset) {
61       data_start_offset_ = data_start_offset;
62     }
63 
64    private:
65     DataType data_;
66     uint32_t data_start_offset_;
67   } __attribute__((packed));
68 
69   static constexpr uint32_t kNumSpecialData = 2;
70 
71   virtual ~PostingListSerializer() = default;
72 
73   // Returns byte size of the data type.
74   virtual uint32_t GetDataTypeBytes() const = 0;
75 
76   // Returns minimum posting list size allowed.
77   //
78   // Note that min posting list size should also be large enough to store a
79   // single PostingListIndex (for posting list management usage), so we have to
80   // add static_assert in each serializer implementation.
81   // E.g.
82   // static constexpr uint32_t kMinPostingListSize = kSpecialHitsSize;
83   // static_assert(sizeof(PostingListIndex) <= kMinPostingListSize, "");
84   virtual uint32_t GetMinPostingListSize() const = 0;
85 
86   // Returns minimum size of posting list that can fit these used bytes
87   // (see MoveFrom).
88   virtual uint32_t GetMinPostingListSizeToFit(
89       const PostingListUsed* posting_list_used) const = 0;
90 
91   // Returns bytes used by actual data.
92   virtual uint32_t GetBytesUsed(
93       const PostingListUsed* posting_list_used) const = 0;
94 
95   // Clears the posting list. It is usually used for initializing a newly
96   // allocated (or reclaimed from free posting list chain) posting list.
97   virtual void Clear(PostingListUsed* posting_list_used) const = 0;
98 
99   // Moves contents from posting list 'src' to 'dst'. Clears 'src'.
100   //
101   // RETURNS:
102   //   - OK on success
103   //   - INVALID_ARGUMENT if 'src' is not valid or 'src' is too large to fit in
104   //       'dst'.
105   //   - FAILED_PRECONDITION if 'dst' posting list is in a corrupted state.
106   virtual libtextclassifier3::Status MoveFrom(PostingListUsed* dst,
107                                               PostingListUsed* src) const = 0;
108 };
109 
110 // A posting list with in-memory data. The caller should sync it to disk via
111 // FlashIndexStorage. Layout depends on the serializer.
112 class PostingListUsed {
113  public:
114   // Creates a PostingListUsed that takes over the ownership of
115   // posting_list_buffer with size_in_bytes bytes. 'Preexisting' means that
116   // the data in posting_list_buffer was previously modified by another instance
117   // of PostingListUsed, and the caller should read the data from disk to
118   // posting_list_buffer.
119   //
120   // RETURNS:
121   //   - A valid PostingListUsed if successful
122   //   - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
123   //     fails
124   //   - FAILED_PRECONDITION if serializer or posting_list_buffer is null
125   static libtextclassifier3::StatusOr<PostingListUsed>
126   CreateFromPreexistingPostingListUsedRegion(
127       PostingListSerializer* serializer,
128       std::unique_ptr<uint8_t[]> posting_list_buffer, uint32_t size_in_bytes);
129 
130   // Creates a PostingListUsed that owns a buffer of size_in_bytes bytes and
131   // initializes the content of the buffer so that the returned PostingListUsed
132   // is empty.
133   //
134   // RETURNS:
135   //   - A valid PostingListUsed if successful
136   //   - INVALID_ARGUMENT if posting_list_utils::IsValidPostingListSize check
137   //     fails
138   //   - FAILED_PRECONDITION if serializer is null
139   static libtextclassifier3::StatusOr<PostingListUsed>
140   CreateFromUnitializedRegion(PostingListSerializer* serializer,
141                               uint32_t size_in_bytes);
142 
posting_list_buffer()143   uint8_t* posting_list_buffer() {
144     is_dirty_ = true;
145     return posting_list_buffer_.get();
146   }
147 
posting_list_buffer()148   const uint8_t* posting_list_buffer() const {
149     return posting_list_buffer_.get();
150   }
151 
size_in_bytes()152   uint32_t size_in_bytes() const { return size_in_bytes_; }
153 
is_dirty()154   bool is_dirty() const { return is_dirty_; }
155 
156  private:
PostingListUsed(std::unique_ptr<uint8_t[]> posting_list_buffer,uint32_t size_in_bytes)157   explicit PostingListUsed(std::unique_ptr<uint8_t[]> posting_list_buffer,
158                            uint32_t size_in_bytes)
159       : posting_list_buffer_(std::move(posting_list_buffer)),
160         size_in_bytes_(size_in_bytes),
161         is_dirty_(false) {}
162 
163   // A byte array of size size_in_bytes_ containing encoded data for this
164   // posting list.
165   std::unique_ptr<uint8_t[]> posting_list_buffer_;
166   uint32_t size_in_bytes_;
167 
168   bool is_dirty_;
169 };
170 
171 }  // namespace lib
172 }  // namespace icing
173 
174 #endif  // ICING_FILE_POSTING_LIST_POSTING_LIST_USED_H_
175