xref: /aosp_15_r20/external/icing/icing/index/numeric/integer-index-bucket-util.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
16 #define ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
17 
18 #include <cstdint>
19 #include <utility>
20 #include <vector>
21 
22 #include "icing/index/numeric/integer-index-data.h"
23 
24 namespace icing {
25 namespace lib {
26 
27 namespace integer_index_bucket_util {
28 
29 // A wrapper struct that contains information of a bucket.
30 // - The bucket contains data within the iterator [start, end).
31 // - Bucket range is [key_lower, key_upper], and all data within [start, end)
32 //   should have keys in the bucket range.
33 //
34 // Note: the caller should make sure the lifecycle of data vector is longer than
35 // instances of this wrapper struct.
36 struct DataRangeAndBucketInfo {
37   std::vector<IntegerIndexData>::iterator start;
38   std::vector<IntegerIndexData>::iterator end;
39   int64_t key_lower;
40   int64_t key_upper;
41 
DataRangeAndBucketInfoDataRangeAndBucketInfo42   explicit DataRangeAndBucketInfo(
43       std::vector<IntegerIndexData>::iterator start_in,
44       std::vector<IntegerIndexData>::iterator end_in, int64_t key_lower_in,
45       int64_t key_upper_in)
46       : start(std::move(start_in)),
47         end(std::move(end_in)),
48         key_lower(key_lower_in),
49         key_upper(key_upper_in) {}
50 };
51 
52 // Helper function to split data (that are originally in a bucket with range
53 // [original_key_lower, original_key_upper]) into different buckets according to
54 // num_data_threshold.
55 // - The input vector `data` will be sorted by key in ascending order (unless
56 //   there's no need to split in which case data is returned unmodified)
57 // - Data with the same key will be in the same bucket even if # of them exceed
58 //   num_data_threshold.
59 // - Range of all buckets will be disjoint, and the range union will be
60 //   [original_key_lower, original_key_upper].
61 // - Data slice (i.e. [start, end)) can be empty.
62 //
63 // REQUIRES:
64 // - original_key_lower < original_key_upper
65 // - num_data_threshold > 0
66 // - Keys of all data are in range [original_key_lower, original_key_upper]
67 //
68 // Returns: a vector of DataRangeAndBucketInfo that contain all bucket info
69 //   after splitting. Also the returned vector should contain at least one
70 //   bucket, otherwise it is considered an error.
71 std::vector<DataRangeAndBucketInfo> Split(std::vector<IntegerIndexData>& data,
72                                           int64_t original_key_lower,
73                                           int64_t original_key_upper,
74                                           int32_t num_data_threshold);
75 
76 }  // namespace integer_index_bucket_util
77 
78 }  // namespace lib
79 }  // namespace icing
80 
81 #endif  // ICING_INDEX_NUMERIC_INTEGER_INDEX_BUCKET_UTIL_H_
82