xref: /aosp_15_r20/external/icing/icing/index/lite/lite-index-options.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/lite/lite-index-options.h"
16 
17 #include <algorithm>
18 #include <cstddef>
19 #include <cstdint>
20 #include <string>
21 
22 #include "icing/index/lite/term-id-hit-pair.h"
23 #include "icing/legacy/index/icing-dynamic-trie.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 namespace {
29 
30 constexpr int kIcingMaxVariantsPerToken = 10;  // Maximum number of variants
31 
32 constexpr size_t kIcingMaxSearchableDocumentSize = (1u << 16) - 1;  // 64K
33 // Max num tokens per document. 64KB is our original maximum (searchable)
34 // document size. We clip if document exceeds this.
35 constexpr uint32_t kIcingMaxNumTokensPerDoc =
36     kIcingMaxSearchableDocumentSize / 5;
37 constexpr uint32_t kIcingMaxNumHitsPerDocument =
38     kIcingMaxNumTokensPerDoc * kIcingMaxVariantsPerToken;
39 
CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes)40 uint32_t CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes) {
41   constexpr uint32_t kHitBufferSlopMult = 2;
42 
43   // Add a 2x slop for the hit buffer. We need to make sure we can at
44   // least fit one document with index variants.
45   // TODO(b/111690435) Move LiteIndex::Element to a separate file so that this
46   // can use sizeof(LiteIndex::Element)
47   uint32_t hit_capacity_elts_with_slop =
48       hit_buffer_want_merge_bytes / sizeof(TermIdHitPair);
49   // Add some slop for index variants on top of max num tokens.
50   hit_capacity_elts_with_slop += kIcingMaxNumHitsPerDocument;
51   hit_capacity_elts_with_slop *= kHitBufferSlopMult;
52 
53   return hit_capacity_elts_with_slop;
54 }
55 
CalculateTrieOptions(uint32_t hit_buffer_size)56 IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
57   // The default min is 1/5th of the main index lexicon, which can
58   // hold >1M terms. We don't need values so value size is 0. We
59   // conservatively scale from there.
60   //
61   // We can give this a lot of headroom because overestimating the
62   // requirement has minimal resource impact.
63   double scaling_factor =
64       std::max(1.0, static_cast<double>(hit_buffer_size) / (100u << 10));
65   return IcingDynamicTrie::Options((200u << 10) * scaling_factor,
66                                    (200u << 10) * scaling_factor,
67                                    (1u << 20) * scaling_factor, 0);
68 }
69 
70 }  // namespace
71 
LiteIndexOptions(const std::string & filename_base,uint32_t hit_buffer_want_merge_bytes,bool hit_buffer_sort_at_indexing,uint32_t hit_buffer_sort_threshold_bytes)72 LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
73                                    uint32_t hit_buffer_want_merge_bytes,
74                                    bool hit_buffer_sort_at_indexing,
75                                    uint32_t hit_buffer_sort_threshold_bytes)
76     : filename_base(filename_base),
77       hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
78       hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
79       hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
80   hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
81   lexicon_options = CalculateTrieOptions(hit_buffer_size);
82   display_mappings_options = CalculateTrieOptions(hit_buffer_size);
83 }
84 
85 }  // namespace lib
86 }  // namespace icing
87