1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/lite/lite-index-options.h"
16
17 #include <algorithm>
18 #include <cstddef>
19 #include <cstdint>
20 #include <string>
21
22 #include "icing/index/lite/term-id-hit-pair.h"
23 #include "icing/legacy/index/icing-dynamic-trie.h"
24
25 namespace icing {
26 namespace lib {
27
28 namespace {
29
30 constexpr int kIcingMaxVariantsPerToken = 10; // Maximum number of variants
31
32 constexpr size_t kIcingMaxSearchableDocumentSize = (1u << 16) - 1; // 64K
33 // Max num tokens per document. 64KB is our original maximum (searchable)
34 // document size. We clip if document exceeds this.
35 constexpr uint32_t kIcingMaxNumTokensPerDoc =
36 kIcingMaxSearchableDocumentSize / 5;
37 constexpr uint32_t kIcingMaxNumHitsPerDocument =
38 kIcingMaxNumTokensPerDoc * kIcingMaxVariantsPerToken;
39
CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes)40 uint32_t CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes) {
41 constexpr uint32_t kHitBufferSlopMult = 2;
42
43 // Add a 2x slop for the hit buffer. We need to make sure we can at
44 // least fit one document with index variants.
45 // TODO(b/111690435) Move LiteIndex::Element to a separate file so that this
46 // can use sizeof(LiteIndex::Element)
47 uint32_t hit_capacity_elts_with_slop =
48 hit_buffer_want_merge_bytes / sizeof(TermIdHitPair);
49 // Add some slop for index variants on top of max num tokens.
50 hit_capacity_elts_with_slop += kIcingMaxNumHitsPerDocument;
51 hit_capacity_elts_with_slop *= kHitBufferSlopMult;
52
53 return hit_capacity_elts_with_slop;
54 }
55
CalculateTrieOptions(uint32_t hit_buffer_size)56 IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
57 // The default min is 1/5th of the main index lexicon, which can
58 // hold >1M terms. We don't need values so value size is 0. We
59 // conservatively scale from there.
60 //
61 // We can give this a lot of headroom because overestimating the
62 // requirement has minimal resource impact.
63 double scaling_factor =
64 std::max(1.0, static_cast<double>(hit_buffer_size) / (100u << 10));
65 return IcingDynamicTrie::Options((200u << 10) * scaling_factor,
66 (200u << 10) * scaling_factor,
67 (1u << 20) * scaling_factor, 0);
68 }
69
70 } // namespace
71
LiteIndexOptions(const std::string & filename_base,uint32_t hit_buffer_want_merge_bytes,bool hit_buffer_sort_at_indexing,uint32_t hit_buffer_sort_threshold_bytes)72 LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
73 uint32_t hit_buffer_want_merge_bytes,
74 bool hit_buffer_sort_at_indexing,
75 uint32_t hit_buffer_sort_threshold_bytes)
76 : filename_base(filename_base),
77 hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
78 hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
79 hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
80 hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
81 lexicon_options = CalculateTrieOptions(hit_buffer_size);
82 display_mappings_options = CalculateTrieOptions(hit_buffer_size);
83 }
84
85 } // namespace lib
86 } // namespace icing
87