xref: /aosp_15_r20/external/harfbuzz_ng/src/hb-set-digest.hh (revision 2d1272b857b1f7575e6e246373e1cb218663db8a)
1 /*
2  * Copyright © 2012  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_SET_DIGEST_HH
28 #define HB_SET_DIGEST_HH
29 
30 #include "hb.hh"
31 #include "hb-machinery.hh"
32 
/*
 * The set-digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The way these are used is that the filter is first populated by
 * a lookup's or subtable's Coverage table(s), and then when we
 * want to apply the lookup or subtable to a glyph, before trying
 * to apply, we ask the filter if the glyph may be covered. If it's
 * not, we return early.  We can also match a digest against another
 * digest.
 *
 * We use these filters at three levels:
 *   - If the digest for all the glyphs in the buffer as a whole
 *     does not match the digest for the lookup, skip the lookup.
 *   - For each glyph, if it doesn't match the lookup digest,
 *     skip it.
 *   - For each glyph, if it doesn't match the subtable digest,
 *     skip it.
 *
 * The main filter we use is a combination of three bits-pattern
 * filters. A bits-pattern filter checks a number of bits (5 or 6)
 * of the input number (glyph-id in this case) and checks whether
 * its pattern is amongst the patterns of any of the accepted values.
 * The accepted patterns are represented as a "long" integer. The
 * check is done using four bitwise operations only.
 */
66 
67 template <typename mask_t, unsigned int shift>
68 struct hb_set_digest_bits_pattern_t
69 {
70   static constexpr unsigned mask_bytes = sizeof (mask_t);
71   static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
72   static constexpr unsigned num_bits = 0
73 				     + (mask_bytes >= 1 ? 3 : 0)
74 				     + (mask_bytes >= 2 ? 1 : 0)
75 				     + (mask_bytes >= 4 ? 1 : 0)
76 				     + (mask_bytes >= 8 ? 1 : 0)
77 				     + (mask_bytes >= 16? 1 : 0)
78 				     + 0;
79 
80   static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
81   static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
82 
inithb_set_digest_bits_pattern_t83   void init () { mask = 0; }
84 
fullhb_set_digest_bits_pattern_t85   static hb_set_digest_bits_pattern_t full () { hb_set_digest_bits_pattern_t d; d.mask = (mask_t) -1; return d; }
86 
union_hb_set_digest_bits_pattern_t87   void union_ (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; }
88 
addhb_set_digest_bits_pattern_t89   void add (hb_codepoint_t g) { mask |= mask_for (g); }
90 
add_rangehb_set_digest_bits_pattern_t91   bool add_range (hb_codepoint_t a, hb_codepoint_t b)
92   {
93     if (mask == (mask_t) -1) return false;
94     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
95     {
96       mask = (mask_t) -1;
97       return false;
98     }
99     else
100     {
101       mask_t ma = mask_for (a);
102       mask_t mb = mask_for (b);
103       mask |= mb + (mb - ma) - (mb < ma);
104       return true;
105     }
106   }
107 
108   template <typename T>
add_arrayhb_set_digest_bits_pattern_t109   void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
110   {
111     for (unsigned int i = 0; i < count; i++)
112     {
113       add (*array);
114       array = &StructAtOffsetUnaligned<T> ((const void *) array, stride);
115     }
116   }
117   template <typename T>
add_arrayhb_set_digest_bits_pattern_t118   void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
119   template <typename T>
add_sorted_arrayhb_set_digest_bits_pattern_t120   bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
121   {
122     add_array (array, count, stride);
123     return true;
124   }
125   template <typename T>
add_sorted_arrayhb_set_digest_bits_pattern_t126   bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
127 
may_havehb_set_digest_bits_pattern_t128   bool may_have (const hb_set_digest_bits_pattern_t &o) const
129   { return mask & o.mask; }
130 
may_havehb_set_digest_bits_pattern_t131   bool may_have (hb_codepoint_t g) const
132   { return mask & mask_for (g); }
133 
operator []hb_set_digest_bits_pattern_t134   bool operator [] (hb_codepoint_t g) const
135   { return may_have (g); }
136 
137   private:
138 
mask_forhb_set_digest_bits_pattern_t139   static mask_t mask_for (hb_codepoint_t g)
140   { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
141   mask_t mask = 0;
142 };
143 
144 template <typename head_t, typename tail_t>
145 struct hb_set_digest_combiner_t
146 {
inithb_set_digest_combiner_t147   void init ()
148   {
149     head.init ();
150     tail.init ();
151   }
152 
fullhb_set_digest_combiner_t153   static hb_set_digest_combiner_t full () { hb_set_digest_combiner_t d; d.head = head_t::full(); d.tail = tail_t::full (); return d; }
154 
union_hb_set_digest_combiner_t155   void union_ (const hb_set_digest_combiner_t &o)
156   {
157     head.union_ (o.head);
158     tail.union_(o.tail);
159   }
160 
addhb_set_digest_combiner_t161   void add (hb_codepoint_t g)
162   {
163     head.add (g);
164     tail.add (g);
165   }
166 
add_rangehb_set_digest_combiner_t167   bool add_range (hb_codepoint_t a, hb_codepoint_t b)
168   {
169     return (int) head.add_range (a, b) | (int) tail.add_range (a, b);
170   }
171   template <typename T>
add_arrayhb_set_digest_combiner_t172   void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
173   {
174     head.add_array (array, count, stride);
175     tail.add_array (array, count, stride);
176   }
177   template <typename T>
add_arrayhb_set_digest_combiner_t178   void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
179   template <typename T>
add_sorted_arrayhb_set_digest_combiner_t180   bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
181   {
182     return head.add_sorted_array (array, count, stride) &&
183 	   tail.add_sorted_array (array, count, stride);
184   }
185   template <typename T>
add_sorted_arrayhb_set_digest_combiner_t186   bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
187 
may_havehb_set_digest_combiner_t188   bool may_have (const hb_set_digest_combiner_t &o) const
189   {
190     return head.may_have (o.head) && tail.may_have (o.tail);
191   }
192 
may_havehb_set_digest_combiner_t193   bool may_have (hb_codepoint_t g) const
194   {
195     return head.may_have (g) && tail.may_have (g);
196   }
197 
operator []hb_set_digest_combiner_t198   bool operator [] (hb_codepoint_t g) const
199   { return may_have (g); }
200 
201   private:
202   head_t head;
203   tail_t tail;
204 };
205 
206 
207 /*
208  * hb_set_digest_t
209  *
210  * This is a combination of digests that performs "best".
211  * There is not much science to this: it's a result of intuition
212  * and testing.
213  */
214 using hb_set_digest_t =
215   hb_set_digest_combiner_t
216   <
217     hb_set_digest_bits_pattern_t<unsigned long, 4>,
218     hb_set_digest_combiner_t
219     <
220       hb_set_digest_bits_pattern_t<unsigned long, 0>,
221       hb_set_digest_bits_pattern_t<unsigned long, 9>
222     >
223   >
224 ;
225 
226 
227 #endif /* HB_SET_DIGEST_HH */
228