1 /* 2 * Copyright © 2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_DIGEST_HH 28 #define HB_SET_DIGEST_HH 29 30 #include "hb.hh" 31 #include "hb-machinery.hh" 32 33 /* 34 * The set-digests here implement various "filters" that support 35 * "approximate member query". Conceptually these are like Bloom 36 * Filter and Quotient Filter, however, much smaller, faster, and 37 * designed to fit the requirements of our uses for glyph coverage 38 * queries. 39 * 40 * Our filters are highly accurate if the lookup covers fairly local 41 * set of glyphs, but fully flooded and ineffective if coverage is 42 * all over the place. 43 * 44 * The way these are used is that the filter is first populated by 45 * a lookup's or subtable's Coverage table(s), and then when we 46 * want to apply the lookup or subtable to a glyph, before trying 47 * to apply, we ask the filter if the glyph may be covered. If it's 48 * not, we return early. We can also match a digest against another 49 * digest. 50 * 51 * We use these filters at three levels: 52 * - If the digest for all the glyphs in the buffer as a whole 53 * does not match the digest for the lookup, skip the lookup. 54 * - For each glyph, if it doesn't match the lookup digest, 55 * skip it. 56 * - For each glyph, if it doesn't match the subtable digest, 57 * skip it. 58 * 59 * The main filter we use is a combination of three bits-pattern 60 * filters. A bits-pattern filter checks a number of bits (5 or 6) 61 * of the input number (glyph-id in this case) and checks whether 62 * its pattern is amongst the patterns of any of the accepted values. 63 * The accepted patterns are represented as a "long" integer. The 64 * check is done using four bitwise operations only. 65 */ 66 67 template <typename mask_t, unsigned int shift> 68 struct hb_set_digest_bits_pattern_t 69 { 70 static constexpr unsigned mask_bytes = sizeof (mask_t); 71 static constexpr unsigned mask_bits = sizeof (mask_t) * 8; 72 static constexpr unsigned num_bits = 0 73 + (mask_bytes >= 1 ? 3 : 0) 74 + (mask_bytes >= 2 ? 1 : 0) 75 + (mask_bytes >= 4 ? 1 : 0) 76 + (mask_bytes >= 8 ? 1 : 0) 77 + (mask_bytes >= 16? 1 : 0) 78 + 0; 79 80 static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); 81 static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); 82 inithb_set_digest_bits_pattern_t83 void init () { mask = 0; } 84 fullhb_set_digest_bits_pattern_t85 static hb_set_digest_bits_pattern_t full () { hb_set_digest_bits_pattern_t d; d.mask = (mask_t) -1; return d; } 86 union_hb_set_digest_bits_pattern_t87 void union_ (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; } 88 addhb_set_digest_bits_pattern_t89 void add (hb_codepoint_t g) { mask |= mask_for (g); } 90 add_rangehb_set_digest_bits_pattern_t91 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 92 { 93 if (mask == (mask_t) -1) return false; 94 if ((b >> shift) - (a >> shift) >= mask_bits - 1) 95 { 96 mask = (mask_t) -1; 97 return false; 98 } 99 else 100 { 101 mask_t ma = mask_for (a); 102 mask_t mb = mask_for (b); 103 mask |= mb + (mb - ma) - (mb < ma); 104 return true; 105 } 106 } 107 108 template <typename T> add_arrayhb_set_digest_bits_pattern_t109 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 110 { 111 for (unsigned int i = 0; i < count; i++) 112 { 113 add (*array); 114 array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); 115 } 116 } 117 template <typename T> add_arrayhb_set_digest_bits_pattern_t118 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 119 template <typename T> add_sorted_arrayhb_set_digest_bits_pattern_t120 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 121 { 122 add_array (array, count, stride); 123 return true; 124 } 125 template <typename T> add_sorted_arrayhb_set_digest_bits_pattern_t126 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 127 may_havehb_set_digest_bits_pattern_t128 bool may_have (const hb_set_digest_bits_pattern_t &o) const 129 { return mask & o.mask; } 130 may_havehb_set_digest_bits_pattern_t131 bool may_have (hb_codepoint_t g) const 132 { return mask & mask_for (g); } 133 operator []hb_set_digest_bits_pattern_t134 bool operator [] (hb_codepoint_t g) const 135 { return may_have (g); } 136 137 private: 138 mask_forhb_set_digest_bits_pattern_t139 static mask_t mask_for (hb_codepoint_t g) 140 { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } 141 mask_t mask = 0; 142 }; 143 144 template <typename head_t, typename tail_t> 145 struct hb_set_digest_combiner_t 146 { inithb_set_digest_combiner_t147 void init () 148 { 149 head.init (); 150 tail.init (); 151 } 152 fullhb_set_digest_combiner_t153 static hb_set_digest_combiner_t full () { hb_set_digest_combiner_t d; d.head = head_t::full(); d.tail = tail_t::full (); return d; } 154 union_hb_set_digest_combiner_t155 void union_ (const hb_set_digest_combiner_t &o) 156 { 157 head.union_ (o.head); 158 tail.union_(o.tail); 159 } 160 addhb_set_digest_combiner_t161 void add (hb_codepoint_t g) 162 { 163 head.add (g); 164 tail.add (g); 165 } 166 add_rangehb_set_digest_combiner_t167 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 168 { 169 return (int) head.add_range (a, b) | (int) tail.add_range (a, b); 170 } 171 template <typename T> add_arrayhb_set_digest_combiner_t172 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 173 { 174 head.add_array (array, count, stride); 175 tail.add_array (array, count, stride); 176 } 177 template <typename T> add_arrayhb_set_digest_combiner_t178 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 179 template <typename T> add_sorted_arrayhb_set_digest_combiner_t180 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 181 { 182 return head.add_sorted_array (array, count, stride) && 183 tail.add_sorted_array (array, count, stride); 184 } 185 template <typename T> add_sorted_arrayhb_set_digest_combiner_t186 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 187 may_havehb_set_digest_combiner_t188 bool may_have (const hb_set_digest_combiner_t &o) const 189 { 190 return head.may_have (o.head) && tail.may_have (o.tail); 191 } 192 may_havehb_set_digest_combiner_t193 bool may_have (hb_codepoint_t g) const 194 { 195 return head.may_have (g) && tail.may_have (g); 196 } 197 operator []hb_set_digest_combiner_t198 bool operator [] (hb_codepoint_t g) const 199 { return may_have (g); } 200 201 private: 202 head_t head; 203 tail_t tail; 204 }; 205 206 207 /* 208 * hb_set_digest_t 209 * 210 * This is a combination of digests that performs "best". 211 * There is not much science to this: it's a result of intuition 212 * and testing. 213 */ 214 using hb_set_digest_t = 215 hb_set_digest_combiner_t 216 < 217 hb_set_digest_bits_pattern_t<unsigned long, 4>, 218 hb_set_digest_combiner_t 219 < 220 hb_set_digest_bits_pattern_t<unsigned long, 0>, 221 hb_set_digest_bits_pattern_t<unsigned long, 9> 222 > 223 > 224 ; 225 226 227 #endif /* HB_SET_DIGEST_HH */ 228