xref: /aosp_15_r20/external/harfbuzz_ng/src/hb-ucd.cc (revision 2d1272b857b1f7575e6e246373e1cb218663db8a)
1 /*
2  * Copyright (C) 2012 Grigori Goronzy <[email protected]>
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 #include "hb.hh"
18 #include "hb-unicode.hh"
19 #include "hb-machinery.hh"
20 
21 #include "hb-ucd-table.hh"
22 
23 static hb_unicode_combining_class_t
hb_ucd_combining_class(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t unicode,void * user_data HB_UNUSED)24 hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
25 			hb_codepoint_t unicode,
26 			void *user_data HB_UNUSED)
27 {
28   return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode);
29 }
30 
31 static hb_unicode_general_category_t
hb_ucd_general_category(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t unicode,void * user_data HB_UNUSED)32 hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
33 			 hb_codepoint_t unicode,
34 			 void *user_data HB_UNUSED)
35 {
36   return (hb_unicode_general_category_t) _hb_ucd_gc (unicode);
37 }
38 
39 static hb_codepoint_t
hb_ucd_mirroring(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t unicode,void * user_data HB_UNUSED)40 hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
41 		  hb_codepoint_t unicode,
42 		  void *user_data HB_UNUSED)
43 {
44   return unicode + _hb_ucd_bmg (unicode);
45 }
46 
47 static hb_script_t
hb_ucd_script(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t unicode,void * user_data HB_UNUSED)48 hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
49 	       hb_codepoint_t unicode,
50 	       void *user_data HB_UNUSED)
51 {
52   return _hb_ucd_sc_map[_hb_ucd_sc (unicode)];
53 }
54 
55 
/* Constants for the algorithmic Hangul syllable (de)composition below. */
#define SBASE 0xAC00u   /* First precomposed syllable. */
#define LBASE 0x1100u   /* First leading consonant (L). */
#define VBASE 0x1161u   /* First vowel (V). */
#define TBASE 0x11A7u   /* One below the first trailing consonant (T);
                         * trailing index 0 means "no trailing consonant". */
#define SCOUNT 11172u   /* Number of precomposed syllables (LCOUNT * NCOUNT). */
#define LCOUNT 19u      /* Number of leading consonants. */
#define VCOUNT 21u      /* Number of vowels. */
#define TCOUNT 28u      /* Number of trailing slots, including "none". */
#define NCOUNT (VCOUNT * TCOUNT)   /* Syllables per leading consonant. */
65 
66 static inline bool
_hb_ucd_decompose_hangul(hb_codepoint_t ab,hb_codepoint_t * a,hb_codepoint_t * b)67 _hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
68 {
69   unsigned si = ab - SBASE;
70 
71   if (si >= SCOUNT)
72     return false;
73 
74   if (si % TCOUNT)
75   {
76     /* LV,T */
77     *a = SBASE + (si / TCOUNT) * TCOUNT;
78     *b = TBASE + (si % TCOUNT);
79     return true;
80   } else {
81     /* L,V */
82     *a = LBASE + (si / NCOUNT);
83     *b = VBASE + (si % NCOUNT) / TCOUNT;
84     return true;
85   }
86 }
87 
88 static inline bool
_hb_ucd_compose_hangul(hb_codepoint_t a,hb_codepoint_t b,hb_codepoint_t * ab)89 _hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
90 {
91   if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) &&
92     !((a - SBASE) % TCOUNT))
93   {
94     /* LV,T */
95     *ab = a + (b - TBASE);
96     return true;
97   }
98   else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT))
99   {
100     /* L,V */
101     int li = a - LBASE;
102     int vi = b - VBASE;
103     *ab = SBASE + li * NCOUNT + vi * TCOUNT;
104     return true;
105   }
106   else
107     return false;
108 }
109 
110 static int
_cmp_pair(const void * _key,const void * _item)111 _cmp_pair (const void *_key, const void *_item)
112 {
113   uint64_t& a = * (uint64_t*) _key;
114   uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
115 
116   return a < b ? -1 : a > b ? +1 : 0;
117 }
118 static int
_cmp_pair_11_7_14(const void * _key,const void * _item)119 _cmp_pair_11_7_14 (const void *_key, const void *_item)
120 {
121   uint32_t& a = * (uint32_t*) _key;
122   uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0);
123 
124   return a < b ? -1 : a > b ? +1 : 0;
125 }
126 
127 static hb_bool_t
hb_ucd_compose(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t a,hb_codepoint_t b,hb_codepoint_t * ab,void * user_data HB_UNUSED)128 hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
129 		hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
130 		void *user_data HB_UNUSED)
131 {
132   // Hangul is handled algorithmically.
133   if (_hb_ucd_compose_hangul (a, b, ab)) return true;
134 
135   hb_codepoint_t u = 0;
136 
137   if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u)
138   {
139     /* If "a" is small enough and "b" is in the U+0300 range,
140      * the composition data is encoded in a 32bit array sorted
141      * by "a,b" pair. */
142     uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0);
143     const uint32_t *v = hb_bsearch (k,
144 				    _hb_ucd_dm2_u32_map,
145 				    ARRAY_LENGTH (_hb_ucd_dm2_u32_map),
146 				    sizeof (*_hb_ucd_dm2_u32_map),
147 				    _cmp_pair_11_7_14);
148     if (likely (!v)) return false;
149     u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v);
150   }
151   else
152   {
153     /* Otherwise it is stored in a 64bit array sorted by
154      * "a,b" pair. */
155     uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
156     const uint64_t *v = hb_bsearch (k,
157 				    _hb_ucd_dm2_u64_map,
158 				    ARRAY_LENGTH (_hb_ucd_dm2_u64_map),
159 				    sizeof (*_hb_ucd_dm2_u64_map),
160 				    _cmp_pair);
161     if (likely (!v)) return false;
162     u = HB_CODEPOINT_DECODE3_3 (*v);
163   }
164 
165   if (unlikely (!u)) return false;
166   *ab = u;
167   return true;
168 }
169 
170 static hb_bool_t
hb_ucd_decompose(hb_unicode_funcs_t * ufuncs HB_UNUSED,hb_codepoint_t ab,hb_codepoint_t * a,hb_codepoint_t * b,void * user_data HB_UNUSED)171 hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
172 		  hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
173 		  void *user_data HB_UNUSED)
174 {
175   if (_hb_ucd_decompose_hangul (ab, a, b)) return true;
176 
177   unsigned i = _hb_ucd_dm (ab);
178 
179   /* If no data, there's no decomposition. */
180   if (likely (!i)) return false;
181   i--;
182 
183   /* Check if it's a single-character decomposition. */
184   if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map))
185   {
186     /* Single-character decompositions currently are only in plane 0 or plane 2. */
187     if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map))
188     {
189       /* Plane 0. */
190       *a = _hb_ucd_dm1_p0_map[i];
191     }
192     else
193     {
194       /* Plane 2. */
195       i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map);
196       *a = 0x20000 | _hb_ucd_dm1_p2_map[i];
197     }
198     *b = 0;
199     return true;
200   }
201   i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map);
202 
203   /* Otherwise they are encoded either in a 32bit array or a 64bit array. */
204   if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map))
205   {
206     /* 32bit array. */
207     uint32_t v = _hb_ucd_dm2_u32_map[i];
208     *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v);
209     *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v);
210     return true;
211   }
212   i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map);
213 
214   /* 64bit array. */
215   uint64_t v = _hb_ucd_dm2_u64_map[i];
216   *a = HB_CODEPOINT_DECODE3_1 (v);
217   *b = HB_CODEPOINT_DECODE3_2 (v);
218   return true;
219 }
220 
221 
222 static void free_static_ucd_funcs ();
223 
224 static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
225 {
createhb_ucd_unicode_funcs_lazy_loader_t226   static hb_unicode_funcs_t *create ()
227   {
228     hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
229 
230     hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
231     hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
232     hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
233     hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
234     hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
235     hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
236 
237     hb_unicode_funcs_make_immutable (funcs);
238 
239     hb_atexit (free_static_ucd_funcs);
240 
241     return funcs;
242   }
243 } static_ucd_funcs;
244 
245 static inline
free_static_ucd_funcs()246 void free_static_ucd_funcs ()
247 {
248   static_ucd_funcs.free_instance ();
249 }
250 
251 hb_unicode_funcs_t *
hb_ucd_get_unicode_funcs()252 hb_ucd_get_unicode_funcs ()
253 {
254 #ifdef HB_NO_UCD
255   return hb_unicode_funcs_get_empty ();
256 #endif
257   return static_ucd_funcs.get_unconst ();
258 }
259