xref: /aosp_15_r20/external/harfbuzz_ng/src/hb-utf.hh (revision 2d1272b857b1f7575e6e246373e1cb218663db8a)
/*
 * Copyright © 2011,2012,2014  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
26*2d1272b8SAndroid Build Coastguard Worker 
27*2d1272b8SAndroid Build Coastguard Worker #ifndef HB_UTF_HH
28*2d1272b8SAndroid Build Coastguard Worker #define HB_UTF_HH
29*2d1272b8SAndroid Build Coastguard Worker 
30*2d1272b8SAndroid Build Coastguard Worker #include "hb.hh"
31*2d1272b8SAndroid Build Coastguard Worker 
32*2d1272b8SAndroid Build Coastguard Worker #include "hb-open-type.hh"
33*2d1272b8SAndroid Build Coastguard Worker 
34*2d1272b8SAndroid Build Coastguard Worker 
35*2d1272b8SAndroid Build Coastguard Worker struct hb_utf8_t
36*2d1272b8SAndroid Build Coastguard Worker {
37*2d1272b8SAndroid Build Coastguard Worker   typedef uint8_t codepoint_t;
38*2d1272b8SAndroid Build Coastguard Worker   static constexpr unsigned max_len = 4;
39*2d1272b8SAndroid Build Coastguard Worker 
40*2d1272b8SAndroid Build Coastguard Worker   static const codepoint_t *
nexthb_utf8_t41*2d1272b8SAndroid Build Coastguard Worker   next (const codepoint_t *text,
42*2d1272b8SAndroid Build Coastguard Worker 	const codepoint_t *end,
43*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
44*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement)
45*2d1272b8SAndroid Build Coastguard Worker   {
46*2d1272b8SAndroid Build Coastguard Worker     /* Written to only accept well-formed sequences.
47*2d1272b8SAndroid Build Coastguard Worker      * Based on ideas from ICU's U8_NEXT.
48*2d1272b8SAndroid Build Coastguard Worker      * Generates one "replacement" for each ill-formed byte. */
49*2d1272b8SAndroid Build Coastguard Worker 
50*2d1272b8SAndroid Build Coastguard Worker     hb_codepoint_t c = *text++;
51*2d1272b8SAndroid Build Coastguard Worker 
52*2d1272b8SAndroid Build Coastguard Worker     if (c > 0x7Fu)
53*2d1272b8SAndroid Build Coastguard Worker     {
54*2d1272b8SAndroid Build Coastguard Worker       if (hb_in_range<hb_codepoint_t> (c, 0xC2u, 0xDFu)) /* Two-byte */
55*2d1272b8SAndroid Build Coastguard Worker       {
56*2d1272b8SAndroid Build Coastguard Worker 	unsigned int t1;
57*2d1272b8SAndroid Build Coastguard Worker 	if (likely (text < end &&
58*2d1272b8SAndroid Build Coastguard Worker 		    (t1 = text[0] - 0x80u) <= 0x3Fu))
59*2d1272b8SAndroid Build Coastguard Worker 	{
60*2d1272b8SAndroid Build Coastguard Worker 	  c = ((c&0x1Fu)<<6) | t1;
61*2d1272b8SAndroid Build Coastguard Worker 	  text++;
62*2d1272b8SAndroid Build Coastguard Worker 	}
63*2d1272b8SAndroid Build Coastguard Worker 	else
64*2d1272b8SAndroid Build Coastguard Worker 	  goto error;
65*2d1272b8SAndroid Build Coastguard Worker       }
66*2d1272b8SAndroid Build Coastguard Worker       else if (hb_in_range<hb_codepoint_t> (c, 0xE0u, 0xEFu)) /* Three-byte */
67*2d1272b8SAndroid Build Coastguard Worker       {
68*2d1272b8SAndroid Build Coastguard Worker 	unsigned int t1, t2;
69*2d1272b8SAndroid Build Coastguard Worker 	if (likely (1 < end - text &&
70*2d1272b8SAndroid Build Coastguard Worker 		    (t1 = text[0] - 0x80u) <= 0x3Fu &&
71*2d1272b8SAndroid Build Coastguard Worker 		    (t2 = text[1] - 0x80u) <= 0x3Fu))
72*2d1272b8SAndroid Build Coastguard Worker 	{
73*2d1272b8SAndroid Build Coastguard Worker 	  c = ((c&0xFu)<<12) | (t1<<6) | t2;
74*2d1272b8SAndroid Build Coastguard Worker 	  if (unlikely (c < 0x0800u || hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu)))
75*2d1272b8SAndroid Build Coastguard Worker 	    goto error;
76*2d1272b8SAndroid Build Coastguard Worker 	  text += 2;
77*2d1272b8SAndroid Build Coastguard Worker 	}
78*2d1272b8SAndroid Build Coastguard Worker 	else
79*2d1272b8SAndroid Build Coastguard Worker 	  goto error;
80*2d1272b8SAndroid Build Coastguard Worker       }
81*2d1272b8SAndroid Build Coastguard Worker       else if (hb_in_range<hb_codepoint_t> (c, 0xF0u, 0xF4u)) /* Four-byte */
82*2d1272b8SAndroid Build Coastguard Worker       {
83*2d1272b8SAndroid Build Coastguard Worker 	unsigned int t1, t2, t3;
84*2d1272b8SAndroid Build Coastguard Worker 	if (likely (2 < end - text &&
85*2d1272b8SAndroid Build Coastguard Worker 		    (t1 = text[0] - 0x80u) <= 0x3Fu &&
86*2d1272b8SAndroid Build Coastguard Worker 		    (t2 = text[1] - 0x80u) <= 0x3Fu &&
87*2d1272b8SAndroid Build Coastguard Worker 		    (t3 = text[2] - 0x80u) <= 0x3Fu))
88*2d1272b8SAndroid Build Coastguard Worker 	{
89*2d1272b8SAndroid Build Coastguard Worker 	  c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3;
90*2d1272b8SAndroid Build Coastguard Worker 	  if (unlikely (!hb_in_range<hb_codepoint_t> (c, 0x10000u, 0x10FFFFu)))
91*2d1272b8SAndroid Build Coastguard Worker 	    goto error;
92*2d1272b8SAndroid Build Coastguard Worker 	  text += 3;
93*2d1272b8SAndroid Build Coastguard Worker 	}
94*2d1272b8SAndroid Build Coastguard Worker 	else
95*2d1272b8SAndroid Build Coastguard Worker 	  goto error;
96*2d1272b8SAndroid Build Coastguard Worker       }
97*2d1272b8SAndroid Build Coastguard Worker       else
98*2d1272b8SAndroid Build Coastguard Worker 	goto error;
99*2d1272b8SAndroid Build Coastguard Worker     }
100*2d1272b8SAndroid Build Coastguard Worker 
101*2d1272b8SAndroid Build Coastguard Worker     *unicode = c;
102*2d1272b8SAndroid Build Coastguard Worker     return text;
103*2d1272b8SAndroid Build Coastguard Worker 
104*2d1272b8SAndroid Build Coastguard Worker   error:
105*2d1272b8SAndroid Build Coastguard Worker     *unicode = replacement;
106*2d1272b8SAndroid Build Coastguard Worker     return text;
107*2d1272b8SAndroid Build Coastguard Worker   }
108*2d1272b8SAndroid Build Coastguard Worker 
109*2d1272b8SAndroid Build Coastguard Worker   static const codepoint_t *
prevhb_utf8_t110*2d1272b8SAndroid Build Coastguard Worker   prev (const codepoint_t *text,
111*2d1272b8SAndroid Build Coastguard Worker 	const codepoint_t *start,
112*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
113*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement)
114*2d1272b8SAndroid Build Coastguard Worker   {
115*2d1272b8SAndroid Build Coastguard Worker     const codepoint_t *end = text--;
116*2d1272b8SAndroid Build Coastguard Worker     while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
117*2d1272b8SAndroid Build Coastguard Worker       text--;
118*2d1272b8SAndroid Build Coastguard Worker 
119*2d1272b8SAndroid Build Coastguard Worker     if (likely (next (text, end, unicode, replacement) == end))
120*2d1272b8SAndroid Build Coastguard Worker       return text;
121*2d1272b8SAndroid Build Coastguard Worker 
122*2d1272b8SAndroid Build Coastguard Worker     *unicode = replacement;
123*2d1272b8SAndroid Build Coastguard Worker     return end - 1;
124*2d1272b8SAndroid Build Coastguard Worker   }
125*2d1272b8SAndroid Build Coastguard Worker 
126*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
strlenhb_utf8_t127*2d1272b8SAndroid Build Coastguard Worker   strlen (const codepoint_t *text)
128*2d1272b8SAndroid Build Coastguard Worker   { return ::strlen ((const char *) text); }
129*2d1272b8SAndroid Build Coastguard Worker 
130*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
encode_lenhb_utf8_t131*2d1272b8SAndroid Build Coastguard Worker   encode_len (hb_codepoint_t unicode)
132*2d1272b8SAndroid Build Coastguard Worker   {
133*2d1272b8SAndroid Build Coastguard Worker     if (unicode <   0x0080u) return 1;
134*2d1272b8SAndroid Build Coastguard Worker     if (unicode <   0x0800u) return 2;
135*2d1272b8SAndroid Build Coastguard Worker     if (unicode <  0x10000u) return 3;
136*2d1272b8SAndroid Build Coastguard Worker     if (unicode < 0x110000u) return 4;
137*2d1272b8SAndroid Build Coastguard Worker     return 3;
138*2d1272b8SAndroid Build Coastguard Worker   }
139*2d1272b8SAndroid Build Coastguard Worker 
140*2d1272b8SAndroid Build Coastguard Worker   static codepoint_t *
encodehb_utf8_t141*2d1272b8SAndroid Build Coastguard Worker   encode (codepoint_t *text,
142*2d1272b8SAndroid Build Coastguard Worker 	  const codepoint_t *end,
143*2d1272b8SAndroid Build Coastguard Worker 	  hb_codepoint_t unicode)
144*2d1272b8SAndroid Build Coastguard Worker   {
145*2d1272b8SAndroid Build Coastguard Worker     if (unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu)))
146*2d1272b8SAndroid Build Coastguard Worker       unicode = 0xFFFDu;
147*2d1272b8SAndroid Build Coastguard Worker     if (unicode < 0x0080u)
148*2d1272b8SAndroid Build Coastguard Worker      *text++ = unicode;
149*2d1272b8SAndroid Build Coastguard Worker     else if (unicode < 0x0800u)
150*2d1272b8SAndroid Build Coastguard Worker     {
151*2d1272b8SAndroid Build Coastguard Worker       if (end - text >= 2)
152*2d1272b8SAndroid Build Coastguard Worker       {
153*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0xC0u + (0x1Fu & (unicode >>  6));
154*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0x80u + (0x3Fu & (unicode      ));
155*2d1272b8SAndroid Build Coastguard Worker       }
156*2d1272b8SAndroid Build Coastguard Worker     }
157*2d1272b8SAndroid Build Coastguard Worker     else if (unicode < 0x10000u)
158*2d1272b8SAndroid Build Coastguard Worker     {
159*2d1272b8SAndroid Build Coastguard Worker       if (end - text >= 3)
160*2d1272b8SAndroid Build Coastguard Worker       {
161*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0xE0u + (0x0Fu & (unicode >> 12));
162*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0x80u + (0x3Fu & (unicode >>  6));
163*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0x80u + (0x3Fu & (unicode      ));
164*2d1272b8SAndroid Build Coastguard Worker       }
165*2d1272b8SAndroid Build Coastguard Worker     }
166*2d1272b8SAndroid Build Coastguard Worker     else
167*2d1272b8SAndroid Build Coastguard Worker     {
168*2d1272b8SAndroid Build Coastguard Worker       if (end - text >= 4)
169*2d1272b8SAndroid Build Coastguard Worker       {
170*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0xF0u + (0x07u & (unicode >> 18));
171*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0x80u + (0x3Fu & (unicode >> 12));
172*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0x80u + (0x3Fu & (unicode >>  6));
173*2d1272b8SAndroid Build Coastguard Worker 	*text++ =  0x80u + (0x3Fu & (unicode      ));
174*2d1272b8SAndroid Build Coastguard Worker       }
175*2d1272b8SAndroid Build Coastguard Worker     }
176*2d1272b8SAndroid Build Coastguard Worker     return text;
177*2d1272b8SAndroid Build Coastguard Worker   }
178*2d1272b8SAndroid Build Coastguard Worker };
179*2d1272b8SAndroid Build Coastguard Worker 
180*2d1272b8SAndroid Build Coastguard Worker 
181*2d1272b8SAndroid Build Coastguard Worker template <typename TCodepoint>
182*2d1272b8SAndroid Build Coastguard Worker struct hb_utf16_xe_t
183*2d1272b8SAndroid Build Coastguard Worker {
184*2d1272b8SAndroid Build Coastguard Worker   static_assert (sizeof (TCodepoint) == 2, "");
185*2d1272b8SAndroid Build Coastguard Worker   typedef TCodepoint codepoint_t;
186*2d1272b8SAndroid Build Coastguard Worker   static constexpr unsigned max_len = 2;
187*2d1272b8SAndroid Build Coastguard Worker 
188*2d1272b8SAndroid Build Coastguard Worker   static const codepoint_t *
nexthb_utf16_xe_t189*2d1272b8SAndroid Build Coastguard Worker   next (const codepoint_t *text,
190*2d1272b8SAndroid Build Coastguard Worker 	const codepoint_t *end,
191*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
192*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement)
193*2d1272b8SAndroid Build Coastguard Worker   {
194*2d1272b8SAndroid Build Coastguard Worker     hb_codepoint_t c = *text++;
195*2d1272b8SAndroid Build Coastguard Worker 
196*2d1272b8SAndroid Build Coastguard Worker     if (likely (!hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu)))
197*2d1272b8SAndroid Build Coastguard Worker     {
198*2d1272b8SAndroid Build Coastguard Worker       *unicode = c;
199*2d1272b8SAndroid Build Coastguard Worker       return text;
200*2d1272b8SAndroid Build Coastguard Worker     }
201*2d1272b8SAndroid Build Coastguard Worker 
202*2d1272b8SAndroid Build Coastguard Worker     if (likely (c <= 0xDBFFu && text < end))
203*2d1272b8SAndroid Build Coastguard Worker     {
204*2d1272b8SAndroid Build Coastguard Worker       /* High-surrogate in c */
205*2d1272b8SAndroid Build Coastguard Worker       hb_codepoint_t l = *text;
206*2d1272b8SAndroid Build Coastguard Worker       if (likely (hb_in_range<hb_codepoint_t> (l, 0xDC00u, 0xDFFFu)))
207*2d1272b8SAndroid Build Coastguard Worker       {
208*2d1272b8SAndroid Build Coastguard Worker 	/* Low-surrogate in l */
209*2d1272b8SAndroid Build Coastguard Worker 	*unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u);
210*2d1272b8SAndroid Build Coastguard Worker 	 text++;
211*2d1272b8SAndroid Build Coastguard Worker 	 return text;
212*2d1272b8SAndroid Build Coastguard Worker       }
213*2d1272b8SAndroid Build Coastguard Worker     }
214*2d1272b8SAndroid Build Coastguard Worker 
215*2d1272b8SAndroid Build Coastguard Worker     /* Lonely / out-of-order surrogate. */
216*2d1272b8SAndroid Build Coastguard Worker     *unicode = replacement;
217*2d1272b8SAndroid Build Coastguard Worker     return text;
218*2d1272b8SAndroid Build Coastguard Worker   }
219*2d1272b8SAndroid Build Coastguard Worker 
220*2d1272b8SAndroid Build Coastguard Worker   static const codepoint_t *
prevhb_utf16_xe_t221*2d1272b8SAndroid Build Coastguard Worker   prev (const codepoint_t *text,
222*2d1272b8SAndroid Build Coastguard Worker 	const codepoint_t *start,
223*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
224*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement)
225*2d1272b8SAndroid Build Coastguard Worker   {
226*2d1272b8SAndroid Build Coastguard Worker     hb_codepoint_t c = *--text;
227*2d1272b8SAndroid Build Coastguard Worker 
228*2d1272b8SAndroid Build Coastguard Worker     if (likely (!hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu)))
229*2d1272b8SAndroid Build Coastguard Worker     {
230*2d1272b8SAndroid Build Coastguard Worker       *unicode = c;
231*2d1272b8SAndroid Build Coastguard Worker       return text;
232*2d1272b8SAndroid Build Coastguard Worker     }
233*2d1272b8SAndroid Build Coastguard Worker 
234*2d1272b8SAndroid Build Coastguard Worker     if (likely (c >= 0xDC00u && start < text))
235*2d1272b8SAndroid Build Coastguard Worker     {
236*2d1272b8SAndroid Build Coastguard Worker       /* Low-surrogate in c */
237*2d1272b8SAndroid Build Coastguard Worker       hb_codepoint_t h = text[-1];
238*2d1272b8SAndroid Build Coastguard Worker       if (likely (hb_in_range<hb_codepoint_t> (h, 0xD800u, 0xDBFFu)))
239*2d1272b8SAndroid Build Coastguard Worker       {
240*2d1272b8SAndroid Build Coastguard Worker 	/* High-surrogate in h */
241*2d1272b8SAndroid Build Coastguard Worker 	*unicode = (h << 10) + c - ((0xD800u << 10) - 0x10000u + 0xDC00u);
242*2d1272b8SAndroid Build Coastguard Worker 	text--;
243*2d1272b8SAndroid Build Coastguard Worker 	return text;
244*2d1272b8SAndroid Build Coastguard Worker       }
245*2d1272b8SAndroid Build Coastguard Worker     }
246*2d1272b8SAndroid Build Coastguard Worker 
247*2d1272b8SAndroid Build Coastguard Worker     /* Lonely / out-of-order surrogate. */
248*2d1272b8SAndroid Build Coastguard Worker     *unicode = replacement;
249*2d1272b8SAndroid Build Coastguard Worker     return text;
250*2d1272b8SAndroid Build Coastguard Worker   }
251*2d1272b8SAndroid Build Coastguard Worker 
252*2d1272b8SAndroid Build Coastguard Worker 
253*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
strlenhb_utf16_xe_t254*2d1272b8SAndroid Build Coastguard Worker   strlen (const codepoint_t *text)
255*2d1272b8SAndroid Build Coastguard Worker   {
256*2d1272b8SAndroid Build Coastguard Worker     unsigned int l = 0;
257*2d1272b8SAndroid Build Coastguard Worker     while (*text++) l++;
258*2d1272b8SAndroid Build Coastguard Worker     return l;
259*2d1272b8SAndroid Build Coastguard Worker   }
260*2d1272b8SAndroid Build Coastguard Worker 
261*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
encode_lenhb_utf16_xe_t262*2d1272b8SAndroid Build Coastguard Worker   encode_len (hb_codepoint_t unicode)
263*2d1272b8SAndroid Build Coastguard Worker   {
264*2d1272b8SAndroid Build Coastguard Worker     return unicode < 0x10000 ? 1 : 2;
265*2d1272b8SAndroid Build Coastguard Worker   }
266*2d1272b8SAndroid Build Coastguard Worker 
267*2d1272b8SAndroid Build Coastguard Worker   static codepoint_t *
encodehb_utf16_xe_t268*2d1272b8SAndroid Build Coastguard Worker   encode (codepoint_t *text,
269*2d1272b8SAndroid Build Coastguard Worker 	  const codepoint_t *end,
270*2d1272b8SAndroid Build Coastguard Worker 	  hb_codepoint_t unicode)
271*2d1272b8SAndroid Build Coastguard Worker   {
272*2d1272b8SAndroid Build Coastguard Worker     if (unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu)))
273*2d1272b8SAndroid Build Coastguard Worker       unicode = 0xFFFDu;
274*2d1272b8SAndroid Build Coastguard Worker     if (unicode < 0x10000u)
275*2d1272b8SAndroid Build Coastguard Worker      *text++ = unicode;
276*2d1272b8SAndroid Build Coastguard Worker     else if (end - text >= 2)
277*2d1272b8SAndroid Build Coastguard Worker     {
278*2d1272b8SAndroid Build Coastguard Worker       unicode -= 0x10000u;
279*2d1272b8SAndroid Build Coastguard Worker       *text++ =  0xD800u + (unicode >> 10);
280*2d1272b8SAndroid Build Coastguard Worker       *text++ =  0xDC00u + (unicode & 0x03FFu);
281*2d1272b8SAndroid Build Coastguard Worker     }
282*2d1272b8SAndroid Build Coastguard Worker     return text;
283*2d1272b8SAndroid Build Coastguard Worker   }
284*2d1272b8SAndroid Build Coastguard Worker };
285*2d1272b8SAndroid Build Coastguard Worker 
286*2d1272b8SAndroid Build Coastguard Worker typedef hb_utf16_xe_t<uint16_t> hb_utf16_t;
287*2d1272b8SAndroid Build Coastguard Worker typedef hb_utf16_xe_t<OT::HBUINT16> hb_utf16_be_t;
288*2d1272b8SAndroid Build Coastguard Worker 
289*2d1272b8SAndroid Build Coastguard Worker 
290*2d1272b8SAndroid Build Coastguard Worker template <typename TCodepoint, bool validate=true>
291*2d1272b8SAndroid Build Coastguard Worker struct hb_utf32_xe_t
292*2d1272b8SAndroid Build Coastguard Worker {
293*2d1272b8SAndroid Build Coastguard Worker   static_assert (sizeof (TCodepoint) == 4, "");
294*2d1272b8SAndroid Build Coastguard Worker   typedef TCodepoint codepoint_t;
295*2d1272b8SAndroid Build Coastguard Worker   static constexpr unsigned max_len = 1;
296*2d1272b8SAndroid Build Coastguard Worker 
297*2d1272b8SAndroid Build Coastguard Worker   static const TCodepoint *
nexthb_utf32_xe_t298*2d1272b8SAndroid Build Coastguard Worker   next (const TCodepoint *text,
299*2d1272b8SAndroid Build Coastguard Worker 	const TCodepoint *end HB_UNUSED,
300*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
301*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement)
302*2d1272b8SAndroid Build Coastguard Worker   {
303*2d1272b8SAndroid Build Coastguard Worker     hb_codepoint_t c = *unicode = *text++;
304*2d1272b8SAndroid Build Coastguard Worker     if (validate && unlikely (c >= 0xD800u && (c <= 0xDFFFu || c > 0x10FFFFu)))
305*2d1272b8SAndroid Build Coastguard Worker       *unicode = replacement;
306*2d1272b8SAndroid Build Coastguard Worker     return text;
307*2d1272b8SAndroid Build Coastguard Worker   }
308*2d1272b8SAndroid Build Coastguard Worker 
309*2d1272b8SAndroid Build Coastguard Worker   static const TCodepoint *
prevhb_utf32_xe_t310*2d1272b8SAndroid Build Coastguard Worker   prev (const TCodepoint *text,
311*2d1272b8SAndroid Build Coastguard Worker 	const TCodepoint *start HB_UNUSED,
312*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
313*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement)
314*2d1272b8SAndroid Build Coastguard Worker   {
315*2d1272b8SAndroid Build Coastguard Worker     hb_codepoint_t c = *unicode = *--text;
316*2d1272b8SAndroid Build Coastguard Worker     if (validate && unlikely (c >= 0xD800u && (c <= 0xDFFFu || c > 0x10FFFFu)))
317*2d1272b8SAndroid Build Coastguard Worker       *unicode = replacement;
318*2d1272b8SAndroid Build Coastguard Worker     return text;
319*2d1272b8SAndroid Build Coastguard Worker   }
320*2d1272b8SAndroid Build Coastguard Worker 
321*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
strlenhb_utf32_xe_t322*2d1272b8SAndroid Build Coastguard Worker   strlen (const TCodepoint *text)
323*2d1272b8SAndroid Build Coastguard Worker   {
324*2d1272b8SAndroid Build Coastguard Worker     unsigned int l = 0;
325*2d1272b8SAndroid Build Coastguard Worker     while (*text++) l++;
326*2d1272b8SAndroid Build Coastguard Worker     return l;
327*2d1272b8SAndroid Build Coastguard Worker   }
328*2d1272b8SAndroid Build Coastguard Worker 
329*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
encode_lenhb_utf32_xe_t330*2d1272b8SAndroid Build Coastguard Worker   encode_len (hb_codepoint_t unicode HB_UNUSED)
331*2d1272b8SAndroid Build Coastguard Worker   {
332*2d1272b8SAndroid Build Coastguard Worker     return 1;
333*2d1272b8SAndroid Build Coastguard Worker   }
334*2d1272b8SAndroid Build Coastguard Worker 
335*2d1272b8SAndroid Build Coastguard Worker   static codepoint_t *
encodehb_utf32_xe_t336*2d1272b8SAndroid Build Coastguard Worker   encode (codepoint_t *text,
337*2d1272b8SAndroid Build Coastguard Worker 	  const codepoint_t *end HB_UNUSED,
338*2d1272b8SAndroid Build Coastguard Worker 	  hb_codepoint_t unicode)
339*2d1272b8SAndroid Build Coastguard Worker   {
340*2d1272b8SAndroid Build Coastguard Worker     if (validate && unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu)))
341*2d1272b8SAndroid Build Coastguard Worker       unicode = 0xFFFDu;
342*2d1272b8SAndroid Build Coastguard Worker     *text++ = unicode;
343*2d1272b8SAndroid Build Coastguard Worker     return text;
344*2d1272b8SAndroid Build Coastguard Worker   }
345*2d1272b8SAndroid Build Coastguard Worker };
346*2d1272b8SAndroid Build Coastguard Worker 
347*2d1272b8SAndroid Build Coastguard Worker typedef hb_utf32_xe_t<uint32_t> hb_utf32_t;
348*2d1272b8SAndroid Build Coastguard Worker typedef hb_utf32_xe_t<uint32_t, false> hb_utf32_novalidate_t;
349*2d1272b8SAndroid Build Coastguard Worker 
350*2d1272b8SAndroid Build Coastguard Worker 
351*2d1272b8SAndroid Build Coastguard Worker struct hb_latin1_t
352*2d1272b8SAndroid Build Coastguard Worker {
353*2d1272b8SAndroid Build Coastguard Worker   typedef uint8_t codepoint_t;
354*2d1272b8SAndroid Build Coastguard Worker   static constexpr unsigned max_len = 1;
355*2d1272b8SAndroid Build Coastguard Worker 
356*2d1272b8SAndroid Build Coastguard Worker   static const codepoint_t *
nexthb_latin1_t357*2d1272b8SAndroid Build Coastguard Worker   next (const codepoint_t *text,
358*2d1272b8SAndroid Build Coastguard Worker 	const codepoint_t *end HB_UNUSED,
359*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
360*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement HB_UNUSED)
361*2d1272b8SAndroid Build Coastguard Worker   {
362*2d1272b8SAndroid Build Coastguard Worker     *unicode = *text++;
363*2d1272b8SAndroid Build Coastguard Worker     return text;
364*2d1272b8SAndroid Build Coastguard Worker   }
365*2d1272b8SAndroid Build Coastguard Worker 
366*2d1272b8SAndroid Build Coastguard Worker   static const codepoint_t *
prevhb_latin1_t367*2d1272b8SAndroid Build Coastguard Worker   prev (const codepoint_t *text,
368*2d1272b8SAndroid Build Coastguard Worker 	const codepoint_t *start HB_UNUSED,
369*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
370*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement HB_UNUSED)
371*2d1272b8SAndroid Build Coastguard Worker   {
372*2d1272b8SAndroid Build Coastguard Worker     *unicode = *--text;
373*2d1272b8SAndroid Build Coastguard Worker     return text;
374*2d1272b8SAndroid Build Coastguard Worker   }
375*2d1272b8SAndroid Build Coastguard Worker 
376*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
strlenhb_latin1_t377*2d1272b8SAndroid Build Coastguard Worker   strlen (const codepoint_t *text)
378*2d1272b8SAndroid Build Coastguard Worker   {
379*2d1272b8SAndroid Build Coastguard Worker     unsigned int l = 0;
380*2d1272b8SAndroid Build Coastguard Worker     while (*text++) l++;
381*2d1272b8SAndroid Build Coastguard Worker     return l;
382*2d1272b8SAndroid Build Coastguard Worker   }
383*2d1272b8SAndroid Build Coastguard Worker 
384*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
encode_lenhb_latin1_t385*2d1272b8SAndroid Build Coastguard Worker   encode_len (hb_codepoint_t unicode HB_UNUSED)
386*2d1272b8SAndroid Build Coastguard Worker   {
387*2d1272b8SAndroid Build Coastguard Worker     return 1;
388*2d1272b8SAndroid Build Coastguard Worker   }
389*2d1272b8SAndroid Build Coastguard Worker 
390*2d1272b8SAndroid Build Coastguard Worker   static codepoint_t *
encodehb_latin1_t391*2d1272b8SAndroid Build Coastguard Worker   encode (codepoint_t *text,
392*2d1272b8SAndroid Build Coastguard Worker 	  const codepoint_t *end HB_UNUSED,
393*2d1272b8SAndroid Build Coastguard Worker 	  hb_codepoint_t unicode)
394*2d1272b8SAndroid Build Coastguard Worker   {
395*2d1272b8SAndroid Build Coastguard Worker     if (unlikely (unicode >= 0x0100u))
396*2d1272b8SAndroid Build Coastguard Worker       unicode = '?';
397*2d1272b8SAndroid Build Coastguard Worker     *text++ = unicode;
398*2d1272b8SAndroid Build Coastguard Worker     return text;
399*2d1272b8SAndroid Build Coastguard Worker   }
400*2d1272b8SAndroid Build Coastguard Worker };
401*2d1272b8SAndroid Build Coastguard Worker 
402*2d1272b8SAndroid Build Coastguard Worker 
403*2d1272b8SAndroid Build Coastguard Worker struct hb_ascii_t
404*2d1272b8SAndroid Build Coastguard Worker {
405*2d1272b8SAndroid Build Coastguard Worker   typedef uint8_t codepoint_t;
406*2d1272b8SAndroid Build Coastguard Worker   static constexpr unsigned max_len = 1;
407*2d1272b8SAndroid Build Coastguard Worker 
408*2d1272b8SAndroid Build Coastguard Worker   static const codepoint_t *
nexthb_ascii_t409*2d1272b8SAndroid Build Coastguard Worker   next (const codepoint_t *text,
410*2d1272b8SAndroid Build Coastguard Worker 	const codepoint_t *end HB_UNUSED,
411*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
412*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement)
413*2d1272b8SAndroid Build Coastguard Worker   {
414*2d1272b8SAndroid Build Coastguard Worker     *unicode = *text++;
415*2d1272b8SAndroid Build Coastguard Worker     if (*unicode >= 0x0080u)
416*2d1272b8SAndroid Build Coastguard Worker       *unicode = replacement;
417*2d1272b8SAndroid Build Coastguard Worker     return text;
418*2d1272b8SAndroid Build Coastguard Worker   }
419*2d1272b8SAndroid Build Coastguard Worker 
420*2d1272b8SAndroid Build Coastguard Worker   static const codepoint_t *
prevhb_ascii_t421*2d1272b8SAndroid Build Coastguard Worker   prev (const codepoint_t *text,
422*2d1272b8SAndroid Build Coastguard Worker 	const codepoint_t *start HB_UNUSED,
423*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t *unicode,
424*2d1272b8SAndroid Build Coastguard Worker 	hb_codepoint_t replacement)
425*2d1272b8SAndroid Build Coastguard Worker   {
426*2d1272b8SAndroid Build Coastguard Worker     *unicode = *--text;
427*2d1272b8SAndroid Build Coastguard Worker     if (*unicode >= 0x0080u)
428*2d1272b8SAndroid Build Coastguard Worker       *unicode = replacement;
429*2d1272b8SAndroid Build Coastguard Worker     return text;
430*2d1272b8SAndroid Build Coastguard Worker   }
431*2d1272b8SAndroid Build Coastguard Worker 
432*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
strlenhb_ascii_t433*2d1272b8SAndroid Build Coastguard Worker   strlen (const codepoint_t *text)
434*2d1272b8SAndroid Build Coastguard Worker   {
435*2d1272b8SAndroid Build Coastguard Worker     unsigned int l = 0;
436*2d1272b8SAndroid Build Coastguard Worker     while (*text++) l++;
437*2d1272b8SAndroid Build Coastguard Worker     return l;
438*2d1272b8SAndroid Build Coastguard Worker   }
439*2d1272b8SAndroid Build Coastguard Worker 
440*2d1272b8SAndroid Build Coastguard Worker   static unsigned int
encode_lenhb_ascii_t441*2d1272b8SAndroid Build Coastguard Worker   encode_len (hb_codepoint_t unicode HB_UNUSED)
442*2d1272b8SAndroid Build Coastguard Worker   {
443*2d1272b8SAndroid Build Coastguard Worker     return 1;
444*2d1272b8SAndroid Build Coastguard Worker   }
445*2d1272b8SAndroid Build Coastguard Worker 
446*2d1272b8SAndroid Build Coastguard Worker   static codepoint_t *
encodehb_ascii_t447*2d1272b8SAndroid Build Coastguard Worker   encode (codepoint_t *text,
448*2d1272b8SAndroid Build Coastguard Worker 	  const codepoint_t *end HB_UNUSED,
449*2d1272b8SAndroid Build Coastguard Worker 	  hb_codepoint_t unicode)
450*2d1272b8SAndroid Build Coastguard Worker   {
451*2d1272b8SAndroid Build Coastguard Worker     if (unlikely (unicode >= 0x0080u))
452*2d1272b8SAndroid Build Coastguard Worker       unicode = '?';
453*2d1272b8SAndroid Build Coastguard Worker     *text++ = unicode;
454*2d1272b8SAndroid Build Coastguard Worker     return text;
455*2d1272b8SAndroid Build Coastguard Worker   }
456*2d1272b8SAndroid Build Coastguard Worker };
457*2d1272b8SAndroid Build Coastguard Worker 
458*2d1272b8SAndroid Build Coastguard Worker template <typename utf_t>
459*2d1272b8SAndroid Build Coastguard Worker static inline const typename utf_t::codepoint_t *
hb_utf_offset_to_pointer(const typename utf_t::codepoint_t * start,signed offset)460*2d1272b8SAndroid Build Coastguard Worker hb_utf_offset_to_pointer (const typename utf_t::codepoint_t *start,
461*2d1272b8SAndroid Build Coastguard Worker 			  signed offset)
462*2d1272b8SAndroid Build Coastguard Worker {
463*2d1272b8SAndroid Build Coastguard Worker   hb_codepoint_t unicode;
464*2d1272b8SAndroid Build Coastguard Worker 
465*2d1272b8SAndroid Build Coastguard Worker   while (offset-- > 0)
466*2d1272b8SAndroid Build Coastguard Worker     start = utf_t::next (start,
467*2d1272b8SAndroid Build Coastguard Worker 			 start + utf_t::max_len,
468*2d1272b8SAndroid Build Coastguard Worker 			 &unicode,
469*2d1272b8SAndroid Build Coastguard Worker 			 HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
470*2d1272b8SAndroid Build Coastguard Worker 
471*2d1272b8SAndroid Build Coastguard Worker   while (offset++ < 0)
472*2d1272b8SAndroid Build Coastguard Worker     start = utf_t::prev (start,
473*2d1272b8SAndroid Build Coastguard Worker 			 start - utf_t::max_len,
474*2d1272b8SAndroid Build Coastguard Worker 			 &unicode,
475*2d1272b8SAndroid Build Coastguard Worker 			 HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT);
476*2d1272b8SAndroid Build Coastguard Worker 
477*2d1272b8SAndroid Build Coastguard Worker   return start;
478*2d1272b8SAndroid Build Coastguard Worker }
479*2d1272b8SAndroid Build Coastguard Worker 
480*2d1272b8SAndroid Build Coastguard Worker 
481*2d1272b8SAndroid Build Coastguard Worker #endif /* HB_UTF_HH */
482