1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifndef UPB_LEX_UNICODE_H_
29 #define UPB_LEX_UNICODE_H_
30
31 // Must be last.
32 #include "upb/port/def.inc"
33
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37
38 // Returns true iff a codepoint is the value for a high surrogate.
upb_Unicode_IsHigh(uint32_t cp)39 UPB_INLINE bool upb_Unicode_IsHigh(uint32_t cp) {
40 return (cp >= 0xd800 && cp <= 0xdbff);
41 }
42
43 // Returns true iff a codepoint is the value for a low surrogate.
upb_Unicode_IsLow(uint32_t cp)44 UPB_INLINE bool upb_Unicode_IsLow(uint32_t cp) {
45 return (cp >= 0xdc00 && cp <= 0xdfff);
46 }
47
48 // Returns the high 16-bit surrogate value for a supplementary codepoint.
49 // Does not sanity-check the input.
upb_Unicode_ToHigh(uint32_t cp)50 UPB_INLINE uint16_t upb_Unicode_ToHigh(uint32_t cp) {
51 return (cp >> 10) + 0xd7c0;
52 }
53
54 // Returns the low 16-bit surrogate value for a supplementary codepoint.
55 // Does not sanity-check the input.
upb_Unicode_ToLow(uint32_t cp)56 UPB_INLINE uint16_t upb_Unicode_ToLow(uint32_t cp) {
57 return (cp & 0x3ff) | 0xdc00;
58 }
59
60 // Returns the 32-bit value corresponding to a pair of 16-bit surrogates.
61 // Does not sanity-check the input.
upb_Unicode_FromPair(uint32_t high,uint32_t low)62 UPB_INLINE uint32_t upb_Unicode_FromPair(uint32_t high, uint32_t low) {
63 return ((high & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
64 }
65
// Outputs a codepoint as UTF8.
// Returns the number of bytes written (1-4 on success, 0 on error).
// Does not sanity-check the input. Specifically does not check for surrogates.
//
// cp:  the codepoint to encode.
// out: destination buffer that receives the encoded bytes.
// NOTE(review): the definition is not in this header; presumably `out` must
// have room for at least 4 bytes (the largest documented return value) and no
// NUL terminator is appended -- confirm against the implementation.
int upb_Unicode_ToUTF8(uint32_t cp, char* out);
70
71 #ifdef __cplusplus
72 } /* extern "C" */
73 #endif
74
75 #include "upb/port/undef.inc"
76
77 #endif /* UPB_LEX_UNICODE_H_ */
78