// Copyright 2018 Google LLC.
// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
3*c8dee2aaSAndroid Build Coastguard Worker #ifndef SkUTF_DEFINED
4*c8dee2aaSAndroid Build Coastguard Worker #define SkUTF_DEFINED
5*c8dee2aaSAndroid Build Coastguard Worker
6*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkAPI.h"
7*c8dee2aaSAndroid Build Coastguard Worker
8*c8dee2aaSAndroid Build Coastguard Worker #include <cstddef>
9*c8dee2aaSAndroid Build Coastguard Worker #include <cstdint>
10*c8dee2aaSAndroid Build Coastguard Worker
/** A single Unicode code point, held in a signed 32-bit integer. The decoding
    functions declared in this header return -1 through this type on error.
*/
using SkUnichar = int32_t;
12*c8dee2aaSAndroid Build Coastguard Worker
13*c8dee2aaSAndroid Build Coastguard Worker namespace SkUTF {
14*c8dee2aaSAndroid Build Coastguard Worker
15*c8dee2aaSAndroid Build Coastguard Worker /** Given a sequence of UTF-8 bytes, return the number of unicode codepoints.
16*c8dee2aaSAndroid Build Coastguard Worker If the sequence is invalid UTF-8, return -1.
17*c8dee2aaSAndroid Build Coastguard Worker */
18*c8dee2aaSAndroid Build Coastguard Worker SK_SPI int CountUTF8(const char* utf8, size_t byteLength);
19*c8dee2aaSAndroid Build Coastguard Worker
20*c8dee2aaSAndroid Build Coastguard Worker /** Given a sequence of aligned UTF-16 characters in machine-endian form,
21*c8dee2aaSAndroid Build Coastguard Worker return the number of unicode codepoints. If the sequence is invalid
22*c8dee2aaSAndroid Build Coastguard Worker UTF-16, return -1.
23*c8dee2aaSAndroid Build Coastguard Worker */
24*c8dee2aaSAndroid Build Coastguard Worker SK_SPI int CountUTF16(const uint16_t* utf16, size_t byteLength);
25*c8dee2aaSAndroid Build Coastguard Worker
26*c8dee2aaSAndroid Build Coastguard Worker /** Given a sequence of aligned UTF-32 characters in machine-endian form,
27*c8dee2aaSAndroid Build Coastguard Worker return the number of unicode codepoints. If the sequence is invalid
28*c8dee2aaSAndroid Build Coastguard Worker UTF-32, return -1.
29*c8dee2aaSAndroid Build Coastguard Worker */
30*c8dee2aaSAndroid Build Coastguard Worker SK_SPI int CountUTF32(const int32_t* utf32, size_t byteLength);
31*c8dee2aaSAndroid Build Coastguard Worker
32*c8dee2aaSAndroid Build Coastguard Worker /** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
33*c8dee2aaSAndroid Build Coastguard Worker The pointer will be incremented to point at the next codepoint's start. If
34*c8dee2aaSAndroid Build Coastguard Worker invalid UTF-8 is encountered, set *ptr to end and return -1.
35*c8dee2aaSAndroid Build Coastguard Worker */
36*c8dee2aaSAndroid Build Coastguard Worker SK_SPI SkUnichar NextUTF8(const char** ptr, const char* end);
37*c8dee2aaSAndroid Build Coastguard Worker
38*c8dee2aaSAndroid Build Coastguard Worker /** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
39*c8dee2aaSAndroid Build Coastguard Worker The pointer will be incremented to point at the next codepoint's start. If
40*c8dee2aaSAndroid Build Coastguard Worker invalid UTF-8 is encountered, set *ptr to end and
41*c8dee2aaSAndroid Build Coastguard Worker return the replacement character (0xFFFD)
42*c8dee2aaSAndroid Build Coastguard Worker */
43*c8dee2aaSAndroid Build Coastguard Worker SK_SPI SkUnichar NextUTF8WithReplacement(const char** ptr, const char* end);
44*c8dee2aaSAndroid Build Coastguard Worker
45*c8dee2aaSAndroid Build Coastguard Worker /** Given a sequence of aligned UTF-16 characters in machine-endian form,
46*c8dee2aaSAndroid Build Coastguard Worker return the first unicode codepoint. The pointer will be incremented to
47*c8dee2aaSAndroid Build Coastguard Worker point at the next codepoint's start. If invalid UTF-16 is encountered,
48*c8dee2aaSAndroid Build Coastguard Worker set *ptr to end and return -1.
49*c8dee2aaSAndroid Build Coastguard Worker */
50*c8dee2aaSAndroid Build Coastguard Worker SK_SPI SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end);
51*c8dee2aaSAndroid Build Coastguard Worker
52*c8dee2aaSAndroid Build Coastguard Worker /** Given a sequence of aligned UTF-32 characters in machine-endian form,
53*c8dee2aaSAndroid Build Coastguard Worker return the first unicode codepoint. The pointer will be incremented to
54*c8dee2aaSAndroid Build Coastguard Worker point at the next codepoint's start. If invalid UTF-32 is encountered,
55*c8dee2aaSAndroid Build Coastguard Worker set *ptr to end and return -1.
56*c8dee2aaSAndroid Build Coastguard Worker */
57*c8dee2aaSAndroid Build Coastguard Worker SK_SPI SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end);
58*c8dee2aaSAndroid Build Coastguard Worker
/** Maximum number of bytes in the UTF-8 encoding of one code point; this is
    the declared size of the output array taken by ToUTF8().
*/
constexpr unsigned kMaxBytesInUTF8Sequence = 4;
60*c8dee2aaSAndroid Build Coastguard Worker
61*c8dee2aaSAndroid Build Coastguard Worker /** Convert the unicode codepoint into UTF-8. If `utf8` is non-null, place the
62*c8dee2aaSAndroid Build Coastguard Worker result in that array. Return the number of bytes in the result. If `utf8`
63*c8dee2aaSAndroid Build Coastguard Worker is null, simply return the number of bytes that would be used. For invalid
64*c8dee2aaSAndroid Build Coastguard Worker unicode codepoints, return 0.
65*c8dee2aaSAndroid Build Coastguard Worker */
66*c8dee2aaSAndroid Build Coastguard Worker SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr);
67*c8dee2aaSAndroid Build Coastguard Worker
68*c8dee2aaSAndroid Build Coastguard Worker /** Convert the unicode codepoint into UTF-16. If `utf16` is non-null, place
69*c8dee2aaSAndroid Build Coastguard Worker the result in that array. Return the number of UTF-16 code units in the
70*c8dee2aaSAndroid Build Coastguard Worker result (1 or 2). If `utf16` is null, simply return the number of code
71*c8dee2aaSAndroid Build Coastguard Worker units that would be used. For invalid unicode codepoints, return 0.
72*c8dee2aaSAndroid Build Coastguard Worker */
73*c8dee2aaSAndroid Build Coastguard Worker SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);
74*c8dee2aaSAndroid Build Coastguard Worker
75*c8dee2aaSAndroid Build Coastguard Worker /** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence.
76*c8dee2aaSAndroid Build Coastguard Worker * If dst is not null, it is filled with the corresponding values up to its capacity.
77*c8dee2aaSAndroid Build Coastguard Worker * If there is an error, -1 is returned and the dst[] buffer is undefined.
78*c8dee2aaSAndroid Build Coastguard Worker */
79*c8dee2aaSAndroid Build Coastguard Worker SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength);
80*c8dee2aaSAndroid Build Coastguard Worker
81*c8dee2aaSAndroid Build Coastguard Worker /** Returns the number of resulting UTF8 values needed to convert the src utf16 sequence.
82*c8dee2aaSAndroid Build Coastguard Worker * If dst is not null, it is filled with the corresponding values up to its capacity.
83*c8dee2aaSAndroid Build Coastguard Worker * If there is an error, -1 is returned and the dst[] buffer is undefined.
84*c8dee2aaSAndroid Build Coastguard Worker */
85*c8dee2aaSAndroid Build Coastguard Worker SK_SPI int UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength);
86*c8dee2aaSAndroid Build Coastguard Worker
/**
 * Given a UTF-16 code unit, returns true iff it is a leading (high)
 * surrogate, i.e. its top six bits are 110110 and it therefore lies in
 * [0xD800, 0xDBFF]. Usable in constant expressions.
 * https://unicode.org/faq/utf_bom.html#utf16-2
 */
static constexpr bool IsLeadingSurrogateUTF16(uint16_t c) { return (c & 0xFC00) == 0xD800; }
92*c8dee2aaSAndroid Build Coastguard Worker
/**
 * Given a UTF-16 code unit, returns true iff it is a trailing (low)
 * surrogate, i.e. its top six bits are 110111 and it therefore lies in
 * [0xDC00, 0xDFFF]. Usable in constant expressions.
 * https://unicode.org/faq/utf_bom.html#utf16-2
 */
static constexpr bool IsTrailingSurrogateUTF16(uint16_t c) { return (c & 0xFC00) == 0xDC00; }
98*c8dee2aaSAndroid Build Coastguard Worker
99*c8dee2aaSAndroid Build Coastguard Worker
100*c8dee2aaSAndroid Build Coastguard Worker } // namespace SkUTF
101*c8dee2aaSAndroid Build Coastguard Worker
102*c8dee2aaSAndroid Build Coastguard Worker #endif // SkUTF_DEFINED
103