xref: /aosp_15_r20/frameworks/minikin/tests/util/UnicodeUtils.cpp (revision 834a2baab5fdfc28e9a428ee87c7ea8f6a06a53d)
1*834a2baaSAndroid Build Coastguard Worker /*
2*834a2baaSAndroid Build Coastguard Worker  * Copyright (C) 2015 The Android Open Source Project
3*834a2baaSAndroid Build Coastguard Worker  *
4*834a2baaSAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*834a2baaSAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*834a2baaSAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*834a2baaSAndroid Build Coastguard Worker  *
8*834a2baaSAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*834a2baaSAndroid Build Coastguard Worker  *
10*834a2baaSAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*834a2baaSAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*834a2baaSAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*834a2baaSAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*834a2baaSAndroid Build Coastguard Worker  * limitations under the License.
15*834a2baaSAndroid Build Coastguard Worker  */
16*834a2baaSAndroid Build Coastguard Worker 
17*834a2baaSAndroid Build Coastguard Worker #include <cutils/log.h>
18*834a2baaSAndroid Build Coastguard Worker #include <unicode/utf.h>
19*834a2baaSAndroid Build Coastguard Worker #include <unicode/utf8.h>
20*834a2baaSAndroid Build Coastguard Worker 
21*834a2baaSAndroid Build Coastguard Worker #include <cstdlib>
22*834a2baaSAndroid Build Coastguard Worker #include <sstream>
23*834a2baaSAndroid Build Coastguard Worker #include <string>
24*834a2baaSAndroid Build Coastguard Worker #include <vector>
25*834a2baaSAndroid Build Coastguard Worker 
26*834a2baaSAndroid Build Coastguard Worker #include "minikin/U16StringPiece.h"
27*834a2baaSAndroid Build Coastguard Worker 
28*834a2baaSAndroid Build Coastguard Worker namespace minikin {
29*834a2baaSAndroid Build Coastguard Worker 
30*834a2baaSAndroid Build Coastguard Worker // src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
31*834a2baaSAndroid Build Coastguard Worker // Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
ParseUnicode(uint16_t * buf,size_t buf_size,const char * src,size_t * result_size,size_t * offset)32*834a2baaSAndroid Build Coastguard Worker void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
33*834a2baaSAndroid Build Coastguard Worker                   size_t* offset) {
34*834a2baaSAndroid Build Coastguard Worker     size_t input_ix = 0;
35*834a2baaSAndroid Build Coastguard Worker     size_t output_ix = 0;
36*834a2baaSAndroid Build Coastguard Worker     bool seen_offset = false;
37*834a2baaSAndroid Build Coastguard Worker 
38*834a2baaSAndroid Build Coastguard Worker     while (src[input_ix] != 0) {
39*834a2baaSAndroid Build Coastguard Worker         switch (src[input_ix]) {
40*834a2baaSAndroid Build Coastguard Worker             case '\'':
41*834a2baaSAndroid Build Coastguard Worker                 // single ASCII char
42*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80);
43*834a2baaSAndroid Build Coastguard Worker                 input_ix++;
44*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(src[input_ix] == 0);
45*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
46*834a2baaSAndroid Build Coastguard Worker                 buf[output_ix++] = (uint16_t)src[input_ix++];
47*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
48*834a2baaSAndroid Build Coastguard Worker                 input_ix++;
49*834a2baaSAndroid Build Coastguard Worker                 break;
50*834a2baaSAndroid Build Coastguard Worker             case 'u':
51*834a2baaSAndroid Build Coastguard Worker             case 'U': {
52*834a2baaSAndroid Build Coastguard Worker                 // Unicode codepoint in hex syntax
53*834a2baaSAndroid Build Coastguard Worker                 input_ix++;
54*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
55*834a2baaSAndroid Build Coastguard Worker                 input_ix++;
56*834a2baaSAndroid Build Coastguard Worker                 char* endptr = (char*)src + input_ix;
57*834a2baaSAndroid Build Coastguard Worker                 unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
58*834a2baaSAndroid Build Coastguard Worker                 size_t num_hex_digits = endptr - (src + input_ix);
59*834a2baaSAndroid Build Coastguard Worker 
60*834a2baaSAndroid Build Coastguard Worker                 // also triggers on invalid number syntax, digits = 0
61*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
62*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
63*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
64*834a2baaSAndroid Build Coastguard Worker                 input_ix += num_hex_digits;
65*834a2baaSAndroid Build Coastguard Worker                 if (U16_LENGTH(codepoint) == 1) {
66*834a2baaSAndroid Build Coastguard Worker                     LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
67*834a2baaSAndroid Build Coastguard Worker                     buf[output_ix++] = codepoint;
68*834a2baaSAndroid Build Coastguard Worker                 } else {
69*834a2baaSAndroid Build Coastguard Worker                     // UTF-16 encoding
70*834a2baaSAndroid Build Coastguard Worker                     LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
71*834a2baaSAndroid Build Coastguard Worker                     buf[output_ix++] = U16_LEAD(codepoint);
72*834a2baaSAndroid Build Coastguard Worker                     buf[output_ix++] = U16_TRAIL(codepoint);
73*834a2baaSAndroid Build Coastguard Worker                 }
74*834a2baaSAndroid Build Coastguard Worker                 break;
75*834a2baaSAndroid Build Coastguard Worker             }
76*834a2baaSAndroid Build Coastguard Worker             case ' ':
77*834a2baaSAndroid Build Coastguard Worker                 input_ix++;
78*834a2baaSAndroid Build Coastguard Worker                 break;
79*834a2baaSAndroid Build Coastguard Worker             case '|':
80*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(seen_offset);
81*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL_IF(offset == nullptr);
82*834a2baaSAndroid Build Coastguard Worker                 *offset = output_ix;
83*834a2baaSAndroid Build Coastguard Worker                 seen_offset = true;
84*834a2baaSAndroid Build Coastguard Worker                 input_ix++;
85*834a2baaSAndroid Build Coastguard Worker                 break;
86*834a2baaSAndroid Build Coastguard Worker             default:
87*834a2baaSAndroid Build Coastguard Worker                 LOG_ALWAYS_FATAL("Unexpected Character");
88*834a2baaSAndroid Build Coastguard Worker         }
89*834a2baaSAndroid Build Coastguard Worker     }
90*834a2baaSAndroid Build Coastguard Worker     LOG_ALWAYS_FATAL_IF(result_size == nullptr);
91*834a2baaSAndroid Build Coastguard Worker     *result_size = output_ix;
92*834a2baaSAndroid Build Coastguard Worker     LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
93*834a2baaSAndroid Build Coastguard Worker }
94*834a2baaSAndroid Build Coastguard Worker 
parseUnicodeStringWithOffset(const std::string & in,size_t * offset)95*834a2baaSAndroid Build Coastguard Worker std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) {
96*834a2baaSAndroid Build Coastguard Worker     std::unique_ptr<uint16_t[]> buffer(new uint16_t[in.size()]);
97*834a2baaSAndroid Build Coastguard Worker     size_t result_size = 0;
98*834a2baaSAndroid Build Coastguard Worker     ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset);
99*834a2baaSAndroid Build Coastguard Worker     return std::vector<uint16_t>(buffer.get(), buffer.get() + result_size);
100*834a2baaSAndroid Build Coastguard Worker }
101*834a2baaSAndroid Build Coastguard Worker 
parseUnicodeString(const std::string & in)102*834a2baaSAndroid Build Coastguard Worker std::vector<uint16_t> parseUnicodeString(const std::string& in) {
103*834a2baaSAndroid Build Coastguard Worker     return parseUnicodeStringWithOffset(in, nullptr);
104*834a2baaSAndroid Build Coastguard Worker }
105*834a2baaSAndroid Build Coastguard Worker 
utf8ToUtf16(const std::string & text)106*834a2baaSAndroid Build Coastguard Worker std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
107*834a2baaSAndroid Build Coastguard Worker     std::vector<uint16_t> result;
108*834a2baaSAndroid Build Coastguard Worker     int32_t i = 0;
109*834a2baaSAndroid Build Coastguard Worker     const int32_t textLength = static_cast<int32_t>(text.size());
110*834a2baaSAndroid Build Coastguard Worker     uint32_t c = 0;
111*834a2baaSAndroid Build Coastguard Worker     while (i < textLength) {
112*834a2baaSAndroid Build Coastguard Worker         U8_NEXT(text.c_str(), i, textLength, c);
113*834a2baaSAndroid Build Coastguard Worker         if (U16_LENGTH(c) == 1) {
114*834a2baaSAndroid Build Coastguard Worker             result.push_back(c);
115*834a2baaSAndroid Build Coastguard Worker         } else {
116*834a2baaSAndroid Build Coastguard Worker             result.push_back(U16_LEAD(c));
117*834a2baaSAndroid Build Coastguard Worker             result.push_back(U16_TRAIL(c));
118*834a2baaSAndroid Build Coastguard Worker         }
119*834a2baaSAndroid Build Coastguard Worker     }
120*834a2baaSAndroid Build Coastguard Worker     return result;
121*834a2baaSAndroid Build Coastguard Worker }
122*834a2baaSAndroid Build Coastguard Worker 
utf16ToUtf8(const U16StringPiece & u16String)123*834a2baaSAndroid Build Coastguard Worker std::string utf16ToUtf8(const U16StringPiece& u16String) {
124*834a2baaSAndroid Build Coastguard Worker     const uint32_t textLength = u16String.size();
125*834a2baaSAndroid Build Coastguard Worker     uint32_t i = 0;
126*834a2baaSAndroid Build Coastguard Worker     uint32_t c = 0;
127*834a2baaSAndroid Build Coastguard Worker 
128*834a2baaSAndroid Build Coastguard Worker     std::string out;
129*834a2baaSAndroid Build Coastguard Worker     out.reserve(textLength * 4);
130*834a2baaSAndroid Build Coastguard Worker 
131*834a2baaSAndroid Build Coastguard Worker     while (i < textLength) {
132*834a2baaSAndroid Build Coastguard Worker         U16_NEXT(u16String.data(), i, textLength, c);
133*834a2baaSAndroid Build Coastguard Worker 
134*834a2baaSAndroid Build Coastguard Worker         char buf[U8_MAX_LENGTH] = {};
135*834a2baaSAndroid Build Coastguard Worker         uint32_t outIndex = 0;
136*834a2baaSAndroid Build Coastguard Worker         U8_APPEND_UNSAFE(buf, outIndex, c);
137*834a2baaSAndroid Build Coastguard Worker         out.append(buf, outIndex);
138*834a2baaSAndroid Build Coastguard Worker     }
139*834a2baaSAndroid Build Coastguard Worker     return out;
140*834a2baaSAndroid Build Coastguard Worker }
141*834a2baaSAndroid Build Coastguard Worker 
// Returns text concatenated with itself count times.
// A count of zero or less yields an empty string.
std::string repeat(const std::string& text, int count) {
    std::string repeated;
    for (int remaining = count; remaining > 0; --remaining) {
        repeated += text;
    }
    return repeated;
}
149*834a2baaSAndroid Build Coastguard Worker 
150*834a2baaSAndroid Build Coastguard Worker }  // namespace minikin
151