1*834a2baaSAndroid Build Coastguard Worker /*
2*834a2baaSAndroid Build Coastguard Worker * Copyright (C) 2015 The Android Open Source Project
3*834a2baaSAndroid Build Coastguard Worker *
4*834a2baaSAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*834a2baaSAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*834a2baaSAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*834a2baaSAndroid Build Coastguard Worker *
8*834a2baaSAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*834a2baaSAndroid Build Coastguard Worker *
10*834a2baaSAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*834a2baaSAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*834a2baaSAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*834a2baaSAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*834a2baaSAndroid Build Coastguard Worker * limitations under the License.
15*834a2baaSAndroid Build Coastguard Worker */
16*834a2baaSAndroid Build Coastguard Worker
17*834a2baaSAndroid Build Coastguard Worker #include <cutils/log.h>
18*834a2baaSAndroid Build Coastguard Worker #include <unicode/utf.h>
19*834a2baaSAndroid Build Coastguard Worker #include <unicode/utf8.h>
20*834a2baaSAndroid Build Coastguard Worker
21*834a2baaSAndroid Build Coastguard Worker #include <cstdlib>
22*834a2baaSAndroid Build Coastguard Worker #include <sstream>
23*834a2baaSAndroid Build Coastguard Worker #include <string>
24*834a2baaSAndroid Build Coastguard Worker #include <vector>
25*834a2baaSAndroid Build Coastguard Worker
26*834a2baaSAndroid Build Coastguard Worker #include "minikin/U16StringPiece.h"
27*834a2baaSAndroid Build Coastguard Worker
28*834a2baaSAndroid Build Coastguard Worker namespace minikin {
29*834a2baaSAndroid Build Coastguard Worker
30*834a2baaSAndroid Build Coastguard Worker // src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
31*834a2baaSAndroid Build Coastguard Worker // Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
ParseUnicode(uint16_t * buf,size_t buf_size,const char * src,size_t * result_size,size_t * offset)32*834a2baaSAndroid Build Coastguard Worker void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
33*834a2baaSAndroid Build Coastguard Worker size_t* offset) {
34*834a2baaSAndroid Build Coastguard Worker size_t input_ix = 0;
35*834a2baaSAndroid Build Coastguard Worker size_t output_ix = 0;
36*834a2baaSAndroid Build Coastguard Worker bool seen_offset = false;
37*834a2baaSAndroid Build Coastguard Worker
38*834a2baaSAndroid Build Coastguard Worker while (src[input_ix] != 0) {
39*834a2baaSAndroid Build Coastguard Worker switch (src[input_ix]) {
40*834a2baaSAndroid Build Coastguard Worker case '\'':
41*834a2baaSAndroid Build Coastguard Worker // single ASCII char
42*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80);
43*834a2baaSAndroid Build Coastguard Worker input_ix++;
44*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(src[input_ix] == 0);
45*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
46*834a2baaSAndroid Build Coastguard Worker buf[output_ix++] = (uint16_t)src[input_ix++];
47*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
48*834a2baaSAndroid Build Coastguard Worker input_ix++;
49*834a2baaSAndroid Build Coastguard Worker break;
50*834a2baaSAndroid Build Coastguard Worker case 'u':
51*834a2baaSAndroid Build Coastguard Worker case 'U': {
52*834a2baaSAndroid Build Coastguard Worker // Unicode codepoint in hex syntax
53*834a2baaSAndroid Build Coastguard Worker input_ix++;
54*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
55*834a2baaSAndroid Build Coastguard Worker input_ix++;
56*834a2baaSAndroid Build Coastguard Worker char* endptr = (char*)src + input_ix;
57*834a2baaSAndroid Build Coastguard Worker unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
58*834a2baaSAndroid Build Coastguard Worker size_t num_hex_digits = endptr - (src + input_ix);
59*834a2baaSAndroid Build Coastguard Worker
60*834a2baaSAndroid Build Coastguard Worker // also triggers on invalid number syntax, digits = 0
61*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
62*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
63*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
64*834a2baaSAndroid Build Coastguard Worker input_ix += num_hex_digits;
65*834a2baaSAndroid Build Coastguard Worker if (U16_LENGTH(codepoint) == 1) {
66*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
67*834a2baaSAndroid Build Coastguard Worker buf[output_ix++] = codepoint;
68*834a2baaSAndroid Build Coastguard Worker } else {
69*834a2baaSAndroid Build Coastguard Worker // UTF-16 encoding
70*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
71*834a2baaSAndroid Build Coastguard Worker buf[output_ix++] = U16_LEAD(codepoint);
72*834a2baaSAndroid Build Coastguard Worker buf[output_ix++] = U16_TRAIL(codepoint);
73*834a2baaSAndroid Build Coastguard Worker }
74*834a2baaSAndroid Build Coastguard Worker break;
75*834a2baaSAndroid Build Coastguard Worker }
76*834a2baaSAndroid Build Coastguard Worker case ' ':
77*834a2baaSAndroid Build Coastguard Worker input_ix++;
78*834a2baaSAndroid Build Coastguard Worker break;
79*834a2baaSAndroid Build Coastguard Worker case '|':
80*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(seen_offset);
81*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(offset == nullptr);
82*834a2baaSAndroid Build Coastguard Worker *offset = output_ix;
83*834a2baaSAndroid Build Coastguard Worker seen_offset = true;
84*834a2baaSAndroid Build Coastguard Worker input_ix++;
85*834a2baaSAndroid Build Coastguard Worker break;
86*834a2baaSAndroid Build Coastguard Worker default:
87*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL("Unexpected Character");
88*834a2baaSAndroid Build Coastguard Worker }
89*834a2baaSAndroid Build Coastguard Worker }
90*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(result_size == nullptr);
91*834a2baaSAndroid Build Coastguard Worker *result_size = output_ix;
92*834a2baaSAndroid Build Coastguard Worker LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
93*834a2baaSAndroid Build Coastguard Worker }
94*834a2baaSAndroid Build Coastguard Worker
parseUnicodeStringWithOffset(const std::string & in,size_t * offset)95*834a2baaSAndroid Build Coastguard Worker std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) {
96*834a2baaSAndroid Build Coastguard Worker std::unique_ptr<uint16_t[]> buffer(new uint16_t[in.size()]);
97*834a2baaSAndroid Build Coastguard Worker size_t result_size = 0;
98*834a2baaSAndroid Build Coastguard Worker ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset);
99*834a2baaSAndroid Build Coastguard Worker return std::vector<uint16_t>(buffer.get(), buffer.get() + result_size);
100*834a2baaSAndroid Build Coastguard Worker }
101*834a2baaSAndroid Build Coastguard Worker
parseUnicodeString(const std::string & in)102*834a2baaSAndroid Build Coastguard Worker std::vector<uint16_t> parseUnicodeString(const std::string& in) {
103*834a2baaSAndroid Build Coastguard Worker return parseUnicodeStringWithOffset(in, nullptr);
104*834a2baaSAndroid Build Coastguard Worker }
105*834a2baaSAndroid Build Coastguard Worker
utf8ToUtf16(const std::string & text)106*834a2baaSAndroid Build Coastguard Worker std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
107*834a2baaSAndroid Build Coastguard Worker std::vector<uint16_t> result;
108*834a2baaSAndroid Build Coastguard Worker int32_t i = 0;
109*834a2baaSAndroid Build Coastguard Worker const int32_t textLength = static_cast<int32_t>(text.size());
110*834a2baaSAndroid Build Coastguard Worker uint32_t c = 0;
111*834a2baaSAndroid Build Coastguard Worker while (i < textLength) {
112*834a2baaSAndroid Build Coastguard Worker U8_NEXT(text.c_str(), i, textLength, c);
113*834a2baaSAndroid Build Coastguard Worker if (U16_LENGTH(c) == 1) {
114*834a2baaSAndroid Build Coastguard Worker result.push_back(c);
115*834a2baaSAndroid Build Coastguard Worker } else {
116*834a2baaSAndroid Build Coastguard Worker result.push_back(U16_LEAD(c));
117*834a2baaSAndroid Build Coastguard Worker result.push_back(U16_TRAIL(c));
118*834a2baaSAndroid Build Coastguard Worker }
119*834a2baaSAndroid Build Coastguard Worker }
120*834a2baaSAndroid Build Coastguard Worker return result;
121*834a2baaSAndroid Build Coastguard Worker }
122*834a2baaSAndroid Build Coastguard Worker
utf16ToUtf8(const U16StringPiece & u16String)123*834a2baaSAndroid Build Coastguard Worker std::string utf16ToUtf8(const U16StringPiece& u16String) {
124*834a2baaSAndroid Build Coastguard Worker const uint32_t textLength = u16String.size();
125*834a2baaSAndroid Build Coastguard Worker uint32_t i = 0;
126*834a2baaSAndroid Build Coastguard Worker uint32_t c = 0;
127*834a2baaSAndroid Build Coastguard Worker
128*834a2baaSAndroid Build Coastguard Worker std::string out;
129*834a2baaSAndroid Build Coastguard Worker out.reserve(textLength * 4);
130*834a2baaSAndroid Build Coastguard Worker
131*834a2baaSAndroid Build Coastguard Worker while (i < textLength) {
132*834a2baaSAndroid Build Coastguard Worker U16_NEXT(u16String.data(), i, textLength, c);
133*834a2baaSAndroid Build Coastguard Worker
134*834a2baaSAndroid Build Coastguard Worker char buf[U8_MAX_LENGTH] = {};
135*834a2baaSAndroid Build Coastguard Worker uint32_t outIndex = 0;
136*834a2baaSAndroid Build Coastguard Worker U8_APPEND_UNSAFE(buf, outIndex, c);
137*834a2baaSAndroid Build Coastguard Worker out.append(buf, outIndex);
138*834a2baaSAndroid Build Coastguard Worker }
139*834a2baaSAndroid Build Coastguard Worker return out;
140*834a2baaSAndroid Build Coastguard Worker }
141*834a2baaSAndroid Build Coastguard Worker
// Returns `text` concatenated with itself `count` times.
// A count of zero or less, or an empty text, yields an empty string.
std::string repeat(const std::string& text, int count) {
    std::string result;
    if (count <= 0 || text.empty()) {
        return result;
    }
    // The final length is known up front, so allocate once instead of growing a stream.
    result.reserve(text.size() * static_cast<std::string::size_type>(count));
    for (int i = 0; i < count; ++i) {
        result += text;
    }
    return result;
}
149*834a2baaSAndroid Build Coastguard Worker
150*834a2baaSAndroid Build Coastguard Worker } // namespace minikin
151