xref: /aosp_15_r20/frameworks/base/tools/aapt2/text/Unicode.cpp (revision d57664e9bc4670b3ecf6748a746a57c557b6bc9e)
1*d57664e9SAndroid Build Coastguard Worker /*
2*d57664e9SAndroid Build Coastguard Worker  * Copyright (C) 2017 The Android Open Source Project
3*d57664e9SAndroid Build Coastguard Worker  *
4*d57664e9SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*d57664e9SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*d57664e9SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*d57664e9SAndroid Build Coastguard Worker  *
8*d57664e9SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*d57664e9SAndroid Build Coastguard Worker  *
10*d57664e9SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*d57664e9SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*d57664e9SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*d57664e9SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*d57664e9SAndroid Build Coastguard Worker  * limitations under the License.
15*d57664e9SAndroid Build Coastguard Worker  */
16*d57664e9SAndroid Build Coastguard Worker 
17*d57664e9SAndroid Build Coastguard Worker #include "text/Unicode.h"
18*d57664e9SAndroid Build Coastguard Worker 
19*d57664e9SAndroid Build Coastguard Worker #include <algorithm>
20*d57664e9SAndroid Build Coastguard Worker #include <array>
21*d57664e9SAndroid Build Coastguard Worker 
22*d57664e9SAndroid Build Coastguard Worker #include "text/Utf8Iterator.h"
23*d57664e9SAndroid Build Coastguard Worker 
24*d57664e9SAndroid Build Coastguard Worker using ::android::StringPiece;
25*d57664e9SAndroid Build Coastguard Worker 
26*d57664e9SAndroid Build Coastguard Worker namespace aapt {
27*d57664e9SAndroid Build Coastguard Worker namespace text {
28*d57664e9SAndroid Build Coastguard Worker 
29*d57664e9SAndroid Build Coastguard Worker namespace {
30*d57664e9SAndroid Build Coastguard Worker 
// One row of the generated lookup table: a run of codepoints from first_char through last_char
// (both ends included) and the property bits that apply to that run.
struct CharacterProperties {
  // Flag bits stored in `properties`; they may be combined with bitwise OR.
  enum : uint32_t {
    kXidStart = 0x1u,
    kXidContinue = 0x2u,
  };

  // Lowest codepoint covered by this row.
  char32_t first_char;

  // Highest codepoint covered by this row.
  char32_t last_char;

  // Combination of the k* flags above.
  uint32_t properties;
};
41*d57664e9SAndroid Build Coastguard Worker 
// Include the generated data table.
43*d57664e9SAndroid Build Coastguard Worker #include "text/Unicode_data.cpp"
44*d57664e9SAndroid Build Coastguard Worker 
CompareCharacterProperties(const CharacterProperties & a,char32_t codepoint)45*d57664e9SAndroid Build Coastguard Worker bool CompareCharacterProperties(const CharacterProperties& a, char32_t codepoint) {
46*d57664e9SAndroid Build Coastguard Worker   return a.last_char < codepoint;
47*d57664e9SAndroid Build Coastguard Worker }
48*d57664e9SAndroid Build Coastguard Worker 
FindCharacterProperties(char32_t codepoint)49*d57664e9SAndroid Build Coastguard Worker uint32_t FindCharacterProperties(char32_t codepoint) {
50*d57664e9SAndroid Build Coastguard Worker   const auto iter_end = sCharacterProperties.end();
51*d57664e9SAndroid Build Coastguard Worker   const auto iter = std::lower_bound(sCharacterProperties.begin(), iter_end, codepoint,
52*d57664e9SAndroid Build Coastguard Worker                                      CompareCharacterProperties);
53*d57664e9SAndroid Build Coastguard Worker   if (iter != iter_end && codepoint >= iter->first_char) {
54*d57664e9SAndroid Build Coastguard Worker     return iter->properties;
55*d57664e9SAndroid Build Coastguard Worker   }
56*d57664e9SAndroid Build Coastguard Worker   return 0u;
57*d57664e9SAndroid Build Coastguard Worker }
58*d57664e9SAndroid Build Coastguard Worker 
59*d57664e9SAndroid Build Coastguard Worker }  // namespace
60*d57664e9SAndroid Build Coastguard Worker 
IsXidStart(char32_t codepoint)61*d57664e9SAndroid Build Coastguard Worker bool IsXidStart(char32_t codepoint) {
62*d57664e9SAndroid Build Coastguard Worker   return FindCharacterProperties(codepoint) & CharacterProperties::kXidStart;
63*d57664e9SAndroid Build Coastguard Worker }
64*d57664e9SAndroid Build Coastguard Worker 
IsXidContinue(char32_t codepoint)65*d57664e9SAndroid Build Coastguard Worker bool IsXidContinue(char32_t codepoint) {
66*d57664e9SAndroid Build Coastguard Worker   return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue;
67*d57664e9SAndroid Build Coastguard Worker }
68*d57664e9SAndroid Build Coastguard Worker 
// Returns true if `codepoint` has the Unicode White_Space property.
// The list is hardcoded because it is short and the external/icu project doesn't list these
// characters as data files to parse.
// Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
bool IsWhitespace(char32_t codepoint) {
  // Isolated White_Space codepoints.
  switch (codepoint) {
    case 0x0020:
    case 0x0085:
    case 0x00a0:
    case 0x1680:
    case 0x2028:
    case 0x2029:
    case 0x202f:
    case 0x205f:
    case 0x3000:
      return true;
    default:
      break;
  }

  // The two contiguous runs: U+0009..U+000D and U+2000..U+200A.
  const bool in_control_run = codepoint >= 0x0009 && codepoint <= 0x000d;
  const bool in_space_run = codepoint >= 0x2000 && codepoint <= 0x200a;
  return in_control_run || in_space_run;
}
79*d57664e9SAndroid Build Coastguard Worker 
IsJavaIdentifier(StringPiece str)80*d57664e9SAndroid Build Coastguard Worker bool IsJavaIdentifier(StringPiece str) {
81*d57664e9SAndroid Build Coastguard Worker   Utf8Iterator iter(str);
82*d57664e9SAndroid Build Coastguard Worker 
83*d57664e9SAndroid Build Coastguard Worker   // Check the first character.
84*d57664e9SAndroid Build Coastguard Worker   if (!iter.HasNext()) {
85*d57664e9SAndroid Build Coastguard Worker     return false;
86*d57664e9SAndroid Build Coastguard Worker   }
87*d57664e9SAndroid Build Coastguard Worker 
88*d57664e9SAndroid Build Coastguard Worker   const char32_t first_codepoint = iter.Next();
89*d57664e9SAndroid Build Coastguard Worker   if (!IsXidStart(first_codepoint) && first_codepoint != U'_' && first_codepoint != U'$') {
90*d57664e9SAndroid Build Coastguard Worker     return false;
91*d57664e9SAndroid Build Coastguard Worker   }
92*d57664e9SAndroid Build Coastguard Worker 
93*d57664e9SAndroid Build Coastguard Worker   while (iter.HasNext()) {
94*d57664e9SAndroid Build Coastguard Worker     const char32_t codepoint = iter.Next();
95*d57664e9SAndroid Build Coastguard Worker     if (!IsXidContinue(codepoint) && codepoint != U'$') {
96*d57664e9SAndroid Build Coastguard Worker       return false;
97*d57664e9SAndroid Build Coastguard Worker     }
98*d57664e9SAndroid Build Coastguard Worker   }
99*d57664e9SAndroid Build Coastguard Worker   return true;
100*d57664e9SAndroid Build Coastguard Worker }
101*d57664e9SAndroid Build Coastguard Worker 
IsValidResourceEntryName(StringPiece str)102*d57664e9SAndroid Build Coastguard Worker bool IsValidResourceEntryName(StringPiece str) {
103*d57664e9SAndroid Build Coastguard Worker   Utf8Iterator iter(str);
104*d57664e9SAndroid Build Coastguard Worker 
105*d57664e9SAndroid Build Coastguard Worker   // Check the first character.
106*d57664e9SAndroid Build Coastguard Worker   if (!iter.HasNext()) {
107*d57664e9SAndroid Build Coastguard Worker     return false;
108*d57664e9SAndroid Build Coastguard Worker   }
109*d57664e9SAndroid Build Coastguard Worker 
110*d57664e9SAndroid Build Coastguard Worker   // Resources are allowed to start with '_'
111*d57664e9SAndroid Build Coastguard Worker   const char32_t first_codepoint = iter.Next();
112*d57664e9SAndroid Build Coastguard Worker   if (!IsXidStart(first_codepoint) && first_codepoint != U'_') {
113*d57664e9SAndroid Build Coastguard Worker     return false;
114*d57664e9SAndroid Build Coastguard Worker   }
115*d57664e9SAndroid Build Coastguard Worker 
116*d57664e9SAndroid Build Coastguard Worker   while (iter.HasNext()) {
117*d57664e9SAndroid Build Coastguard Worker     const char32_t codepoint = iter.Next();
118*d57664e9SAndroid Build Coastguard Worker     if (!IsXidContinue(codepoint) && codepoint != U'.' && codepoint != U'-') {
119*d57664e9SAndroid Build Coastguard Worker       return false;
120*d57664e9SAndroid Build Coastguard Worker     }
121*d57664e9SAndroid Build Coastguard Worker   }
122*d57664e9SAndroid Build Coastguard Worker   return true;
123*d57664e9SAndroid Build Coastguard Worker }
124*d57664e9SAndroid Build Coastguard Worker 
125*d57664e9SAndroid Build Coastguard Worker }  // namespace text
126*d57664e9SAndroid Build Coastguard Worker }  // namespace aapt
127