xref: /aosp_15_r20/art/libdexfile/dex/descriptors_names.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1*795d594fSAndroid Build Coastguard Worker /*
2*795d594fSAndroid Build Coastguard Worker  * Copyright (C) 2011 The Android Open Source Project
3*795d594fSAndroid Build Coastguard Worker  *
4*795d594fSAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*795d594fSAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*795d594fSAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*795d594fSAndroid Build Coastguard Worker  *
8*795d594fSAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*795d594fSAndroid Build Coastguard Worker  *
10*795d594fSAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*795d594fSAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*795d594fSAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*795d594fSAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*795d594fSAndroid Build Coastguard Worker  * limitations under the License.
15*795d594fSAndroid Build Coastguard Worker  */
16*795d594fSAndroid Build Coastguard Worker 
17*795d594fSAndroid Build Coastguard Worker #include "descriptors_names.h"
18*795d594fSAndroid Build Coastguard Worker 
19*795d594fSAndroid Build Coastguard Worker #include <algorithm>
20*795d594fSAndroid Build Coastguard Worker 
21*795d594fSAndroid Build Coastguard Worker #include "android-base/stringprintf.h"
22*795d594fSAndroid Build Coastguard Worker #include "android-base/strings.h"
23*795d594fSAndroid Build Coastguard Worker 
24*795d594fSAndroid Build Coastguard Worker #include "base/macros.h"
25*795d594fSAndroid Build Coastguard Worker #include "dex/utf-inl.h"
26*795d594fSAndroid Build Coastguard Worker 
27*795d594fSAndroid Build Coastguard Worker namespace art {
28*795d594fSAndroid Build Coastguard Worker 
29*795d594fSAndroid Build Coastguard Worker using android::base::StringAppendF;
30*795d594fSAndroid Build Coastguard Worker 
// Appends a human-readable form of the type `descriptor` to `*result`.
//
// Leading '[' characters are counted as array dimensions and rendered as
// trailing "[]" pairs. A reference type ("La/b/C;") is rendered as "a.b.C"
// (the trailing ';' is dropped only if present); a primitive type code is
// rendered as its keyword. If the element type is neither 'L' nor a known
// primitive code, the whole original `descriptor` is appended unchanged and
// no "[]" pairs are added.
void AppendPrettyDescriptor(const char* descriptor, std::string* result) {
  // Maps a primitive type code to its keyword, or nullptr if unknown.
  auto primitive_name = [](char code) -> const char* {
    switch (code) {
      case 'B': return "byte";
      case 'C': return "char";
      case 'D': return "double";
      case 'F': return "float";
      case 'I': return "int";
      case 'J': return "long";
      case 'S': return "short";
      case 'Z': return "boolean";
      case 'V': return "void";  // Used when decoding return types.
      default:  return nullptr;
    }
  };

  // Skip over the array markers; their count is the dimensionality.
  const char* element = descriptor;
  while (*element == '[') {
    ++element;
  }
  const size_t dimensions = static_cast<size_t>(element - descriptor);

  if (*element == 'L') {
    // "[[La/b/C;" -> "a.b.C[][]".
    std::string_view class_name(element + 1);  // Everything after the 'L'.
    if (!class_name.empty() && class_name.back() == ';') {
      class_name.remove_suffix(1u);
    }
    // Append "fully/qualified/Type", then rewrite only the appended part so
    // that any '/' already present in `*result` is left alone.
    const size_t first_appended = result->size();
    result->append(class_name);
    std::replace(result->begin() + first_appended, result->end(), '/', '.');
  } else {
    // "[[B" -> "byte[][]".
    const char* keyword = primitive_name(*element);
    if (keyword == nullptr) {
      result->append(descriptor);
      return;
    }
    result->append(keyword);
  }

  // Finally, one "[]" per array dimension.
  for (size_t remaining = dimensions; remaining != 0; --remaining) {
    result->append("[]");
  }
}
93*795d594fSAndroid Build Coastguard Worker 
PrettyDescriptor(const char * descriptor)94*795d594fSAndroid Build Coastguard Worker std::string PrettyDescriptor(const char* descriptor) {
95*795d594fSAndroid Build Coastguard Worker   std::string result;
96*795d594fSAndroid Build Coastguard Worker   AppendPrettyDescriptor(descriptor, &result);
97*795d594fSAndroid Build Coastguard Worker   return result;
98*795d594fSAndroid Build Coastguard Worker }
99*795d594fSAndroid Build Coastguard Worker 
// Converts a pretty type name such as "some.pretty.Type[][]" or "int[]" back
// into descriptor form ("[[Lsome/pretty/Type;", "[I").
//
// Every occurrence of "[]" in the input counts as one array dimension, and
// the base type name is the text before the first occurrence. A base name
// equal to a primitive keyword maps to its one-letter code; anything else
// (including the empty string) is wrapped as "L<name with '.' -> '/'>;".
std::string InversePrettyDescriptor(const std::string& pretty_descriptor) {
  static constexpr char kArrayIndicator[] = "[]";
  constexpr size_t kArrayIndicatorLength = sizeof(kArrayIndicator) - 1;

  struct PrimitiveEntry {
    const char* keyword;
    char code;
  };
  static constexpr PrimitiveEntry kPrimitives[] = {
      {"byte", 'B'},  {"char", 'C'},  {"double", 'D'},  {"float", 'F'}, {"int", 'I'},
      {"long", 'J'},  {"short", 'S'}, {"boolean", 'Z'}, {"void", 'V'},
  };

  // Count the dimensions and remember where the base type name ends.
  size_t base_length = pretty_descriptor.length();
  size_t dimensions = 0;
  for (size_t at = pretty_descriptor.find(kArrayIndicator);
       at != std::string::npos;
       at = pretty_descriptor.find(kArrayIndicator, at + kArrayIndicatorLength)) {
    if (dimensions == 0) {
      base_length = at;
    }
    ++dimensions;
  }

  // One leading '[' per dimension.
  std::string result(dimensions, '[');

  // `base` is now of the form "some.pretty.Type" or "primitive".
  std::string base = pretty_descriptor.substr(0, base_length);
  for (const PrimitiveEntry& entry : kPrimitives) {
    if (base == entry.keyword) {
      result += entry.code;
      return result;
    }
  }

  std::replace(base.begin(), base.end(), '.', '/');
  result += 'L';
  result += base;
  result += ';';
  return result;
}
149*795d594fSAndroid Build Coastguard Worker 
GetJniShortName(const std::string & class_descriptor,const std::string & method)150*795d594fSAndroid Build Coastguard Worker std::string GetJniShortName(const std::string& class_descriptor, const std::string& method) {
151*795d594fSAndroid Build Coastguard Worker   // Remove the leading 'L' and trailing ';'...
152*795d594fSAndroid Build Coastguard Worker   std::string class_name(class_descriptor);
153*795d594fSAndroid Build Coastguard Worker   CHECK_EQ(class_name[0], 'L') << class_name;
154*795d594fSAndroid Build Coastguard Worker   CHECK_EQ(class_name[class_name.size() - 1], ';') << class_name;
155*795d594fSAndroid Build Coastguard Worker   class_name.erase(0, 1);
156*795d594fSAndroid Build Coastguard Worker   class_name.erase(class_name.size() - 1, 1);
157*795d594fSAndroid Build Coastguard Worker 
158*795d594fSAndroid Build Coastguard Worker   std::string short_name;
159*795d594fSAndroid Build Coastguard Worker   short_name += "Java_";
160*795d594fSAndroid Build Coastguard Worker   short_name += MangleForJni(class_name);
161*795d594fSAndroid Build Coastguard Worker   short_name += "_";
162*795d594fSAndroid Build Coastguard Worker   short_name += MangleForJni(method);
163*795d594fSAndroid Build Coastguard Worker   return short_name;
164*795d594fSAndroid Build Coastguard Worker }
165*795d594fSAndroid Build Coastguard Worker 
166*795d594fSAndroid Build Coastguard Worker // See http://java.sun.com/j2se/1.5.0/docs/guide/jni/spec/design.html#wp615 for the full rules.
MangleForJni(const std::string & s)167*795d594fSAndroid Build Coastguard Worker std::string MangleForJni(const std::string& s) {
168*795d594fSAndroid Build Coastguard Worker   std::string result;
169*795d594fSAndroid Build Coastguard Worker   size_t char_count = CountModifiedUtf8Chars(s.c_str());
170*795d594fSAndroid Build Coastguard Worker   const char* cp = &s[0];
171*795d594fSAndroid Build Coastguard Worker   for (size_t i = 0; i < char_count; ++i) {
172*795d594fSAndroid Build Coastguard Worker     uint32_t ch = GetUtf16FromUtf8(&cp);
173*795d594fSAndroid Build Coastguard Worker     if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')) {
174*795d594fSAndroid Build Coastguard Worker       result.push_back(ch);
175*795d594fSAndroid Build Coastguard Worker     } else if (ch == '.' || ch == '/') {
176*795d594fSAndroid Build Coastguard Worker       result += "_";
177*795d594fSAndroid Build Coastguard Worker     } else if (ch == '_') {
178*795d594fSAndroid Build Coastguard Worker       result += "_1";
179*795d594fSAndroid Build Coastguard Worker     } else if (ch == ';') {
180*795d594fSAndroid Build Coastguard Worker       result += "_2";
181*795d594fSAndroid Build Coastguard Worker     } else if (ch == '[') {
182*795d594fSAndroid Build Coastguard Worker       result += "_3";
183*795d594fSAndroid Build Coastguard Worker     } else {
184*795d594fSAndroid Build Coastguard Worker       const uint16_t leading = GetLeadingUtf16Char(ch);
185*795d594fSAndroid Build Coastguard Worker       const uint32_t trailing = GetTrailingUtf16Char(ch);
186*795d594fSAndroid Build Coastguard Worker 
187*795d594fSAndroid Build Coastguard Worker       StringAppendF(&result, "_0%04x", leading);
188*795d594fSAndroid Build Coastguard Worker       if (trailing != 0) {
189*795d594fSAndroid Build Coastguard Worker         StringAppendF(&result, "_0%04x", trailing);
190*795d594fSAndroid Build Coastguard Worker       }
191*795d594fSAndroid Build Coastguard Worker     }
192*795d594fSAndroid Build Coastguard Worker   }
193*795d594fSAndroid Build Coastguard Worker   return result;
194*795d594fSAndroid Build Coastguard Worker }
195*795d594fSAndroid Build Coastguard Worker 
// Converts a dotted class name to descriptor form by replacing every '.' with
// '/'. A non-empty name that does not start with '[' is additionally wrapped
// as "L...;"; an empty name or one starting with '[' gets no wrapping.
std::string DotToDescriptor(const char* class_name) {
  std::string descriptor(class_name);
  std::replace(descriptor.begin(), descriptor.end(), '.', '/');
  if (descriptor.empty() || descriptor.front() == '[') {
    return descriptor;
  }
  descriptor.insert(descriptor.begin(), 'L');
  descriptor.push_back(';');
  return descriptor;
}
204*795d594fSAndroid Build Coastguard Worker 
// Converts a descriptor to dotted form.
//
// Inputs of length 0 or 1 are returned unchanged. For longer inputs every '/'
// becomes '.'; in addition, an input that starts with 'L' and ends with ';'
// has those two delimiters stripped. Other inputs (e.g. array descriptors)
// keep any 'L' and ';' they contain.
std::string DescriptorToDot(const char* descriptor) {
  const size_t length = strlen(descriptor);
  if (length <= 1) {
    // Do nothing for non-class/array descriptors.
    return descriptor;
  }

  const bool is_class = (descriptor[0] == 'L') && (descriptor[length - 1] == ';');
  std::string dotted = is_class ? std::string(descriptor + 1, length - 2)
                                : std::string(descriptor);
  std::replace(dotted.begin(), dotted.end(), '/', '.');
  return dotted;
}
223*795d594fSAndroid Build Coastguard Worker 
// Returns `descriptor` with a surrounding "L" ... ";" pair removed. No
// separator conversion is performed, and inputs that are not of that shape
// are returned unchanged.
std::string DescriptorToName(const char* descriptor) {
  const size_t length = strlen(descriptor);
  // For an empty input the first comparison fails, so `length - 1` is never
  // used as an index in that case.
  const bool is_wrapped = (descriptor[0] == 'L') && (descriptor[length - 1] == ';');
  return is_wrapped ? std::string(descriptor + 1, length - 2) : std::string(descriptor);
}
232*795d594fSAndroid Build Coastguard Worker 
// Helper for IsValidPartOfMemberNameUtf8(): a 128-bit set, stored as four
// 32-bit words, marking which low-ASCII characters are valid. Character `c`
// is looked up as bit (c & 0x1f) of word (c >> 5).
static constexpr uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
  // 0x00..0x1f: low control characters; nothing valid.
  0x00000000,
  // 0x20..0x3f: space, digits and symbols; valid: ' ', '$', '-', '0'..'9'.
  0x03ff2011,
  // 0x40..0x5f: uppercase etc.; valid: 'A'..'Z', '_'.
  0x87fffffe,
  // 0x60..0x7f: lowercase etc.; valid: 'a'..'z'.
  0x07fffffe
};
240*795d594fSAndroid Build Coastguard Worker 
241*795d594fSAndroid Build Coastguard Worker // Helper for IsValidPartOfMemberNameUtf8(); do not call directly.
242*795d594fSAndroid Build Coastguard Worker COLD_ATTR
IsValidPartOfMemberNameUtf8Slow(const char ** pUtf8Ptr)243*795d594fSAndroid Build Coastguard Worker static bool IsValidPartOfMemberNameUtf8Slow(const char** pUtf8Ptr) {
244*795d594fSAndroid Build Coastguard Worker   /*
245*795d594fSAndroid Build Coastguard Worker    * It's a multibyte encoded character. Decode it and analyze. We
246*795d594fSAndroid Build Coastguard Worker    * accept anything that isn't:
247*795d594fSAndroid Build Coastguard Worker    *   - an improperly encoded low value
248*795d594fSAndroid Build Coastguard Worker    *   - an improper surrogate pair
249*795d594fSAndroid Build Coastguard Worker    *   - an encoded '\0'
250*795d594fSAndroid Build Coastguard Worker    *   - a C1 control character U+0080..U+009f
251*795d594fSAndroid Build Coastguard Worker    *   - a format character U+200b..U+200f, U+2028..U+202e
252*795d594fSAndroid Build Coastguard Worker    *   - a special character U+fff0..U+ffff
253*795d594fSAndroid Build Coastguard Worker    * Prior to DEX format version 040, we also excluded some of the Unicode
254*795d594fSAndroid Build Coastguard Worker    * space characters:
255*795d594fSAndroid Build Coastguard Worker    *   - U+00a0, U+2000..U+200a, U+202f
256*795d594fSAndroid Build Coastguard Worker    * This is all specified in the dex format document.
257*795d594fSAndroid Build Coastguard Worker    */
258*795d594fSAndroid Build Coastguard Worker 
259*795d594fSAndroid Build Coastguard Worker   const uint32_t pair = GetUtf16FromUtf8(pUtf8Ptr);
260*795d594fSAndroid Build Coastguard Worker   const uint16_t leading = GetLeadingUtf16Char(pair);
261*795d594fSAndroid Build Coastguard Worker 
262*795d594fSAndroid Build Coastguard Worker   // We have a surrogate pair resulting from a valid 4 byte UTF sequence.
263*795d594fSAndroid Build Coastguard Worker   // No further checks are necessary because 4 byte sequences span code
264*795d594fSAndroid Build Coastguard Worker   // points [U+10000, U+1FFFFF], which are valid codepoints in a dex
265*795d594fSAndroid Build Coastguard Worker   // identifier. Furthermore, GetUtf16FromUtf8 guarantees that each of
266*795d594fSAndroid Build Coastguard Worker   // the surrogate halves are valid and well formed in this instance.
267*795d594fSAndroid Build Coastguard Worker   if (GetTrailingUtf16Char(pair) != 0) {
268*795d594fSAndroid Build Coastguard Worker     return true;
269*795d594fSAndroid Build Coastguard Worker   }
270*795d594fSAndroid Build Coastguard Worker 
271*795d594fSAndroid Build Coastguard Worker 
272*795d594fSAndroid Build Coastguard Worker   // We've encountered a one, two or three byte UTF-8 sequence. The
273*795d594fSAndroid Build Coastguard Worker   // three byte UTF-8 sequence could be one half of a surrogate pair.
274*795d594fSAndroid Build Coastguard Worker   switch (leading >> 8) {
275*795d594fSAndroid Build Coastguard Worker     case 0x00:
276*795d594fSAndroid Build Coastguard Worker       // It's in the range that has C1 control characters.
277*795d594fSAndroid Build Coastguard Worker       return (leading >= 0x00a0);
278*795d594fSAndroid Build Coastguard Worker     case 0xd8:
279*795d594fSAndroid Build Coastguard Worker     case 0xd9:
280*795d594fSAndroid Build Coastguard Worker     case 0xda:
281*795d594fSAndroid Build Coastguard Worker     case 0xdb:
282*795d594fSAndroid Build Coastguard Worker       {
283*795d594fSAndroid Build Coastguard Worker         // We found a three byte sequence encoding one half of a surrogate.
284*795d594fSAndroid Build Coastguard Worker         // Look for the other half.
285*795d594fSAndroid Build Coastguard Worker         const uint32_t pair2 = GetUtf16FromUtf8(pUtf8Ptr);
286*795d594fSAndroid Build Coastguard Worker         const uint16_t trailing = GetLeadingUtf16Char(pair2);
287*795d594fSAndroid Build Coastguard Worker 
288*795d594fSAndroid Build Coastguard Worker         return (GetTrailingUtf16Char(pair2) == 0) && (0xdc00 <= trailing && trailing <= 0xdfff);
289*795d594fSAndroid Build Coastguard Worker       }
290*795d594fSAndroid Build Coastguard Worker     case 0xdc:
291*795d594fSAndroid Build Coastguard Worker     case 0xdd:
292*795d594fSAndroid Build Coastguard Worker     case 0xde:
293*795d594fSAndroid Build Coastguard Worker     case 0xdf:
294*795d594fSAndroid Build Coastguard Worker       // It's a trailing surrogate, which is not valid at this point.
295*795d594fSAndroid Build Coastguard Worker       return false;
296*795d594fSAndroid Build Coastguard Worker     case 0x20:
297*795d594fSAndroid Build Coastguard Worker     case 0xff:
298*795d594fSAndroid Build Coastguard Worker       // It's in the range that has format characters and specials.
299*795d594fSAndroid Build Coastguard Worker       switch (leading & 0xfff8) {
300*795d594fSAndroid Build Coastguard Worker         case 0x2008:
301*795d594fSAndroid Build Coastguard Worker           return (leading <= 0x200a);
302*795d594fSAndroid Build Coastguard Worker         case 0x2028:
303*795d594fSAndroid Build Coastguard Worker           return (leading == 0x202f);
304*795d594fSAndroid Build Coastguard Worker         case 0xfff0:
305*795d594fSAndroid Build Coastguard Worker         case 0xfff8:
306*795d594fSAndroid Build Coastguard Worker           return false;
307*795d594fSAndroid Build Coastguard Worker       }
308*795d594fSAndroid Build Coastguard Worker       return true;
309*795d594fSAndroid Build Coastguard Worker     default:
310*795d594fSAndroid Build Coastguard Worker       return true;
311*795d594fSAndroid Build Coastguard Worker   }
312*795d594fSAndroid Build Coastguard Worker }
313*795d594fSAndroid Build Coastguard Worker 
314*795d594fSAndroid Build Coastguard Worker /* Return whether the pointed-at modified-UTF-8 encoded character is
315*795d594fSAndroid Build Coastguard Worker  * valid as part of a member name, updating the pointer to point past
316*795d594fSAndroid Build Coastguard Worker  * the consumed character. This will consume two encoded UTF-16 code
317*795d594fSAndroid Build Coastguard Worker  * points if the character is encoded as a surrogate pair. Also, if
318*795d594fSAndroid Build Coastguard Worker  * this function returns false, then the given pointer may only have
319*795d594fSAndroid Build Coastguard Worker  * been partially advanced.
320*795d594fSAndroid Build Coastguard Worker  */
321*795d594fSAndroid Build Coastguard Worker ALWAYS_INLINE
IsValidPartOfMemberNameUtf8(const char ** pUtf8Ptr)322*795d594fSAndroid Build Coastguard Worker static bool IsValidPartOfMemberNameUtf8(const char** pUtf8Ptr) {
323*795d594fSAndroid Build Coastguard Worker   uint8_t c = (uint8_t) **pUtf8Ptr;
324*795d594fSAndroid Build Coastguard Worker   if (LIKELY(c <= 0x7f)) {
325*795d594fSAndroid Build Coastguard Worker     // It's low-ascii, so check the table.
326*795d594fSAndroid Build Coastguard Worker     uint32_t wordIdx = c >> 5;
327*795d594fSAndroid Build Coastguard Worker     uint32_t bitIdx = c & 0x1f;
328*795d594fSAndroid Build Coastguard Worker     (*pUtf8Ptr)++;
329*795d594fSAndroid Build Coastguard Worker     return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1 << bitIdx)) != 0;
330*795d594fSAndroid Build Coastguard Worker   }
331*795d594fSAndroid Build Coastguard Worker 
332*795d594fSAndroid Build Coastguard Worker   // It's a multibyte encoded character. Call a non-inline function
333*795d594fSAndroid Build Coastguard Worker   // for the heavy lifting.
334*795d594fSAndroid Build Coastguard Worker   return IsValidPartOfMemberNameUtf8Slow(pUtf8Ptr);
335*795d594fSAndroid Build Coastguard Worker }
336*795d594fSAndroid Build Coastguard Worker 
IsValidMemberName(const char * s)337*795d594fSAndroid Build Coastguard Worker bool IsValidMemberName(const char* s) {
338*795d594fSAndroid Build Coastguard Worker   bool angle_name = false;
339*795d594fSAndroid Build Coastguard Worker 
340*795d594fSAndroid Build Coastguard Worker   switch (*s) {
341*795d594fSAndroid Build Coastguard Worker     case '\0':
342*795d594fSAndroid Build Coastguard Worker       // The empty string is not a valid name.
343*795d594fSAndroid Build Coastguard Worker       return false;
344*795d594fSAndroid Build Coastguard Worker     case '<':
345*795d594fSAndroid Build Coastguard Worker       angle_name = true;
346*795d594fSAndroid Build Coastguard Worker       s++;
347*795d594fSAndroid Build Coastguard Worker       break;
348*795d594fSAndroid Build Coastguard Worker   }
349*795d594fSAndroid Build Coastguard Worker 
350*795d594fSAndroid Build Coastguard Worker   while (true) {
351*795d594fSAndroid Build Coastguard Worker     switch (*s) {
352*795d594fSAndroid Build Coastguard Worker       case '\0':
353*795d594fSAndroid Build Coastguard Worker         return !angle_name;
354*795d594fSAndroid Build Coastguard Worker       case '>':
355*795d594fSAndroid Build Coastguard Worker         return angle_name && s[1] == '\0';
356*795d594fSAndroid Build Coastguard Worker     }
357*795d594fSAndroid Build Coastguard Worker 
358*795d594fSAndroid Build Coastguard Worker     if (!IsValidPartOfMemberNameUtf8(&s)) {
359*795d594fSAndroid Build Coastguard Worker       return false;
360*795d594fSAndroid Build Coastguard Worker     }
361*795d594fSAndroid Build Coastguard Worker   }
362*795d594fSAndroid Build Coastguard Worker }
363*795d594fSAndroid Build Coastguard Worker 
// Selects what kind of string IsValidClassName() is asked to validate.
enum ClassNameType {
  kName,        // An unadorned class name.
  kDescriptor,  // A type descriptor (primitive code or "L...;" form).
};
365*795d594fSAndroid Build Coastguard Worker template<ClassNameType kType, char kSeparator>
IsValidClassName(const char * s)366*795d594fSAndroid Build Coastguard Worker static bool IsValidClassName(const char* s) {
367*795d594fSAndroid Build Coastguard Worker   int arrayCount = 0;
368*795d594fSAndroid Build Coastguard Worker   while (*s == '[') {
369*795d594fSAndroid Build Coastguard Worker     arrayCount++;
370*795d594fSAndroid Build Coastguard Worker     s++;
371*795d594fSAndroid Build Coastguard Worker   }
372*795d594fSAndroid Build Coastguard Worker 
373*795d594fSAndroid Build Coastguard Worker   if (arrayCount > 255) {
374*795d594fSAndroid Build Coastguard Worker     // Arrays may have no more than 255 dimensions.
375*795d594fSAndroid Build Coastguard Worker     return false;
376*795d594fSAndroid Build Coastguard Worker   }
377*795d594fSAndroid Build Coastguard Worker 
378*795d594fSAndroid Build Coastguard Worker   ClassNameType type = kType;
379*795d594fSAndroid Build Coastguard Worker   if (type != kDescriptor && arrayCount != 0) {
380*795d594fSAndroid Build Coastguard Worker     /*
381*795d594fSAndroid Build Coastguard Worker      * If we're looking at an array of some sort, then it doesn't
382*795d594fSAndroid Build Coastguard Worker      * matter if what is being asked for is a class name; the
383*795d594fSAndroid Build Coastguard Worker      * format looks the same as a type descriptor in that case, so
384*795d594fSAndroid Build Coastguard Worker      * treat it as such.
385*795d594fSAndroid Build Coastguard Worker      */
386*795d594fSAndroid Build Coastguard Worker     type = kDescriptor;
387*795d594fSAndroid Build Coastguard Worker   }
388*795d594fSAndroid Build Coastguard Worker 
389*795d594fSAndroid Build Coastguard Worker   if (type == kDescriptor) {
390*795d594fSAndroid Build Coastguard Worker     /*
391*795d594fSAndroid Build Coastguard Worker      * We are looking for a descriptor. Either validate it as a
392*795d594fSAndroid Build Coastguard Worker      * single-character primitive type, or continue on to check the
393*795d594fSAndroid Build Coastguard Worker      * embedded class name (bracketed by "L" and ";").
394*795d594fSAndroid Build Coastguard Worker      */
395*795d594fSAndroid Build Coastguard Worker     switch (*(s++)) {
396*795d594fSAndroid Build Coastguard Worker     case 'B':
397*795d594fSAndroid Build Coastguard Worker     case 'C':
398*795d594fSAndroid Build Coastguard Worker     case 'D':
399*795d594fSAndroid Build Coastguard Worker     case 'F':
400*795d594fSAndroid Build Coastguard Worker     case 'I':
401*795d594fSAndroid Build Coastguard Worker     case 'J':
402*795d594fSAndroid Build Coastguard Worker     case 'S':
403*795d594fSAndroid Build Coastguard Worker     case 'Z':
404*795d594fSAndroid Build Coastguard Worker       // These are all single-character descriptors for primitive types.
405*795d594fSAndroid Build Coastguard Worker       return (*s == '\0');
406*795d594fSAndroid Build Coastguard Worker     case 'V':
407*795d594fSAndroid Build Coastguard Worker       // Non-array void is valid, but you can't have an array of void.
408*795d594fSAndroid Build Coastguard Worker       return (arrayCount == 0) && (*s == '\0');
409*795d594fSAndroid Build Coastguard Worker     case 'L':
410*795d594fSAndroid Build Coastguard Worker       // Class name: Break out and continue below.
411*795d594fSAndroid Build Coastguard Worker       break;
412*795d594fSAndroid Build Coastguard Worker     default:
413*795d594fSAndroid Build Coastguard Worker       // Oddball descriptor character.
414*795d594fSAndroid Build Coastguard Worker       return false;
415*795d594fSAndroid Build Coastguard Worker     }
416*795d594fSAndroid Build Coastguard Worker   }
417*795d594fSAndroid Build Coastguard Worker 
418*795d594fSAndroid Build Coastguard Worker   /*
419*795d594fSAndroid Build Coastguard Worker    * We just consumed the 'L' that introduces a class name as part
420*795d594fSAndroid Build Coastguard Worker    * of a type descriptor, or we are looking for an unadorned class
421*795d594fSAndroid Build Coastguard Worker    * name.
422*795d594fSAndroid Build Coastguard Worker    */
423*795d594fSAndroid Build Coastguard Worker 
424*795d594fSAndroid Build Coastguard Worker   bool sepOrFirst = true;  // first character or just encountered a separator.
425*795d594fSAndroid Build Coastguard Worker   for (;;) {
426*795d594fSAndroid Build Coastguard Worker     uint8_t c = (uint8_t) *s;
427*795d594fSAndroid Build Coastguard Worker     switch (c) {
428*795d594fSAndroid Build Coastguard Worker     case '\0':
429*795d594fSAndroid Build Coastguard Worker       /*
430*795d594fSAndroid Build Coastguard Worker        * Premature end for a type descriptor, but valid for
431*795d594fSAndroid Build Coastguard Worker        * a class name as long as we haven't encountered an
432*795d594fSAndroid Build Coastguard Worker        * empty component (including the degenerate case of
433*795d594fSAndroid Build Coastguard Worker        * the empty string "").
434*795d594fSAndroid Build Coastguard Worker        */
435*795d594fSAndroid Build Coastguard Worker       return (type == kName) && !sepOrFirst;
436*795d594fSAndroid Build Coastguard Worker     case ';':
437*795d594fSAndroid Build Coastguard Worker       /*
438*795d594fSAndroid Build Coastguard Worker        * Invalid character for a class name, but the
439*795d594fSAndroid Build Coastguard Worker        * legitimate end of a type descriptor. In the latter
440*795d594fSAndroid Build Coastguard Worker        * case, make sure that this is the end of the string
441*795d594fSAndroid Build Coastguard Worker        * and that it doesn't end with an empty component
442*795d594fSAndroid Build Coastguard Worker        * (including the degenerate case of "L;").
443*795d594fSAndroid Build Coastguard Worker        */
444*795d594fSAndroid Build Coastguard Worker       return (type == kDescriptor) && !sepOrFirst && (s[1] == '\0');
445*795d594fSAndroid Build Coastguard Worker     case '/':
446*795d594fSAndroid Build Coastguard Worker     case '.':
447*795d594fSAndroid Build Coastguard Worker       if (c != kSeparator) {
448*795d594fSAndroid Build Coastguard Worker         // The wrong separator character.
449*795d594fSAndroid Build Coastguard Worker         return false;
450*795d594fSAndroid Build Coastguard Worker       }
451*795d594fSAndroid Build Coastguard Worker       if (sepOrFirst) {
452*795d594fSAndroid Build Coastguard Worker         // Separator at start or two separators in a row.
453*795d594fSAndroid Build Coastguard Worker         return false;
454*795d594fSAndroid Build Coastguard Worker       }
455*795d594fSAndroid Build Coastguard Worker       sepOrFirst = true;
456*795d594fSAndroid Build Coastguard Worker       s++;
457*795d594fSAndroid Build Coastguard Worker       break;
458*795d594fSAndroid Build Coastguard Worker     default:
459*795d594fSAndroid Build Coastguard Worker       if (!IsValidPartOfMemberNameUtf8(&s)) {
460*795d594fSAndroid Build Coastguard Worker         return false;
461*795d594fSAndroid Build Coastguard Worker       }
462*795d594fSAndroid Build Coastguard Worker       sepOrFirst = false;
463*795d594fSAndroid Build Coastguard Worker       break;
464*795d594fSAndroid Build Coastguard Worker     }
465*795d594fSAndroid Build Coastguard Worker   }
466*795d594fSAndroid Build Coastguard Worker }
467*795d594fSAndroid Build Coastguard Worker 
IsValidBinaryClassName(const char * s)468*795d594fSAndroid Build Coastguard Worker bool IsValidBinaryClassName(const char* s) {
469*795d594fSAndroid Build Coastguard Worker   return IsValidClassName<kName, '.'>(s);
470*795d594fSAndroid Build Coastguard Worker }
471*795d594fSAndroid Build Coastguard Worker 
IsValidJniClassName(const char * s)472*795d594fSAndroid Build Coastguard Worker bool IsValidJniClassName(const char* s) {
473*795d594fSAndroid Build Coastguard Worker   return IsValidClassName<kName, '/'>(s);
474*795d594fSAndroid Build Coastguard Worker }
475*795d594fSAndroid Build Coastguard Worker 
IsValidDescriptor(const char * s)476*795d594fSAndroid Build Coastguard Worker bool IsValidDescriptor(const char* s) {
477*795d594fSAndroid Build Coastguard Worker   return IsValidClassName<kDescriptor, '/'>(s);
478*795d594fSAndroid Build Coastguard Worker }
479*795d594fSAndroid Build Coastguard Worker 
PrettyDescriptor(Primitive::Type type)480*795d594fSAndroid Build Coastguard Worker std::string PrettyDescriptor(Primitive::Type type) {
481*795d594fSAndroid Build Coastguard Worker   return PrettyDescriptor(Primitive::Descriptor(type));
482*795d594fSAndroid Build Coastguard Worker }
483*795d594fSAndroid Build Coastguard Worker 
484*795d594fSAndroid Build Coastguard Worker }  // namespace art
485