1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <string.h>
29
30 #include "upb/reflection/def_builder_internal.h"
31 #include "upb/reflection/def_pool.h"
32 #include "upb/reflection/def_type.h"
33 #include "upb/reflection/field_def.h"
34
35 // Must be last.
36 #include "upb/port/def.inc"
37
38 /* The upb core does not generally have a concept of default instances. However
39 * for descriptor options we make an exception since the max size is known and
40 * modest (<200 bytes). All types can share a default instance since it is
41 * initialized to zeroes.
42 *
43 * We have to allocate an extra pointer for upb's internal metadata. */
44 static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
45 const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)];
46
// Returns the last dot-separated component of `fullname`.
//
// The result points into `fullname` itself (no copy is made):
//   - NULL input yields NULL.
//   - A name without any '.' is returned unchanged.
//   - Otherwise the result is the text following the final '.'.
const char* _upb_DefBuilder_FullToShort(const char* fullname) {
  if (!fullname) return NULL;

  const char* last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
60
// Leaves the builder by invoking UPB_LONGJMP on ctx->err with the value 1.
// NOTE(review): presumably paired with a setjmp on ctx->err elsewhere, so this
// call does not return to its caller -- confirm against the macro definition.
void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); }
62
// Stores a formatted error message in ctx->status and then bails out through
// _upb_DefBuilder_FailJmp().
//
// `fmt` and the variadic arguments are forwarded unchanged to
// upb_Status_VSetErrorFormat(); callers in this file pass printf-style
// conversions such as %s and %d.
void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  upb_Status_VSetErrorFormat(ctx->status, fmt, args);
  va_end(args);

  _upb_DefBuilder_FailJmp(ctx);
}
70
// Reports an allocation failure: stores the message "out of memory" in
// ctx->status and then exits through _upb_DefBuilder_FailJmp().
void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) {
  upb_Status_SetErrorMessage(ctx->status, "out of memory");
  _upb_DefBuilder_FailJmp(ctx);
}
75
76 // Verify a relative identifier string. The loop is branchless for speed.
_upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder * ctx,upb_StringView name)77 static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx,
78 upb_StringView name) {
79 bool good = name.size > 0;
80
81 for (size_t i = 0; i < name.size; i++) {
82 const char c = name.data[i];
83 const char d = c | 0x20; // force lowercase
84 const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_');
85 const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0);
86
87 good &= is_alpha | is_numer;
88 }
89
90 if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false);
91 }
92
// Builds a NUL-terminated full name from an optional `prefix` and a relative
// `name`.
//
// `name` is validated first via _upb_DefBuilder_CheckIdentNotFull(). When
// `prefix` is NULL the result is a copy of `name` made with upb_strdup2() on
// ctx->arena; otherwise the result is "<prefix>.<name>" in memory obtained
// from _upb_DefBuilder_Alloc(). A failed upb_strdup2() is reported through
// _upb_DefBuilder_OomErr().
const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx,
                                         const char* prefix,
                                         upb_StringView name) {
  _upb_DefBuilder_CheckIdentNotFull(ctx, name);

  if (!prefix) {
    char* copy = upb_strdup2(name.data, name.size, ctx->arena);
    if (!copy) _upb_DefBuilder_OomErr(ctx);
    return copy;
  }

  // Layout: prefix bytes, one '.', name bytes, one terminating NUL.
  const size_t prefix_len = strlen(prefix);
  char* joined = _upb_DefBuilder_Alloc(ctx, prefix_len + name.size + 2);

  char* out = joined;
  memcpy(out, prefix, prefix_len);
  out += prefix_len;
  *out++ = '.';
  memcpy(out, name.data, name.size);
  out += name.size;
  *out = '\0';

  return joined;
}
112
// Shortens the dotted path base[0 .. *len) by one trailing component.
//
// Returns false (leaving *len untouched) when *len is already 0. Otherwise
// *len becomes the index of the last '.' found at an index >= 1, or 0 when no
// such '.' exists, and the function returns true. `base` is only read.
static bool remove_component(char* base, size_t* len) {
  size_t pos = *len;
  if (pos == 0) return false;

  // Walk backwards over indices *len-1 .. 1; index 0 is never examined.
  while (--pos > 0) {
    if (base[pos] == '.') {
      *len = pos;
      return true;
    }
  }

  *len = 0;
  return true;
}
126
// Resolves the symbol `sym` against ctx->symtab and returns the definition it
// names, storing the definition's type in *type.
//
// Lookup rules:
//   - A symbol starting with '.' is absolute: a single lookup is done on the
//     text after the leading '.'.
//   - Any other symbol is relative to `base` (which may be NULL): the lookup
//     tries "<base>.<sym>", then drops trailing components of `base` one at a
//     time, finishing with plain "<sym>".
//
// An empty or unresolvable symbol is reported through _upb_DefBuilder_Errf();
// failure to allocate the scratch buffer is reported through
// _upb_DefBuilder_OomErr(). `from_name_dbg` is not used by this function.
const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx,
                                       const char* from_name_dbg,
                                       const char* base, upb_StringView sym,
                                       upb_deftype_t* type) {
  if (sym.size == 0) goto notfound;
  upb_value v;
  if (sym.data[0] == '.') {
    /* Symbols starting with '.' are absolute, so we do a single lookup.
     * Slice to omit the leading '.' */
    if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) {
      goto notfound;
    }
  } else {
    /* Remove components from base until we find an entry or run out. */
    size_t baselen = base ? strlen(base) : 0;

    /* Scratch space for "<base>.<sym>"; no NUL is needed because the lookup
     * takes an explicit length. */
    char* tmp = malloc(sym.size + baselen + 1);
    if (!tmp) _upb_DefBuilder_OomErr(ctx);

    while (1) {
      char* p = tmp;
      if (baselen) {
        memcpy(p, base, baselen);
        p[baselen] = '.';
        p += baselen + 1;
      }
      memcpy(p, sym.data, sym.size);
      p += sym.size;
      if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) {
        break;
      }
      if (!remove_component(tmp, &baselen)) {
        free(tmp);
        goto notfound;
      }
    }
    free(tmp);
  }

  *type = _upb_DefType_Type(v);
  return _upb_DefType_Unpack(v, *type);

notfound:
  _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
                       UPB_STRINGVIEW_ARGS(sym));
  return NULL;  // Not reached; keeps every path of this function returning.
}
170
// Resolves `sym` like _upb_DefBuilder_ResolveAny(), additionally requiring the
// resolved definition to have the given `type`.
//
// A non-NULL result whose type differs from `type` is reported through
// _upb_DefBuilder_Errf(); `from_name_dbg` appears only in that message.
const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx,
                                    const char* from_name_dbg, const char* base,
                                    upb_StringView sym, upb_deftype_t type) {
  upb_deftype_t actual_type;
  const void* def =
      _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &actual_type);

  // Nothing further to verify for a NULL result or a matching type.
  if (def == NULL || actual_type == type) return def;

  _upb_DefBuilder_Errf(ctx,
                       "type mismatch when resolving %s: couldn't find "
                       "name " UPB_STRINGVIEW_FORMAT " with type=%d",
                       from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
  return def;
}
185
// Sets bit 0x20 of `ch`, which in ASCII maps an upper-case letter to its
// lower-case form. A letter in the result means the input was a letter; for
// any other input the result carries no particular meaning.
static char upb_ascii_lower(char ch) {
  return (char)(ch | 0x20);
}
190
// Locale-independent range test used in place of the <ctype.h> classifiers,
// which depend on the current locale.
//
// Returns true when `c` lies in the inclusive range [low, high].
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  return !(c < low || high < c);
}
195
// Returns true when `c` is an ASCII letter (either case) or an underscore.
static bool upb_isletter(char c) {
  if (c == '_') return true;
  return upb_isbetween(upb_ascii_lower(c), 'a', 'z');
}
200
// Returns true when `c` is an ASCII letter, an underscore, or a decimal digit.
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) return true;
  return upb_isletter(c);
}
204
// Reads one character from the range [*src, end).
//
// On success stores the character in *ch, advances *src by one and returns
// true. When *src already equals `end`, returns false and leaves both *src and
// *ch untouched.
static bool TryGetChar(const char** src, const char* end, char* ch) {
  const char* cursor = *src;
  if (cursor == end) return false;

  *ch = *cursor;
  *src = cursor + 1;
  return true;
}
211
// Tries to consume one hexadecimal digit (0-9, a-f, A-F) from [*src, end).
//
// Returns the digit's value (0..15) and leaves *src advanced past it. Returns
// -1 when the input is exhausted or the next character is not a hex digit; in
// both cases *src ends up where it started.
static int TryGetHexDigit(const char** src, const char* end) {
  char ch;
  if (!TryGetChar(src, end, &ch)) return -1;

  int value = -1;
  if ('0' <= ch && ch <= '9') {
    value = ch - '0';
  } else {
    const char lower = upb_ascii_lower(ch);
    if ('a' <= lower && lower <= 'f') {
      value = lower - 'a' + 0xa;
    }
  }

  if (value < 0) {
    *src -= 1;  // Not a hex digit: un-read the character.
  }
  return value;
}
225
// Parses the hex digits of a "\x" escape, starting at *src, and returns the
// resulting byte. All consecutive hex digits are consumed.
//
// Errors (reported through _upb_DefBuilder_Errf(), naming field `f`):
//   - no hex digit is present at *src;
//   - the digits encode a value larger than 0xff.
static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx,
                                          const upb_FieldDef* f,
                                          const char** src, const char* end) {
  int hex_digit = TryGetHexDigit(src, end);
  if (hex_digit < 0) {
    _upb_DefBuilder_Errf(
        ctx, "\\x must be followed by at least one hex digit (field='%s')",
        upb_FieldDef_FullName(f));
    return 0;
  }
  unsigned int ret = (unsigned int)hex_digit;
  while ((hex_digit = TryGetHexDigit(src, end)) >= 0) {
    ret = (ret << 4) | (unsigned int)hex_digit;
    // Check the range after every digit. Checking only once at the end lets a
    // long digit run wrap the accumulator back into the 0..0xff range and be
    // accepted by mistake.
    if (ret > 0xff) {
      _upb_DefBuilder_Errf(ctx,
                           "Value of hex escape in field %s exceeds 8 bits",
                           upb_FieldDef_FullName(f));
      return 0;
    }
  }
  return (char)ret;
}
247
TryGetOctalDigit(const char ** src,const char * end)248 static char TryGetOctalDigit(const char** src, const char* end) {
249 char ch;
250 if (!TryGetChar(src, end, &ch)) return -1;
251 if ('0' <= ch && ch <= '7') {
252 return ch - '0';
253 }
254 *src -= 1; // Char wasn't actually an octal digit.
255 return -1;
256 }
257
upb_DefBuilder_ParseOctalEscape(upb_DefBuilder * ctx,const upb_FieldDef * f,const char ** src,const char * end)258 static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx,
259 const upb_FieldDef* f,
260 const char** src, const char* end) {
261 char ch = 0;
262 for (int i = 0; i < 3; i++) {
263 char digit;
264 if ((digit = TryGetOctalDigit(src, end)) >= 0) {
265 ch = (ch << 3) | digit;
266 }
267 }
268 return ch;
269 }
270
// Decodes one escape sequence from [*src, end) and returns the character it
// denotes. The first character read is the escape selector, i.e. the one that
// follows the backslash.
//
// Supported selectors: a b f n r t v \ ' " ? map to the matching C escape;
// x / X introduce a hex escape; 0-7 start an octal escape of up to three
// digits (the selector itself is the first digit).
//
// Errors (reported through _upb_DefBuilder_Errf()): no character is available,
// or the selector is not one of the above.
char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f,
                                 const char** src, const char* end) {
  char ch;
  if (!TryGetChar(src, end, &ch)) {
    _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s",
                         upb_FieldDef_FullName(f));
    return 0;
  }
  switch (ch) {
    case 'a':
      return '\a';
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case 'v':
      return '\v';
    case '\\':
      return '\\';
    case '\'':
      return '\'';
    case '\"':
      return '\"';
    case '?':
      return '\?';
    case 'x':
    case 'X':
      return upb_DefBuilder_ParseHexEscape(ctx, f, src, end);
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      // Un-read the selector so the octal parser sees it as its first digit.
      *src -= 1;
      return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end);
    default:
      break;
  }
  _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch);
  return 0;  // Matches the other error paths; every path now returns a value.
}
318
// Slow-path identifier check that pinpoints why `name` is invalid and reports
// it through _upb_DefBuilder_Errf().
//
// `full` selects whether '.' separators are permitted. The rules enforced:
//   - '.' is rejected when `full` is false or when it begins a component;
//   - each component must begin with a letter or '_';
//   - the remaining characters must be letters, digits or '_';
//   - the name must not be empty or end with an empty component.
//
// The caller in this file only gets here after the fast check has already
// rejected the name, so one of the errors is expected to fire; falling through
// to the end trips the assertion.
void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name,
                                    bool full) {
  // True while positioned at the first character of a path component.
  bool at_component_start = true;

  for (size_t pos = 0; pos < name.size; pos++) {
    const char cur = name.data[pos];

    if (cur == '.') {
      if (at_component_start || !full) {
        _upb_DefBuilder_Errf(
            ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")",
            UPB_STRINGVIEW_ARGS(name));
      }
      at_component_start = true;
      continue;
    }

    if (at_component_start) {
      if (!upb_isletter(cur)) {
        _upb_DefBuilder_Errf(ctx,
                             "invalid name: path components must start with a "
                             "letter (" UPB_STRINGVIEW_FORMAT ")",
                             UPB_STRINGVIEW_ARGS(name));
      }
      at_component_start = false;
      continue;
    }

    if (!upb_isalphanum(cur)) {
      _upb_DefBuilder_Errf(
          ctx,
          "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT
          ")",
          UPB_STRINGVIEW_ARGS(name));
    }
  }

  // Still at a component start: the name was empty or ended with '.'.
  if (at_component_start) {
    _upb_DefBuilder_Errf(ctx,
                         "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")",
                         UPB_STRINGVIEW_ARGS(name));
  }

  // We should never reach this point.
  UPB_ASSERT(false);
}
358