1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <string.h>
29 
30 #include "upb/reflection/def_builder_internal.h"
31 #include "upb/reflection/def_pool.h"
32 #include "upb/reflection/def_type.h"
33 #include "upb/reflection/field_def.h"
34 
35 // Must be last.
36 #include "upb/port/def.inc"
37 
38 /* The upb core does not generally have a concept of default instances. However
39  * for descriptor options we make an exception since the max size is known and
40  * modest (<200 bytes). All types can share a default instance since it is
41  * initialized to zeroes.
42  *
43  * We have to allocate an extra pointer for upb's internal metadata. */
// Zero-filled backing storage: _UPB_MAXOPT_SIZE bytes preceded by one
// pointer-sized slot.
static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
// Shared default instance: points just past the leading pointer-sized slot of
// opt_default_buf.
const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)];
46 
// Returns the part of `fullname` that follows its last '.', i.e. the short
// (unqualified) name. The returned pointer aliases `fullname`; nothing is
// copied. A NULL input yields NULL, and a name without any '.' is returned
// unchanged.
const char* _upb_DefBuilder_FullToShort(const char* fullname) {
  if (!fullname) return NULL;

  // The short name begins one character past the final '.'.
  const char* last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
60 
// Transfers control to the jump target stored in ctx->err, passing the value
// 1, via the UPB_LONGJMP macro.
// NOTE(review): presumably this never returns to its caller -- confirm
// against the definition of UPB_LONGJMP.
void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); }
62 
// Formats a printf-style error message into ctx->status and then bails out
// through _upb_DefBuilder_FailJmp().
//
//   ctx: builder whose status receives the message.
//   fmt: printf-style format string; the variadic arguments supply its values.
void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) {
  va_list args;

  va_start(args, fmt);
  upb_Status_VSetErrorFormat(ctx->status, fmt, args);
  va_end(args);

  // The status is populated; now unwind.
  _upb_DefBuilder_FailJmp(ctx);
}
70 
// Records the fixed message "out of memory" in ctx->status and then bails out
// through _upb_DefBuilder_FailJmp().
void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) {
  upb_Status_SetErrorMessage(ctx->status, "out of memory");
  _upb_DefBuilder_FailJmp(ctx);
}
75 
76 // Verify a relative identifier string. The loop is branchless for speed.
_upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder * ctx,upb_StringView name)77 static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx,
78                                               upb_StringView name) {
79   bool good = name.size > 0;
80 
81   for (size_t i = 0; i < name.size; i++) {
82     const char c = name.data[i];
83     const char d = c | 0x20;  // force lowercase
84     const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_');
85     const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0);
86 
87     good &= is_alpha | is_numer;
88   }
89 
90   if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false);
91 }
92 
// Builds the NUL-terminated full name for `name`.
//
// `name` is first validated as a relative identifier. With a non-NULL
// `prefix` the result is "<prefix>.<name>" in memory obtained from
// _upb_DefBuilder_Alloc(); with a NULL `prefix` the result is a copy of `name`
// made by upb_strdup2() in ctx->arena, and a failed copy is reported through
// _upb_DefBuilder_OomErr().
const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx,
                                         const char* prefix,
                                         upb_StringView name) {
  _upb_DefBuilder_CheckIdentNotFull(ctx, name);

  if (!prefix) {
    char* copy = upb_strdup2(name.data, name.size, ctx->arena);
    if (!copy) _upb_DefBuilder_OomErr(ctx);
    return copy;
  }

  // Room for: prefix + '.' + name + '\0'.
  const size_t prefix_len = strlen(prefix);
  char* joined = _upb_DefBuilder_Alloc(ctx, prefix_len + name.size + 2);

  char* out = joined;
  memcpy(out, prefix, prefix_len);
  out += prefix_len;
  *out++ = '.';
  memcpy(out, name.data, name.size);
  out[name.size] = '\0';
  return joined;
}
112 
// Drops the last dot-separated component from the first `*len` bytes of
// `base` by shrinking `*len`; the buffer itself is not modified.
//
// Returns false (leaving `*len` at 0) when there is nothing left to remove.
// Otherwise returns true with `*len` set to the index of the last '.' found at
// an index >= 1, or to 0 when there is no such '.'.
static bool remove_component(char* base, size_t* len) {
  size_t pos = *len;
  if (pos == 0) return false;

  // Scan backwards over indices len-1 .. 1; index 0 is never inspected since
  // stopping there yields length 0 either way.
  while (--pos > 0 && base[pos] != '.') {
  }

  *len = pos;
  return true;
}
126 
// Resolves the symbol `sym` against ctx->symtab.
//
//   ctx:           builder; errors are reported through it.
//   from_name_dbg: not used by this function.
//   base:          scope that relative names are resolved against; may be
//                  NULL, which is treated as the empty scope.
//   sym:           name to resolve. A leading '.' marks it as absolute.
//   type:          out-parameter receiving the def type of the match.
//
// Relative names are tried as "<base>.<sym>", then with trailing components
// of `base` removed one at a time, and finally as plain "<sym>".
//
// Returns the def unpacked from the matching symbol table entry. If `sym` is
// empty or no candidate is found, a "couldn't resolve name" error is raised
// via _upb_DefBuilder_Errf(). Failure to allocate the scratch buffer is
// raised via _upb_DefBuilder_OomErr().
const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx,
                                       const char* from_name_dbg,
                                       const char* base, upb_StringView sym,
                                       upb_deftype_t* type) {
  if (sym.size == 0) goto notfound;
  upb_value v;
  if (sym.data[0] == '.') {
    /* Symbols starting with '.' are absolute, so we do a single lookup.
     * Slice to omit the leading '.' */
    if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) {
      goto notfound;
    }
  } else {
    /* Remove components from base until we find an entry or run out. */
    size_t baselen = base ? strlen(base) : 0;
    /* Scratch space for base + '.' + sym. No terminator is needed because
     * the lookup takes an explicit length. */
    char* tmp = malloc(sym.size + baselen + 1);
    /* Report allocation failure instead of writing through a NULL pointer. */
    if (!tmp) _upb_DefBuilder_OomErr(ctx);
    while (1) {
      char* p = tmp;
      if (baselen) {
        memcpy(p, base, baselen);
        p[baselen] = '.';
        p += baselen + 1;
      }
      memcpy(p, sym.data, sym.size);
      p += sym.size;
      if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) {
        break;
      }
      if (!remove_component(tmp, &baselen)) {
        /* Release the scratch buffer before the error path unwinds. */
        free(tmp);
        goto notfound;
      }
    }
    free(tmp);
  }

  *type = _upb_DefType_Type(v);
  return _upb_DefType_Unpack(v, *type);

notfound:
  _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
                       UPB_STRINGVIEW_ARGS(sym));
}
170 
// Resolves `sym` like _upb_DefBuilder_ResolveAny() and additionally requires
// the match to have def type `type`.
//
// Returns whatever _upb_DefBuilder_ResolveAny() returned. If that result is
// non-NULL but its def type differs from `type`, a "type mismatch" error
// naming `from_name_dbg` and `sym` is raised via _upb_DefBuilder_Errf().
const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx,
                                    const char* from_name_dbg, const char* base,
                                    upb_StringView sym, upb_deftype_t type) {
  upb_deftype_t actual;
  const void* def =
      _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &actual);

  // Nothing to verify for a NULL result or a matching type.
  if (!def || actual == type) return def;

  _upb_DefBuilder_Errf(ctx,
                       "type mismatch when resolving %s: couldn't find "
                       "name " UPB_STRINGVIEW_FORMAT " with type=%d",
                       from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
  return def;
}
185 
// Sets bit 0x20 of `ch`, which in ASCII maps an upper-case letter onto its
// lower-case counterpart. Only a letter result is meaningful: if the output is
// a letter the input was a letter too, while for any other output the input
// may have been altered arbitrarily.
static char upb_ascii_lower(char ch) {
  const int lowered = ch | 0x20;
  return (char)lowered;
}
190 
// Locale-independent range test, used in place of the locale-dependent
// <ctype.h> classifiers. Returns true when `c` lies in the inclusive range
// [low, high].
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  if (c < low) return false;
  return c <= high;
}
195 
// Returns true when `c` is '_' or an ASCII letter of either case.
static bool upb_isletter(char c) {
  if (c == '_') return true;
  // Fold to lower case so a single range check covers both cases.
  return upb_isbetween(upb_ascii_lower(c), 'a', 'z');
}
200 
// Returns true when `c` is a decimal digit, an ASCII letter, or '_'.
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) return true;
  return upb_isletter(c);
}
204 
// Reads the next character of the range [*src, end).
//
// On success stores the character in `*ch`, advances `*src` by one and returns
// true. When `*src` already equals `end`, returns false and leaves both `*src`
// and `*ch` untouched.
static bool TryGetChar(const char** src, const char* end, char* ch) {
  const char* cur = *src;
  if (cur == end) return false;

  *ch = *cur;
  *src = cur + 1;
  return true;
}
211 
// Consumes one hexadecimal digit (either case) from the range [*src, end).
//
// Returns the digit's value (0-15) with `*src` advanced past it. Returns -1
// when the range is empty or the next character is not a hex digit; `*src` is
// left where it was in both cases.
static int TryGetHexDigit(const char** src, const char* end) {
  char ch;
  if (!TryGetChar(src, end, &ch)) return -1;

  if (ch >= '0' && ch <= '9') return ch - '0';

  const char lower = upb_ascii_lower(ch);
  if (lower >= 'a' && lower <= 'f') return lower - 'a' + 0xa;

  *src -= 1;  // Not a hex digit: put the character back.
  return -1;
}
225 
// Parses the hex digits that follow "\x" in the range [*src, end).
//
//   ctx: builder used for error reporting.
//   f:   field whose full name is included in error messages.
//   src: in/out cursor; advanced past every hex digit consumed.
//   end: one past the last readable character.
//
// Returns the escaped byte. Raises an error via _upb_DefBuilder_Errf() when no
// hex digit follows, or when the digits encode a value larger than 0xff.
static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx,
                                          const upb_FieldDef* f,
                                          const char** src, const char* end) {
  int hex_digit = TryGetHexDigit(src, end);
  if (hex_digit < 0) {
    _upb_DefBuilder_Errf(
        ctx, "\\x must be followed by at least one hex digit (field='%s')",
        upb_FieldDef_FullName(f));
    return 0;
  }
  unsigned int ret = (unsigned int)hex_digit;
  while ((hex_digit = TryGetHexDigit(src, end)) >= 0) {
    ret = (ret << 4) | (unsigned int)hex_digit;
    // Check the range after every digit. Checking only once after the loop
    // lets a long digit run shift the high bits out of `ret`, so an oversized
    // escape would wrap around to a small value and be accepted.
    if (ret > 0xff) {
      _upb_DefBuilder_Errf(ctx,
                           "Value of hex escape in field %s exceeds 8 bits",
                           upb_FieldDef_FullName(f));
      return 0;
    }
  }
  return (char)ret;
}
247 
TryGetOctalDigit(const char ** src,const char * end)248 static char TryGetOctalDigit(const char** src, const char* end) {
249   char ch;
250   if (!TryGetChar(src, end, &ch)) return -1;
251   if ('0' <= ch && ch <= '7') {
252     return ch - '0';
253   }
254   *src -= 1;  // Char wasn't actually an octal digit.
255   return -1;
256 }
257 
upb_DefBuilder_ParseOctalEscape(upb_DefBuilder * ctx,const upb_FieldDef * f,const char ** src,const char * end)258 static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx,
259                                             const upb_FieldDef* f,
260                                             const char** src, const char* end) {
261   char ch = 0;
262   for (int i = 0; i < 3; i++) {
263     char digit;
264     if ((digit = TryGetOctalDigit(src, end)) >= 0) {
265       ch = (ch << 3) | digit;
266     }
267   }
268   return ch;
269 }
270 
// Parses the body of a backslash escape sequence; `*src` must point at the
// character immediately after the backslash.
//
//   ctx: builder used for error reporting.
//   f:   field whose full name is included in error messages.
//   src: in/out cursor; advanced past the characters consumed.
//   end: one past the last readable character.
//
// Returns the character the escape denotes. Handles the single-character
// escapes a b f n r t v \ ' " ?, hex escapes introduced by x or X, and octal
// escapes starting with a digit 0-7. Raises an error via
// _upb_DefBuilder_Errf() when the input ends right after the backslash or the
// escape character is not recognised.
char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f,
                                 const char** src, const char* end) {
  char ch;
  if (!TryGetChar(src, end, &ch)) {
    _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s",
                         upb_FieldDef_FullName(f));
    return 0;
  }

  // Numeric escapes are delegated to their own parsers.
  if (ch == 'x' || ch == 'X') {
    return upb_DefBuilder_ParseHexEscape(ctx, f, src, end);
  }
  if ('0' <= ch && ch <= '7') {
    *src -= 1;  // The digit just read is part of the octal value: put it back.
    return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end);
  }

  // Single-character escapes.
  switch (ch) {
    case 'a':  return '\a';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    case 'v':  return '\v';
    case '\\': return '\\';
    case '\'': return '\'';
    case '\"': return '\"';
    case '?':  return '\?';
    default:   break;
  }
  _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch);
}
318 
// Slow-path identifier validation that pinpoints what is wrong with `name`
// and reports it via _upb_DefBuilder_Errf().
//
//   ctx:  builder used for error reporting.
//   name: identifier to examine.
//   full: when true, '.' is accepted as a separator between components; when
//         false, any '.' is an error.
//
// Each component must start with a letter or '_' and continue with letters,
// digits or '_'. An empty name, or an empty component (leading, trailing or
// doubled '.'), is an error. The trailing assertion expresses that callers
// only take this path for names that are expected to fail one of the checks.
void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name,
                                    bool full) {
  bool at_start = true;  // True while positioned at the start of a component.
  size_t pos = 0;

  while (pos < name.size) {
    const char ch = name.data[pos++];

    if (ch == '.') {
      if (at_start || !full) {
        _upb_DefBuilder_Errf(
            ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")",
            UPB_STRINGVIEW_ARGS(name));
      }
      at_start = true;
      continue;
    }

    if (at_start) {
      if (!upb_isletter(ch)) {
        _upb_DefBuilder_Errf(ctx,
                             "invalid name: path components must start with a "
                             "letter (" UPB_STRINGVIEW_FORMAT ")",
                             UPB_STRINGVIEW_ARGS(name));
      }
      at_start = false;
      continue;
    }

    if (!upb_isalphanum(ch)) {
      _upb_DefBuilder_Errf(
          ctx,
          "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT
          ")",
          UPB_STRINGVIEW_ARGS(name));
    }
  }

  // Still at a component start: the name was empty or ended with '.'.
  if (at_start) {
    _upb_DefBuilder_Errf(ctx,
                         "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")",
                         UPB_STRINGVIEW_ARGS(name));
  }

  // We should never reach this point.
  UPB_ASSERT(false);
}
358