1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "upb/text/encode.h"
29 
30 #include <ctype.h>
31 #include <float.h>
32 #include <inttypes.h>
33 #include <stdarg.h>
34 #include <string.h>
35 
36 #include "upb/collections/map.h"
37 #include "upb/collections/map_sorter_internal.h"
38 #include "upb/lex/round_trip.h"
39 #include "upb/port/vsnprintf_compat.h"
40 #include "upb/reflection/message.h"
41 #include "upb/wire/eps_copy_input_stream.h"
42 #include "upb/wire/reader.h"
43 #include "upb/wire/types.h"
44 
45 // Must be last.
46 #include "upb/port/def.inc"
47 
48 typedef struct {
49   char *buf, *ptr, *end;
50   size_t overflow;
51   int indent_depth;
52   int options;
53   const upb_DefPool* ext_pool;
54   _upb_mapsorter sorter;
55 } txtenc;
56 
57 static void txtenc_msg(txtenc* e, const upb_Message* msg,
58                        const upb_MessageDef* m);
59 
// Appends `len` bytes starting at `data` to the output.
//
// If the remaining buffer space is too small, as many bytes as fit are copied
// and the remainder is only counted in e->overflow.
static void txtenc_putbytes(txtenc* e, const void* data, size_t len) {
  const size_t room = e->end - e->ptr;

  if (UPB_LIKELY(room >= len)) {
    // Common case: everything fits.
    memcpy(e->ptr, data, len);
    e->ptr += len;
    return;
  }

  // Truncating write: fill whatever space is left, then record the shortfall.
  if (room != 0) {
    memcpy(e->ptr, data, room);
    e->ptr += room;
  }
  e->overflow += len - room;
}
73 
// Appends the NUL-terminated string `str` (without its terminator) to the
// output.
static void txtenc_putstr(txtenc* e, const char* str) {
  const size_t len = strlen(str);
  txtenc_putbytes(e, str, len);
}
77 
// Appends printf-style formatted text to the output.
//
// The text is formatted directly into the remaining buffer space. The length
// reported by _upb_vsnprintf() is compared against that space: if it is
// strictly smaller, the write position advances by that length; otherwise the
// write position moves to the end of the buffer and the excess is added to
// e->overflow.
// NOTE(review): this relies on _upb_vsnprintf() returning the untruncated
// length like vsnprintf(); a negative return is not handled here -- confirm.
static void txtenc_printf(txtenc* e, const char* fmt, ...) {
  const size_t room = e->end - e->ptr;

  va_list ap;
  va_start(ap, fmt);
  const size_t wanted = _upb_vsnprintf(e->ptr, room, fmt, ap);
  va_end(ap);

  if (UPB_LIKELY(room > wanted)) {
    e->ptr += wanted;
    return;
  }

  // Did not fit entirely: account for the part that was cut off.
  e->overflow += wanted - room;
  e->ptr = UPB_PTRADD(e->ptr, room);
}
94 
// Emits two spaces per indentation level. Does nothing when
// UPB_TXTENC_SINGLELINE is set.
static void txtenc_indent(txtenc* e) {
  if (e->options & UPB_TXTENC_SINGLELINE) return;

  for (int level = 0; level < e->indent_depth; level++) {
    txtenc_putstr(e, "  ");
  }
}
103 
// Emits the separator that follows a field: a single space when
// UPB_TXTENC_SINGLELINE is set, a newline otherwise.
static void txtenc_endfield(txtenc* e) {
  const char* separator = (e->options & UPB_TXTENC_SINGLELINE) ? " " : "\n";
  txtenc_putstr(e, separator);
}
111 
// Emits the value of enum field `f`: the enum value's name when `val` is found
// in the field's enum definition, otherwise the number in decimal.
static void txtenc_enum(int32_t val, const upb_FieldDef* f, txtenc* e) {
  const upb_EnumValueDef* known =
      upb_EnumDef_FindValueByNumber(upb_FieldDef_EnumSubDef(f), val);

  if (!known) {
    // No matching enumerator: fall back to the raw number.
    txtenc_printf(e, "%" PRId32, val);
    return;
  }

  txtenc_printf(e, "%s", upb_EnumValueDef_Name(known));
}
122 
// Emits `str` as a double-quoted, escaped string literal.
//
// Newline, carriage return, tab, both quote characters and backslash get
// their two-character escapes. Any other byte for which isprint() is false is
// written as a three-digit octal escape, except that when `bytes` is false,
// bytes >= 0x80 are always passed through unchanged.
//
//   e:     encoder state to append to.
//   str:   the data to emit (not required to be NUL-terminated).
//   bytes: true for a bytes field, false for a string field.
static void txtenc_string(txtenc* e, upb_StringView str, bool bytes) {
  const char* ptr = str.data;
  const char* end = ptr + str.size;
  txtenc_putstr(e, "\"");

  while (ptr < end) {
    // <ctype.h> functions require a value representable as unsigned char (or
    // EOF). Plain char may be signed, so convert before classifying; passing a
    // negative value for bytes >= 0x80 would be undefined behavior.
    const unsigned char ch = (unsigned char)*ptr;

    switch (ch) {
      case '\n':
        txtenc_putstr(e, "\\n");
        break;
      case '\r':
        txtenc_putstr(e, "\\r");
        break;
      case '\t':
        txtenc_putstr(e, "\\t");
        break;
      case '\"':
        txtenc_putstr(e, "\\\"");
        break;
      case '\'':
        txtenc_putstr(e, "\\'");
        break;
      case '\\':
        txtenc_putstr(e, "\\\\");
        break;
      default:
        if ((bytes || ch < 0x80) && !isprint(ch)) {
          txtenc_printf(e, "\\%03o", (int)ch);
        } else {
          txtenc_putbytes(e, ptr, 1);
        }
        break;
    }
    ptr++;
  }

  txtenc_putstr(e, "\"");
}
161 
// Emits one field value, including indentation, the field name and the
// trailing separator.
//
// Message-typed fields are written as `name { ... }` with the sub-message
// encoded one indentation level deeper; all other types are written as
// `name: value`. Extension fields use their full name wrapped in square
// brackets in place of the short name.
static void txtenc_field(txtenc* e, upb_MessageValue val,
                         const upb_FieldDef* f) {
  txtenc_indent(e);
  const upb_CType ctype = upb_FieldDef_CType(f);
  const bool extension = upb_FieldDef_IsExtension(f);
  const char* full_name = upb_FieldDef_FullName(f);
  const char* short_name = upb_FieldDef_Name(f);

  if (ctype != kUpb_CType_Message) {
    // Scalar field: "name: value".
    if (extension) {
      txtenc_printf(e, "[%s]: ", full_name);
    } else {
      txtenc_printf(e, "%s: ", short_name);
    }

    char number[32];  // Scratch space for the float/double text forms.
    switch (ctype) {
      case kUpb_CType_Bool:
        txtenc_putstr(e, val.bool_val ? "true" : "false");
        break;
      case kUpb_CType_Float:
        _upb_EncodeRoundTripFloat(val.float_val, number, sizeof(number));
        txtenc_putstr(e, number);
        break;
      case kUpb_CType_Double:
        _upb_EncodeRoundTripDouble(val.double_val, number, sizeof(number));
        txtenc_putstr(e, number);
        break;
      case kUpb_CType_Int32:
        txtenc_printf(e, "%" PRId32, val.int32_val);
        break;
      case kUpb_CType_UInt32:
        txtenc_printf(e, "%" PRIu32, val.uint32_val);
        break;
      case kUpb_CType_Int64:
        txtenc_printf(e, "%" PRId64, val.int64_val);
        break;
      case kUpb_CType_UInt64:
        txtenc_printf(e, "%" PRIu64, val.uint64_val);
        break;
      case kUpb_CType_String:
        txtenc_string(e, val.str_val, false);
        break;
      case kUpb_CType_Bytes:
        txtenc_string(e, val.str_val, true);
        break;
      case kUpb_CType_Enum:
        txtenc_enum(val.int32_val, f, e);
        break;
      default:
        UPB_UNREACHABLE();
    }

    txtenc_endfield(e);
    return;
  }

  // Message field: "name {", the nested message, then "}".
  if (extension) {
    txtenc_printf(e, "[%s] {", full_name);
  } else {
    txtenc_printf(e, "%s {", short_name);
  }
  txtenc_endfield(e);

  e->indent_depth++;
  txtenc_msg(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
  e->indent_depth--;

  txtenc_indent(e);
  txtenc_putstr(e, "}");
  txtenc_endfield(e);
}
235 
/*
 * Arrays print as simple repeated elements, e.g.
 *
 *    foo_field: 1
 *    foo_field: 2
 *    foo_field: 3
 */
// Emits every element of `arr`, in index order, as a separate occurrence of
// field `f`.
static void txtenc_array(txtenc* e, const upb_Array* arr,
                         const upb_FieldDef* f) {
  const size_t count = upb_Array_Size(arr);

  for (size_t idx = 0; idx < count; idx++) {
    txtenc_field(e, upb_Array_Get(arr, idx), f);
  }
}
252 
// Emits a single map entry of map field `f` as a nested block:
//
//    <field name> {
//      <key field>
//      <value field>
//    }
//
// The key and value are printed with the field definitions at index 0 and 1
// of the map field's entry message definition.
static void txtenc_mapentry(txtenc* e, upb_MessageValue key,
                            upb_MessageValue val, const upb_FieldDef* f) {
  const upb_MessageDef* entry_def = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_field = upb_MessageDef_Field(entry_def, 0);
  const upb_FieldDef* value_field = upb_MessageDef_Field(entry_def, 1);

  // Opening line.
  txtenc_indent(e);
  txtenc_printf(e, "%s {", upb_FieldDef_Name(f));
  txtenc_endfield(e);

  // Body, one level deeper.
  e->indent_depth++;
  txtenc_field(e, key, key_field);
  txtenc_field(e, val, value_field);
  e->indent_depth--;

  // Closing line.
  txtenc_indent(e);
  txtenc_putstr(e, "}");
  txtenc_endfield(e);
}
271 
/*
 * Maps print as repeated key/value entry messages, e.g.
 *
 *    foo_map {
 *      key: "abc"
 *      value: 123
 *    }
 *    foo_map {
 *      key: "def"
 *      value: 456
 *    }
 */
// Emits every entry of `map` as an occurrence of map field `f`.
//
// Unless UPB_TXTENC_NOSORT is set, entries are visited in the order produced
// by the encoder's map sorter; otherwise they are visited in the map's own
// iteration order.
static void txtenc_map(txtenc* e, const upb_Map* map, const upb_FieldDef* f) {
  if ((e->options & UPB_TXTENC_NOSORT) == 0) {
    // Sorted output: the sorter is told the key's field type, which it gets
    // from field 0 of the entry message definition.
    const upb_MessageDef* entry_def = upb_FieldDef_MessageSubDef(f);
    const upb_FieldDef* key_field = upb_MessageDef_Field(entry_def, 0);
    _upb_sortedmap sorted;
    upb_MapEntry entry;

    _upb_mapsorter_pushmap(&e->sorter, upb_FieldDef_Type(key_field), map,
                           &sorted);
    while (_upb_sortedmap_next(&e->sorter, map, &sorted, &entry)) {
      upb_MessageValue k, v;
      memcpy(&k, &entry.data.k, sizeof(k));
      memcpy(&v, &entry.data.v, sizeof(v));
      txtenc_mapentry(e, k, v, f);
    }
    _upb_mapsorter_popmap(&e->sorter, &sorted);
    return;
  }

  // Unsorted output: plain map iteration.
  size_t pos = kUpb_Map_Begin;
  upb_MessageValue k, v;
  while (upb_Map_Next(map, &k, &v, &pos)) {
    txtenc_mapentry(e, k, v, f);
  }
}
307 
308 #define CHK(x)      \
309   do {              \
310     if (!(x)) {     \
311       return false; \
312     }               \
313   } while (0)
314 
315 /*
316  * Unknown fields are printed by number.
317  *
318  * 1001: 123
319  * 1002: "hello"
320  * 1006: 0xdeadbeef
321  * 1003: {
322  *   1: 111
323  * }
324  */
txtenc_unknown(txtenc * e,const char * ptr,upb_EpsCopyInputStream * stream,int groupnum)325 static const char* txtenc_unknown(txtenc* e, const char* ptr,
326                                   upb_EpsCopyInputStream* stream,
327                                   int groupnum) {
328   // We are guaranteed that the unknown data is valid wire format, and will not
329   // contain tag zero.
330   uint32_t end_group = groupnum > 0
331                            ? ((groupnum << kUpb_WireReader_WireTypeBits) |
332                               kUpb_WireType_EndGroup)
333                            : 0;
334 
335   while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
336     uint32_t tag;
337     CHK(ptr = upb_WireReader_ReadTag(ptr, &tag));
338     if (tag == end_group) return ptr;
339 
340     txtenc_indent(e);
341     txtenc_printf(e, "%d: ", (int)upb_WireReader_GetFieldNumber(tag));
342 
343     switch (upb_WireReader_GetWireType(tag)) {
344       case kUpb_WireType_Varint: {
345         uint64_t val;
346         CHK(ptr = upb_WireReader_ReadVarint(ptr, &val));
347         txtenc_printf(e, "%" PRIu64, val);
348         break;
349       }
350       case kUpb_WireType_32Bit: {
351         uint32_t val;
352         ptr = upb_WireReader_ReadFixed32(ptr, &val);
353         txtenc_printf(e, "0x%08" PRIu32, val);
354         break;
355       }
356       case kUpb_WireType_64Bit: {
357         uint64_t val;
358         ptr = upb_WireReader_ReadFixed64(ptr, &val);
359         txtenc_printf(e, "0x%016" PRIu64, val);
360         break;
361       }
362       case kUpb_WireType_Delimited: {
363         int size;
364         char* start = e->ptr;
365         size_t start_overflow = e->overflow;
366         CHK(ptr = upb_WireReader_ReadSize(ptr, &size));
367         CHK(upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size));
368 
369         // Speculatively try to parse as message.
370         txtenc_putstr(e, "{");
371         txtenc_endfield(e);
372 
373         // EpsCopyInputStream can't back up, so create a sub-stream for the
374         // speculative parse.
375         upb_EpsCopyInputStream sub_stream;
376         const char* sub_ptr = upb_EpsCopyInputStream_GetAliasedPtr(stream, ptr);
377         upb_EpsCopyInputStream_Init(&sub_stream, &sub_ptr, size, true);
378 
379         e->indent_depth++;
380         if (txtenc_unknown(e, sub_ptr, &sub_stream, -1)) {
381           ptr = upb_EpsCopyInputStream_Skip(stream, ptr, size);
382           e->indent_depth--;
383           txtenc_indent(e);
384           txtenc_putstr(e, "}");
385         } else {
386           // Didn't work out, print as raw bytes.
387           e->indent_depth--;
388           e->ptr = start;
389           e->overflow = start_overflow;
390           const char* str = ptr;
391           ptr = upb_EpsCopyInputStream_ReadString(stream, &str, size, NULL);
392           assert(ptr);
393           txtenc_string(e, (upb_StringView){.data = str, .size = size}, true);
394         }
395         break;
396       }
397       case kUpb_WireType_StartGroup:
398         txtenc_putstr(e, "{");
399         txtenc_endfield(e);
400         e->indent_depth++;
401         CHK(ptr = txtenc_unknown(e, ptr, stream,
402                                  upb_WireReader_GetFieldNumber(tag)));
403         e->indent_depth--;
404         txtenc_indent(e);
405         txtenc_putstr(e, "}");
406         break;
407       default:
408         return NULL;
409     }
410     txtenc_endfield(e);
411   }
412 
413   return end_group == 0 && !upb_EpsCopyInputStream_IsError(stream) ? ptr : NULL;
414 }
415 
416 #undef CHK
417 
// Emits all fields of `msg` (described by `m`) at the current indentation.
//
// Fields returned by upb_Message_Next() are printed first: maps entry by
// entry, repeated fields element by element, everything else as a single
// field. Unless UPB_TXTENC_SKIPUNKNOWN is set, the message's unknown fields
// are printed afterwards; if they cannot be parsed, nothing is printed for
// them.
static void txtenc_msg(txtenc* e, const upb_Message* msg,
                       const upb_MessageDef* m) {
  size_t iter = kUpb_Message_Begin;
  const upb_FieldDef* f;
  upb_MessageValue val;

  while (upb_Message_Next(msg, m, e->ext_pool, &f, &val, &iter)) {
    if (upb_FieldDef_IsMap(f)) {
      txtenc_map(e, val.map_val, f);
    } else if (upb_FieldDef_IsRepeated(f)) {
      txtenc_array(e, val.array_val, f);
    } else {
      txtenc_field(e, val, f);
    }
  }

  if ((e->options & UPB_TXTENC_SKIPUNKNOWN) == 0) {
    size_t size;
    const char* ptr = upb_Message_GetUnknown(msg, &size);
    if (size != 0) {
      // Snapshot everything txtenc_unknown() may modify, so a failed parse
      // can be undone completely.
      char* start = e->ptr;
      size_t start_overflow = e->overflow;
      int start_depth = e->indent_depth;
      upb_EpsCopyInputStream stream;
      upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
      if (!txtenc_unknown(e, ptr, &stream, -1)) {
        /* Unknown failed to parse, back up and don't print it at all. */
        e->ptr = start;
        // Also drop overflow bytes counted for the discarded output (they
        // would inflate the reported size), and undo any indentation left
        // raised by a parse that failed inside a nested group.
        e->overflow = start_overflow;
        e->indent_depth = start_depth;
      }
    }
  }
}
448 
// NUL-terminates the output and returns its total length.
//
// The returned length is the number of bytes written to the buffer plus the
// overflow count, not including the terminator. When `size` is non-zero a
// terminator is always stored; if the buffer is completely full, the last
// written byte is overwritten to make room for it.
size_t txtenc_nullz(txtenc* e, size_t size) {
  const size_t total = (e->ptr - e->buf) + e->overflow;

  if (size == 0) return total;  // No buffer to terminate.

  if (e->ptr == e->end) {
    e->ptr--;
  }
  *e->ptr = '\0';
  return total;
}
459 
upb_TextEncode(const upb_Message * msg,const upb_MessageDef * m,const upb_DefPool * ext_pool,int options,char * buf,size_t size)460 size_t upb_TextEncode(const upb_Message* msg, const upb_MessageDef* m,
461                       const upb_DefPool* ext_pool, int options, char* buf,
462                       size_t size) {
463   txtenc e;
464 
465   e.buf = buf;
466   e.ptr = buf;
467   e.end = UPB_PTRADD(buf, size);
468   e.overflow = 0;
469   e.indent_depth = 0;
470   e.options = options;
471   e.ext_pool = ext_pool;
472   _upb_mapsorter_init(&e.sorter);
473 
474   txtenc_msg(&e, msg, m);
475   _upb_mapsorter_destroy(&e.sorter);
476   return txtenc_nullz(&e, size);
477 }
478