1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "upb/text/encode.h"
29
30 #include <ctype.h>
31 #include <float.h>
32 #include <inttypes.h>
33 #include <stdarg.h>
34 #include <string.h>
35
36 #include "upb/collections/map.h"
37 #include "upb/collections/map_sorter_internal.h"
38 #include "upb/lex/round_trip.h"
39 #include "upb/port/vsnprintf_compat.h"
40 #include "upb/reflection/message.h"
41 #include "upb/wire/eps_copy_input_stream.h"
42 #include "upb/wire/reader.h"
43 #include "upb/wire/types.h"
44
45 // Must be last.
46 #include "upb/port/def.inc"
47
48 typedef struct {
49 char *buf, *ptr, *end;
50 size_t overflow;
51 int indent_depth;
52 int options;
53 const upb_DefPool* ext_pool;
54 _upb_mapsorter sorter;
55 } txtenc;
56
57 static void txtenc_msg(txtenc* e, const upb_Message* msg,
58 const upb_MessageDef* m);
59
/*
 * Appends `len` bytes from `data` to the output.
 *
 * If the remaining buffer space is too small, as many bytes as fit are copied
 * and the remainder is only counted in e->overflow.
 */
static void txtenc_putbytes(txtenc* e, const void* data, size_t len) {
  const size_t room = e->end - e->ptr;

  if (!UPB_LIKELY(room >= len)) {
    /* Truncating write: fill what is left, tally what was dropped. */
    if (room != 0) {
      memcpy(e->ptr, data, room);
      e->ptr += room;
    }
    e->overflow += (len - room);
    return;
  }

  memcpy(e->ptr, data, len);
  e->ptr += len;
}
73
/* Appends the NUL-terminated string `str` (without its terminator). */
static void txtenc_putstr(txtenc* e, const char* str) {
  const size_t len = strlen(str);
  txtenc_putbytes(e, str, len);
}
77
/*
 * Formats directly into the remaining buffer space.
 *
 * The value returned by _upb_vsnprintf() is treated as the length the complete
 * formatted text requires (presumably vsnprintf semantics -- confirm against
 * upb/port/vsnprintf_compat.h). When that length does not fit strictly inside
 * the remaining space, the write position is moved to the end of the buffer
 * and the shortfall is added to e->overflow.
 */
static void txtenc_printf(txtenc* e, const char* fmt, ...) {
  const size_t room = e->end - e->ptr;
  size_t needed;
  va_list ap;

  va_start(ap, fmt);
  needed = _upb_vsnprintf(e->ptr, room, fmt, ap);
  va_end(ap);

  if (!UPB_LIKELY(room > needed)) {
    e->ptr = UPB_PTRADD(e->ptr, room);
    e->overflow += (needed - room);
    return;
  }

  e->ptr += needed;
}
94
/*
 * Emits one indentation unit per nesting level. Nothing is written when
 * UPB_TXTENC_SINGLELINE is set.
 */
static void txtenc_indent(txtenc* e) {
  if (e->options & UPB_TXTENC_SINGLELINE) return;

  for (int level = 0; level < e->indent_depth; level++) {
    txtenc_putstr(e, " ");
  }
}
103
/*
 * Terminates a field: a space in UPB_TXTENC_SINGLELINE mode, a newline
 * otherwise.
 */
static void txtenc_endfield(txtenc* e) {
  const bool single_line = (e->options & UPB_TXTENC_SINGLELINE) != 0;
  txtenc_putstr(e, single_line ? " " : "\n");
}
111
/*
 * Prints an enum value: its symbolic name when the enum of field `f` defines
 * the number `val`, otherwise the number itself in decimal.
 */
static void txtenc_enum(int32_t val, const upb_FieldDef* f, txtenc* e) {
  const upb_EnumValueDef* known =
      upb_EnumDef_FindValueByNumber(upb_FieldDef_EnumSubDef(f), val);

  if (known == NULL) {
    txtenc_printf(e, "%" PRId32, val);
    return;
  }

  txtenc_printf(e, "%s", upb_EnumValueDef_Name(known));
}
122
/*
 * Writes `str` as a double-quoted, escaped literal.
 *
 * Newline, carriage return, tab, both quote characters and the backslash get
 * their two-character escapes. Any other byte outside printable ASCII
 * (0x20..0x7e) is written as a three-digit octal escape, except that bytes
 * >= 0x80 are passed through untouched when `bytes` is false so that
 * multi-byte text in string fields stays readable.
 *
 * Printability is decided with an explicit ASCII range check instead of
 * isprint(): passing a negative `char` to isprint() is undefined behavior, and
 * its answer depends on the current locale, which must not influence the
 * encoder's output.
 */
static void txtenc_string(txtenc* e, upb_StringView str, bool bytes) {
  const char* ptr = str.data;
  const char* end = ptr + str.size;
  txtenc_putstr(e, "\"");

  for (; ptr < end; ptr++) {
    const unsigned char ch = (unsigned char)*ptr;

    switch (ch) {
      case '\n':
        txtenc_putstr(e, "\\n");
        break;
      case '\r':
        txtenc_putstr(e, "\\r");
        break;
      case '\t':
        txtenc_putstr(e, "\\t");
        break;
      case '\"':
        txtenc_putstr(e, "\\\"");
        break;
      case '\'':
        txtenc_putstr(e, "\\'");
        break;
      case '\\':
        txtenc_putstr(e, "\\\\");
        break;
      default: {
        const bool ascii_printable = ch >= 0x20 && ch <= 0x7e;
        if ((bytes || ch < 0x80) && !ascii_printable) {
          txtenc_printf(e, "\\%03o", (unsigned int)ch);
        } else {
          txtenc_putbytes(e, ptr, 1);
        }
        break;
      }
    }
  }

  txtenc_putstr(e, "\"");
}
161
/*
 * Prints one field occurrence, preceded by the current indentation.
 *
 * Extension fields are labelled with their bracketed full name, regular
 * fields with their short name. Message-typed fields are rendered as a
 * brace-delimited block whose contents come from txtenc_msg(); every other
 * type is rendered as "label: value".
 */
static void txtenc_field(txtenc* e, upb_MessageValue val,
                         const upb_FieldDef* f) {
  txtenc_indent(e);
  const upb_CType ctype = upb_FieldDef_CType(f);
  const bool extension = upb_FieldDef_IsExtension(f);
  const char* qualified = upb_FieldDef_FullName(f);
  const char* simple = upb_FieldDef_Name(f);

  if (ctype != kUpb_CType_Message) {
    char number[32]; /* Scratch space for round-trip float/double text. */

    if (extension) {
      txtenc_printf(e, "[%s]: ", qualified);
    } else {
      txtenc_printf(e, "%s: ", simple);
    }

    switch (ctype) {
      case kUpb_CType_Bool:
        if (val.bool_val) {
          txtenc_putstr(e, "true");
        } else {
          txtenc_putstr(e, "false");
        }
        break;
      case kUpb_CType_Float:
        _upb_EncodeRoundTripFloat(val.float_val, number, sizeof(number));
        txtenc_putstr(e, number);
        break;
      case kUpb_CType_Double:
        _upb_EncodeRoundTripDouble(val.double_val, number, sizeof(number));
        txtenc_putstr(e, number);
        break;
      case kUpb_CType_Int32:
        txtenc_printf(e, "%" PRId32, val.int32_val);
        break;
      case kUpb_CType_UInt32:
        txtenc_printf(e, "%" PRIu32, val.uint32_val);
        break;
      case kUpb_CType_Int64:
        txtenc_printf(e, "%" PRId64, val.int64_val);
        break;
      case kUpb_CType_UInt64:
        txtenc_printf(e, "%" PRIu64, val.uint64_val);
        break;
      case kUpb_CType_String:
        txtenc_string(e, val.str_val, false);
        break;
      case kUpb_CType_Bytes:
        txtenc_string(e, val.str_val, true);
        break;
      case kUpb_CType_Enum:
        txtenc_enum(val.int32_val, f, e);
        break;
      default:
        UPB_UNREACHABLE();
    }

    txtenc_endfield(e);
    return;
  }

  /* Sub-message: open a block, recurse one level deeper, close the block. */
  if (extension) {
    txtenc_printf(e, "[%s] {", qualified);
  } else {
    txtenc_printf(e, "%s {", simple);
  }
  txtenc_endfield(e);

  e->indent_depth++;
  txtenc_msg(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
  e->indent_depth--;

  txtenc_indent(e);
  txtenc_putstr(e, "}");
  txtenc_endfield(e);
}
235
236 /*
237 * Arrays print as simple repeated elements, eg.
238 *
239 * foo_field: 1
240 * foo_field: 2
241 * foo_field: 3
242 */
txtenc_array(txtenc * e,const upb_Array * arr,const upb_FieldDef * f)243 static void txtenc_array(txtenc* e, const upb_Array* arr,
244 const upb_FieldDef* f) {
245 size_t i;
246 size_t size = upb_Array_Size(arr);
247
248 for (i = 0; i < size; i++) {
249 txtenc_field(e, upb_Array_Get(arr, i), f);
250 }
251 }
252
/*
 * Prints a single map entry as a block named after the map field `f`. The
 * block holds the key and the value, each rendered through txtenc_field()
 * using fields 0 and 1 of the map-entry message definition.
 */
static void txtenc_mapentry(txtenc* e, upb_MessageValue key,
                            upb_MessageValue val, const upb_FieldDef* f) {
  const upb_MessageDef* entry_def = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_field = upb_MessageDef_Field(entry_def, 0);
  const upb_FieldDef* value_field = upb_MessageDef_Field(entry_def, 1);

  /* Opening line. */
  txtenc_indent(e);
  txtenc_printf(e, "%s {", upb_FieldDef_Name(f));
  txtenc_endfield(e);

  /* Key and value, one level deeper. */
  e->indent_depth++;
  txtenc_field(e, key, key_field);
  txtenc_field(e, val, value_field);
  e->indent_depth--;

  /* Closing line. */
  txtenc_indent(e);
  txtenc_putstr(e, "}");
  txtenc_endfield(e);
}
271
272 /*
273 * Maps print as messages of key/value, etc.
274 *
275 * foo_map: {
276 * key: "abc"
277 * value: 123
278 * }
279 * foo_map: {
280 * key: "def"
281 * value: 456
282 * }
283 */
txtenc_map(txtenc * e,const upb_Map * map,const upb_FieldDef * f)284 static void txtenc_map(txtenc* e, const upb_Map* map, const upb_FieldDef* f) {
285 if (e->options & UPB_TXTENC_NOSORT) {
286 size_t iter = kUpb_Map_Begin;
287 upb_MessageValue key, val;
288 while (upb_Map_Next(map, &key, &val, &iter)) {
289 txtenc_mapentry(e, key, val, f);
290 }
291 } else {
292 const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
293 const upb_FieldDef* key_f = upb_MessageDef_Field(entry, 0);
294 _upb_sortedmap sorted;
295 upb_MapEntry ent;
296
297 _upb_mapsorter_pushmap(&e->sorter, upb_FieldDef_Type(key_f), map, &sorted);
298 while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
299 upb_MessageValue key, val;
300 memcpy(&key, &ent.data.k, sizeof(key));
301 memcpy(&val, &ent.data.v, sizeof(val));
302 txtenc_mapentry(e, key, val, f);
303 }
304 _upb_mapsorter_popmap(&e->sorter, &sorted);
305 }
306 }
307
308 #define CHK(x) \
309 do { \
310 if (!(x)) { \
311 return false; \
312 } \
313 } while (0)
314
315 /*
316 * Unknown fields are printed by number.
317 *
318 * 1001: 123
319 * 1002: "hello"
320 * 1006: 0xdeadbeef
321 * 1003: {
322 * 1: 111
323 * }
324 */
txtenc_unknown(txtenc * e,const char * ptr,upb_EpsCopyInputStream * stream,int groupnum)325 static const char* txtenc_unknown(txtenc* e, const char* ptr,
326 upb_EpsCopyInputStream* stream,
327 int groupnum) {
328 // We are guaranteed that the unknown data is valid wire format, and will not
329 // contain tag zero.
330 uint32_t end_group = groupnum > 0
331 ? ((groupnum << kUpb_WireReader_WireTypeBits) |
332 kUpb_WireType_EndGroup)
333 : 0;
334
335 while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
336 uint32_t tag;
337 CHK(ptr = upb_WireReader_ReadTag(ptr, &tag));
338 if (tag == end_group) return ptr;
339
340 txtenc_indent(e);
341 txtenc_printf(e, "%d: ", (int)upb_WireReader_GetFieldNumber(tag));
342
343 switch (upb_WireReader_GetWireType(tag)) {
344 case kUpb_WireType_Varint: {
345 uint64_t val;
346 CHK(ptr = upb_WireReader_ReadVarint(ptr, &val));
347 txtenc_printf(e, "%" PRIu64, val);
348 break;
349 }
350 case kUpb_WireType_32Bit: {
351 uint32_t val;
352 ptr = upb_WireReader_ReadFixed32(ptr, &val);
353 txtenc_printf(e, "0x%08" PRIu32, val);
354 break;
355 }
356 case kUpb_WireType_64Bit: {
357 uint64_t val;
358 ptr = upb_WireReader_ReadFixed64(ptr, &val);
359 txtenc_printf(e, "0x%016" PRIu64, val);
360 break;
361 }
362 case kUpb_WireType_Delimited: {
363 int size;
364 char* start = e->ptr;
365 size_t start_overflow = e->overflow;
366 CHK(ptr = upb_WireReader_ReadSize(ptr, &size));
367 CHK(upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size));
368
369 // Speculatively try to parse as message.
370 txtenc_putstr(e, "{");
371 txtenc_endfield(e);
372
373 // EpsCopyInputStream can't back up, so create a sub-stream for the
374 // speculative parse.
375 upb_EpsCopyInputStream sub_stream;
376 const char* sub_ptr = upb_EpsCopyInputStream_GetAliasedPtr(stream, ptr);
377 upb_EpsCopyInputStream_Init(&sub_stream, &sub_ptr, size, true);
378
379 e->indent_depth++;
380 if (txtenc_unknown(e, sub_ptr, &sub_stream, -1)) {
381 ptr = upb_EpsCopyInputStream_Skip(stream, ptr, size);
382 e->indent_depth--;
383 txtenc_indent(e);
384 txtenc_putstr(e, "}");
385 } else {
386 // Didn't work out, print as raw bytes.
387 e->indent_depth--;
388 e->ptr = start;
389 e->overflow = start_overflow;
390 const char* str = ptr;
391 ptr = upb_EpsCopyInputStream_ReadString(stream, &str, size, NULL);
392 assert(ptr);
393 txtenc_string(e, (upb_StringView){.data = str, .size = size}, true);
394 }
395 break;
396 }
397 case kUpb_WireType_StartGroup:
398 txtenc_putstr(e, "{");
399 txtenc_endfield(e);
400 e->indent_depth++;
401 CHK(ptr = txtenc_unknown(e, ptr, stream,
402 upb_WireReader_GetFieldNumber(tag)));
403 e->indent_depth--;
404 txtenc_indent(e);
405 txtenc_putstr(e, "}");
406 break;
407 default:
408 return NULL;
409 }
410 txtenc_endfield(e);
411 }
412
413 return end_group == 0 && !upb_EpsCopyInputStream_IsError(stream) ? ptr : NULL;
414 }
415
416 #undef CHK
417
/*
 * Prints every field that upb_Message_Next() reports for `msg` (described by
 * `m`), followed by the message's unknown fields unless
 * UPB_TXTENC_SKIPUNKNOWN is set.
 *
 * Maps and repeated fields go through their dedicated printers; everything
 * else goes through txtenc_field().
 *
 * If the unknown-field data cannot be rendered, everything written for it is
 * discarded. The write position, the overflow count and the indentation depth
 * are all rewound, so the discarded text does not inflate the length that is
 * eventually reported to the caller.
 */
static void txtenc_msg(txtenc* e, const upb_Message* msg,
                       const upb_MessageDef* m) {
  size_t iter = kUpb_Message_Begin;
  const upb_FieldDef* f;
  upb_MessageValue val;

  while (upb_Message_Next(msg, m, e->ext_pool, &f, &val, &iter)) {
    if (upb_FieldDef_IsMap(f)) {
      txtenc_map(e, val.map_val, f);
    } else if (upb_FieldDef_IsRepeated(f)) {
      txtenc_array(e, val.array_val, f);
    } else {
      txtenc_field(e, val, f);
    }
  }

  if ((e->options & UPB_TXTENC_SKIPUNKNOWN) != 0) return;

  size_t size;
  const char* ptr = upb_Message_GetUnknown(msg, &size);
  if (size == 0) return;

  /* Snapshot the complete output state so a failure can be undone. */
  char* const start = e->ptr;
  const size_t start_overflow = e->overflow;
  const int start_depth = e->indent_depth;

  upb_EpsCopyInputStream stream;
  upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
  if (!txtenc_unknown(e, ptr, &stream, -1)) {
    /* Unknown failed to parse, back up and don't print it at all. */
    e->ptr = start;
    e->overflow = start_overflow;
    e->indent_depth = start_depth;
  }
}
448
/*
 * NUL-terminates the output and returns its length.
 *
 * The returned length is the number of bytes written to the buffer plus the
 * number that overflowed; the terminator is not counted. When `size` is zero
 * nothing is written. When the buffer is completely full, the last byte
 * written is replaced by the terminator.
 */
size_t txtenc_nullz(txtenc* e, size_t size) {
  const size_t total = e->ptr - e->buf + e->overflow;

  if (size == 0) return total;

  if (e->ptr == e->end) {
    e->ptr--;
  }
  *e->ptr = '\0';

  return total;
}
459
upb_TextEncode(const upb_Message * msg,const upb_MessageDef * m,const upb_DefPool * ext_pool,int options,char * buf,size_t size)460 size_t upb_TextEncode(const upb_Message* msg, const upb_MessageDef* m,
461 const upb_DefPool* ext_pool, int options, char* buf,
462 size_t size) {
463 txtenc e;
464
465 e.buf = buf;
466 e.ptr = buf;
467 e.end = UPB_PTRADD(buf, size);
468 e.overflow = 0;
469 e.indent_depth = 0;
470 e.options = options;
471 e.ext_pool = ext_pool;
472 _upb_mapsorter_init(&e.sorter);
473
474 txtenc_msg(&e, msg, m);
475 _upb_mapsorter_destroy(&e.sorter);
476 return txtenc_nullz(&e, size);
477 }
478