1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // upb_decode: parsing into a upb_Message using a upb_MiniTable.
9
10 #ifndef UPB_WIRE_DECODE_H_
11 #define UPB_WIRE_DECODE_H_
12
13 #include <stddef.h>
14 #include <stdint.h>
15
16 #include "upb/mem/arena.h"
17 #include "upb/message/message.h"
18 #include "upb/mini_table/extension_registry.h"
19 #include "upb/mini_table/message.h"
20
21 // Must be last.
22 #include "upb/port/def.inc"
23
24 #ifdef __cplusplus
25 extern "C" {
26 #endif
27
// Option flags accepted by the decoder. Each flag occupies a distinct bit so
// that flags can be combined with bitwise OR.
enum {
  // When set, string values point directly into the input buffer rather than
  // being copied into the arena.
  kUpb_DecodeOption_AliasString = 1 << 0,

  // When set, the parse reports failure if any message is still missing a
  // required field once its data ends. Parsing is not aborted early: it runs
  // to completion and the failure is only reported at the end.
  //
  // IMPORTANT CAVEATS:
  //
  // 1. False positive: an incomplete message seen on the wire may be completed
  //    later when the same sub-message occurs again. To be truly robust, a
  //    reported failure must therefore be verified with a second pass.
  //
  // 2. False success: when decoding into a message that already has some
  //    sub-message fields present, a sub-message that does not occur in the
  //    binary payload is never visited, so an incomplete one goes undetected.
  //    Consequently this check is only useful for implementing
  //    ParseFromString() semantics. For MergeFromString(), a post-parse
  //    validation step will always be necessary.
  kUpb_DecodeOption_CheckRequired = 1 << 1,

  // EXPERIMENTAL:
  //
  // When set, the parser accepts sub-message fields that were not previously
  // linked using upb_MiniTable_SetSubMessage(). Their data is parsed into an
  // internal "empty" message type that cannot be accessed directly, but that
  // can later be promoted into the true message type if the sub-message
  // fields are linked at a later time.
  //
  // Set this option if you intend to perform dynamic tree shaking and
  // promotion using the interfaces in message/promote.h. When it is enabled,
  // the resulting messages must only be accessed by code that is aware of the
  // promotion rules:
  //
  // 1. Message pointers in upb_Message, upb_Array, and upb_Map are
  //    represented by a tagged pointer, upb_TaggedMessagePointer. The tag
  //    indicates whether the message uses the internal "empty" type.
  //
  // 2. Any code *reading* these message pointers must test whether the
  //    "empty" tag bit is set, using the interfaces in mini_table/types.h.
  //    Writing of message pointers, however, should always use a plain
  //    upb_Message*, since users are not allowed to create "empty" messages.
  //
  // 3. Testing whether a field is present, or testing the array length, is
  //    always safe; these interfaces reflect that empty messages are present
  //    even though their data cannot be accessed without promoting first.
  //
  // 4. A message pointer that is tagged as empty may not be accessed
  //    directly; it may only be promoted through the interfaces in
  //    message/promote.h.
  //
  // 5. Tagged/empty messages may never be created by the user. They may only
  //    be created by the parser or by the message-copying logic in
  //    message/copy.h.
  kUpb_DecodeOption_ExperimentalAllowUnlinked = 1 << 2,

  // EXPERIMENTAL:
  //
  // When set, decoding enforces UTF-8 validation for string fields, even for
  // proto2 or for fields with `features.utf8_validation = NONE`. Normally,
  // only proto3 string fields are validated for UTF-8. Non-UTF-8 strings make
  // decoding return kUpb_DecodeStatus_BadUtf8, which is the same behavior as
  // for non-UTF-8 proto3 string fields.
  kUpb_DecodeOption_AlwaysValidateUtf8 = 1 << 3,
};
97
// Encodes `depth` into the upper 16 bits of a 32-bit decode-options word.
// The lower 16 bits of the result are zero.
UPB_INLINE uint32_t upb_DecodeOptions_MaxDepth(uint16_t depth) {
  // Widen before shifting so the shift is performed on an unsigned 32-bit
  // value rather than on a promoted (signed) int.
  const uint32_t widened = depth;
  return widened << 16;
}
101
// Extracts the value stored in the upper 16 bits of `options`, i.e. the
// inverse of upb_DecodeOptions_MaxDepth(). The lower 16 bits are ignored.
UPB_INLINE uint16_t upb_DecodeOptions_GetMaxDepth(uint32_t options) {
  return (uint16_t)(options >> 16);
}
105
// Enforces an upper bound on recursion depth.
//
// Reads the depth stored in the upper 16 bits of `decode_options`, lowers it
// to `limit` if it is greater, and returns an options word carrying the
// resulting depth in the upper 16 bits together with the original lower
// 16 bits of `decode_options`.
//
// NOTE(review): a stored depth of 0 always passes through unchanged. If the
// decoder interprets 0 as "use a default depth", that default is not clamped
// by this function -- confirm against the decoder implementation.
UPB_INLINE int upb_Decode_LimitDepth(uint32_t decode_options, uint32_t limit) {
  const uint32_t flag_bits = decode_options & 0xffff;
  const uint32_t requested = upb_DecodeOptions_GetMaxDepth(decode_options);
  const uint32_t clamped = requested > limit ? limit : requested;
  // `clamped` never exceeds `requested`, which came from a uint16_t, so the
  // narrowing cast cannot lose bits.
  return (int)(upb_DecodeOptions_MaxDepth((uint16_t)clamped) | flag_bits);
}
112
// Status codes returned by upb_Decode().
typedef enum {
  kUpb_DecodeStatus_Ok = 0,

  // Wire format was corrupt.
  kUpb_DecodeStatus_Malformed = 1,

  // Arena alloc failed.
  kUpb_DecodeStatus_OutOfMemory = 2,

  // String field had bad UTF-8.
  kUpb_DecodeStatus_BadUtf8 = 3,

  // Exceeded upb_DecodeOptions_MaxDepth.
  kUpb_DecodeStatus_MaxDepthExceeded = 4,

  // The kUpb_DecodeOption_CheckRequired check failed (see that option's
  // caveats), but the parse otherwise succeeded.
  kUpb_DecodeStatus_MissingRequired = 5,

  // An unlinked sub-message field was present, but
  // kUpb_DecodeOption_ExperimentalAllowUnlinked was not specified in the list
  // of options.
  kUpb_DecodeStatus_UnlinkedSubMessage = 6,
} upb_DecodeStatus;
130
131 UPB_API upb_DecodeStatus upb_Decode(const char* buf, size_t size,
132 upb_Message* msg, const upb_MiniTable* l,
133 const upb_ExtensionRegistry* extreg,
134 int options, upb_Arena* arena);
135
136 #ifdef __cplusplus
137 } /* extern "C" */
138 #endif
139
140 #include "upb/port/undef.inc"
141
142 #endif /* UPB_WIRE_DECODE_H_ */
143