xref: /aosp_15_r20/external/grpc-grpc/third_party/upb/upb/wire/decode.h (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // upb_decode: parsing into a upb_Message using a upb_MiniTable.
9 
10 #ifndef UPB_WIRE_DECODE_H_
11 #define UPB_WIRE_DECODE_H_
12 
13 #include <stddef.h>
14 #include <stdint.h>
15 
16 #include "upb/mem/arena.h"
17 #include "upb/message/message.h"
18 #include "upb/mini_table/extension_registry.h"
19 #include "upb/mini_table/message.h"
20 
21 // Must be last.
22 #include "upb/port/def.inc"
23 
24 #ifdef __cplusplus
25 extern "C" {
26 #endif
27 
/* Bit flags accepted in the low bits of the decoder's `options` argument. */
enum {
  /* String fields point into the caller's input buffer rather than being
   * copied into the arena. */
  kUpb_DecodeOption_AliasString = 1 << 0,

  /* Fail the parse if, when a message's data ends, that message is missing
   * any required field.  Parsing does not stop early; the failure is only
   * reported once the parse has finished.
   *
   * IMPORTANT CAVEATS:
   *
   * 1. False failures are possible: an incomplete message seen on the wire
   *    may be completed later when the same sub-message occurs again.  To be
   *    truly robust, a second pass is needed to confirm a failure.
   *
   * 2. False successes are possible when decoding into a message that
   *    already has sub-message fields present.  If such a sub-message does
   *    not appear in the binary payload it is never visited, so an incomplete
   *    sub-message goes undetected.  Consequently this check is only useful
   *    for implementing ParseFromString() semantics; MergeFromString() always
   *    needs a post-parse validation step. */
  kUpb_DecodeOption_CheckRequired = 1 << 1,

  /* EXPERIMENTAL:
   *
   * Permit parsing of sub-message fields that have not previously been
   * linked with upb_MiniTable_SetSubMessage().  Their data is parsed into an
   * internal "empty" message type that cannot be accessed directly, but that
   * can later be promoted to the true message type once the sub-message
   * fields are linked.
   *
   * Set this option when you intend to do dynamic tree shaking and promotion
   * through the interfaces in message/promote.h.  When it is enabled, the
   * resulting messages must only be touched by code that knows the promotion
   * rules:
   *
   * 1. Message pointers stored in upb_Message, upb_Array, and upb_Map are
   *    tagged pointers (upb_TaggedMessagePointer).  The tag records whether
   *    the message uses the internal "empty" type.
   *
   * 2. Code that *reads* these pointers must check the "empty" tag bit via
   *    the interfaces in mini_table/types.h.  Code that *writes* message
   *    pointers should always use a plain upb_Message*, because users are not
   *    allowed to create "empty" messages.
   *
   * 3. Testing field presence or array length is always safe; those
   *    interfaces report empty messages as present even though their data
   *    cannot be read before promotion.
   *
   * 4. A message whose pointer is tagged as empty may not be accessed
   *    directly; it may only be promoted through the interfaces in
   *    message/promote.h.
   *
   * 5. Users may never create tagged/empty messages.  Only the parser and the
   *    message-copying logic in message/copy.h may create them.
   */
  kUpb_DecodeOption_ExperimentalAllowUnlinked = 1 << 2,

  /* EXPERIMENTAL:
   *
   * Enforce UTF-8 validation on string fields during decoding, including
   * proto2 fields and fields with `features.utf8_validation = NONE`.
   * Normally only proto3 string fields are validated.  A non-UTF-8 string
   * makes decoding return kUpb_DecodeStatus_BadUtf8, matching the behavior
   * for non-UTF-8 proto3 string fields.
   */
  kUpb_DecodeOption_AlwaysValidateUtf8 = 1 << 3,
};
97 
// Packs a maximum recursion depth into the upper 16 bits of a 32-bit options
// word.  The low 16 bits of the result are zero, so the value can be OR-ed
// together with kUpb_DecodeOption_* flags.
UPB_INLINE uint32_t upb_DecodeOptions_MaxDepth(uint16_t depth) {
  enum { kDepthShift = 16 };
  // Widen before shifting so the shift is performed on a 32-bit unsigned value.
  const uint32_t widened = depth;
  return widened << kDepthShift;
}
101 
// Extracts the maximum recursion depth stored in the upper 16 bits of an
// options word (the inverse of upb_DecodeOptions_MaxDepth()).  The low 16
// bits of `options` are ignored.
UPB_INLINE uint16_t upb_DecodeOptions_GetMaxDepth(uint32_t options) {
  const uint32_t depth_bits = options >> 16;
  // After the shift only 16 significant bits remain, so narrowing is lossless.
  return (uint16_t)depth_bits;
}
105 
106 // Enforce an upper bound on recursion depth.
upb_Decode_LimitDepth(uint32_t decode_options,uint32_t limit)107 UPB_INLINE int upb_Decode_LimitDepth(uint32_t decode_options, uint32_t limit) {
108   uint32_t max_depth = upb_DecodeOptions_GetMaxDepth(decode_options);
109   if (max_depth > limit) max_depth = limit;
110   return upb_DecodeOptions_MaxDepth(max_depth) | (decode_options & 0xffff);
111 }
112 
// Result codes returned by upb_Decode().
typedef enum {
  // No error.
  kUpb_DecodeStatus_Ok = 0,

  // Wire format was corrupt.
  kUpb_DecodeStatus_Malformed = 1,

  // Arena alloc failed.
  kUpb_DecodeStatus_OutOfMemory = 2,

  // String field had bad UTF-8.
  kUpb_DecodeStatus_BadUtf8 = 3,

  // Exceeded upb_DecodeOptions_MaxDepth.
  kUpb_DecodeStatus_MaxDepthExceeded = 4,

  // The kUpb_DecodeOption_CheckRequired check failed (see the description of
  // that option), but the parse otherwise succeeded.
  kUpb_DecodeStatus_MissingRequired = 5,

  // An unlinked sub-message field was present, but
  // kUpb_DecodeOption_ExperimentalAllowUnlinked was not specified in the list
  // of options.
  kUpb_DecodeStatus_UnlinkedSubMessage = 6,
} upb_DecodeStatus;
130 
131 UPB_API upb_DecodeStatus upb_Decode(const char* buf, size_t size,
132                                     upb_Message* msg, const upb_MiniTable* l,
133                                     const upb_ExtensionRegistry* extreg,
134                                     int options, upb_Arena* arena);
135 
136 #ifdef __cplusplus
137 } /* extern "C" */
138 #endif
139 
140 #include "upb/port/undef.inc"
141 
142 #endif /* UPB_WIRE_DECODE_H_ */
143