1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifndef UPB_WIRE_READER_H_
29 #define UPB_WIRE_READER_H_
30
31 #include "upb/wire/eps_copy_input_stream.h"
32 #include "upb/wire/swap_internal.h"
33 #include "upb/wire/types.h"
34
35 // Must be last.
36 #include "upb/port/def.inc"
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41
42 // The upb_WireReader interface is suitable for general-purpose parsing of
43 // protobuf binary wire format. It is designed to be used along with
44 // upb_EpsCopyInputStream for buffering, and all parsing routines in this file
45 // assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is
46 // available to read without any bounds checks.
47
48 #define kUpb_WireReader_WireTypeMask 7
49 #define kUpb_WireReader_WireTypeBits 3
50
// Result of the out-of-line multi-byte varint decoder.
typedef struct {
  // Position just past the decoded varint; callers treat NULL as "malformed".
  const char* ptr;
  // The decoded varint value.
  uint64_t val;
} _upb_WireReader_ReadLongVarintRet;

// Slow path for varints whose first byte has the continuation bit (0x80) set.
// Declared here and defined out of line (not in this header).  The inline
// fast path calls it with `ptr` addressing the first byte of the varint and
// `val` holding that first byte.
_upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint(
    const char* ptr, uint64_t val);
58
_upb_WireReader_ReadVarint(const char * ptr,uint64_t * val,int maxlen,uint64_t maxval)59 static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr,
60 uint64_t* val,
61 int maxlen,
62 uint64_t maxval) {
63 uint64_t byte = (uint8_t)*ptr;
64 if (UPB_LIKELY((byte & 0x80) == 0)) {
65 *val = (uint32_t)byte;
66 return ptr + 1;
67 }
68 const char* start = ptr;
69 _upb_WireReader_ReadLongVarintRet res =
70 _upb_WireReader_ReadLongVarint(ptr, byte);
71 if (!res.ptr || (maxlen < 10 && res.ptr - start > maxlen) ||
72 res.val > maxval) {
73 return NULL; // Malformed.
74 }
75 *val = res.val;
76 return res.ptr;
77 }
78
79 // Parses a tag into `tag`, and returns a pointer past the end of the tag, or
80 // NULL if there was an error in the tag data.
81 //
82 // REQUIRES: there must be at least 10 bytes of data available at `ptr`.
83 // Bounds checks must be performed before calling this function, preferably
84 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_ReadTag(const char * ptr,uint32_t * tag)85 static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr,
86 uint32_t* tag) {
87 uint64_t val;
88 ptr = _upb_WireReader_ReadVarint(ptr, &val, 5, UINT32_MAX);
89 if (!ptr) return NULL;
90 *tag = val;
91 return ptr;
92 }
93
94 // Given a tag, returns the field number.
upb_WireReader_GetFieldNumber(uint32_t tag)95 UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) {
96 return tag >> kUpb_WireReader_WireTypeBits;
97 }
98
99 // Given a tag, returns the wire type.
upb_WireReader_GetWireType(uint32_t tag)100 UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) {
101 return tag & kUpb_WireReader_WireTypeMask;
102 }
103
upb_WireReader_ReadVarint(const char * ptr,uint64_t * val)104 UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr,
105 uint64_t* val) {
106 return _upb_WireReader_ReadVarint(ptr, val, 10, UINT64_MAX);
107 }
108
109 // Skips data for a varint, returning a pointer past the end of the varint, or
110 // NULL if there was an error in the varint data.
111 //
112 // REQUIRES: there must be at least 10 bytes of data available at `ptr`.
113 // Bounds checks must be performed before calling this function, preferably
114 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_SkipVarint(const char * ptr)115 UPB_INLINE const char* upb_WireReader_SkipVarint(const char* ptr) {
116 uint64_t val;
117 return upb_WireReader_ReadVarint(ptr, &val);
118 }
119
120 // Reads a varint indicating the size of a delimited field into `size`, or
121 // NULL if there was an error in the varint data.
122 //
123 // REQUIRES: there must be at least 10 bytes of data available at `ptr`.
124 // Bounds checks must be performed before calling this function, preferably
125 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_ReadSize(const char * ptr,int * size)126 UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) {
127 uint64_t size64;
128 ptr = upb_WireReader_ReadVarint(ptr, &size64);
129 if (!ptr || size64 >= INT32_MAX) return NULL;
130 *size = size64;
131 return ptr;
132 }
133
134 // Reads a fixed32 field, performing byte swapping if necessary.
135 //
136 // REQUIRES: there must be at least 4 bytes of data available at `ptr`.
137 // Bounds checks must be performed before calling this function, preferably
138 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_ReadFixed32(const char * ptr,void * val)139 UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) {
140 uint32_t uval;
141 memcpy(&uval, ptr, 4);
142 uval = _upb_BigEndian_Swap32(uval);
143 memcpy(val, &uval, 4);
144 return ptr + 4;
145 }
146
147 // Reads a fixed64 field, performing byte swapping if necessary.
148 //
149 // REQUIRES: there must be at least 4 bytes of data available at `ptr`.
150 // Bounds checks must be performed before calling this function, preferably
151 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_ReadFixed64(const char * ptr,void * val)152 UPB_INLINE const char* upb_WireReader_ReadFixed64(const char* ptr, void* val) {
153 uint64_t uval;
154 memcpy(&uval, ptr, 8);
155 uval = _upb_BigEndian_Swap64(uval);
156 memcpy(val, &uval, 8);
157 return ptr + 8;
158 }
159
160 const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag,
161 int depth_limit,
162 upb_EpsCopyInputStream* stream);
163
164 // Skips data for a group, returning a pointer past the end of the group, or
165 // NULL if there was an error parsing the group. The `tag` argument should be
166 // the start group tag that begins the group. The `depth_limit` argument
167 // indicates how many levels of recursion the group is allowed to have before
168 // reporting a parse error (this limit exists to protect against stack
169 // overflow).
170 //
171 // TODO: evaluate how the depth_limit should be specified. Do users need
172 // control over this?
upb_WireReader_SkipGroup(const char * ptr,uint32_t tag,upb_EpsCopyInputStream * stream)173 UPB_INLINE const char* upb_WireReader_SkipGroup(
174 const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) {
175 return _upb_WireReader_SkipGroup(ptr, tag, 100, stream);
176 }
177
_upb_WireReader_SkipValue(const char * ptr,uint32_t tag,int depth_limit,upb_EpsCopyInputStream * stream)178 UPB_INLINE const char* _upb_WireReader_SkipValue(
179 const char* ptr, uint32_t tag, int depth_limit,
180 upb_EpsCopyInputStream* stream) {
181 switch (upb_WireReader_GetWireType(tag)) {
182 case kUpb_WireType_Varint:
183 return upb_WireReader_SkipVarint(ptr);
184 case kUpb_WireType_32Bit:
185 return ptr + 4;
186 case kUpb_WireType_64Bit:
187 return ptr + 8;
188 case kUpb_WireType_Delimited: {
189 int size;
190 ptr = upb_WireReader_ReadSize(ptr, &size);
191 if (!ptr) return NULL;
192 ptr += size;
193 return ptr;
194 }
195 case kUpb_WireType_StartGroup:
196 return _upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream);
197 case kUpb_WireType_EndGroup:
198 return NULL; // Should be handled before now.
199 default:
200 return NULL; // Unknown wire type.
201 }
202 }
203
204 // Skips data for a wire value of any type, returning a pointer past the end of
205 // the data, or NULL if there was an error parsing the group. The `tag` argument
206 // should be the tag that was just parsed. The `depth_limit` argument indicates
207 // how many levels of recursion a group is allowed to have before reporting a
208 // parse error (this limit exists to protect against stack overflow).
209 //
210 // REQUIRES: there must be at least 10 bytes of data available at `ptr`.
211 // Bounds checks must be performed before calling this function, preferably
212 // by calling upb_EpsCopyInputStream_IsDone().
213 //
214 // TODO: evaluate how the depth_limit should be specified. Do users need
215 // control over this?
upb_WireReader_SkipValue(const char * ptr,uint32_t tag,upb_EpsCopyInputStream * stream)216 UPB_INLINE const char* upb_WireReader_SkipValue(
217 const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) {
218 return _upb_WireReader_SkipValue(ptr, tag, 100, stream);
219 }
220
221 #ifdef __cplusplus
222 } /* extern "C" */
223 #endif
224
225 #include "upb/port/undef.inc"
226
227 #endif // UPB_WIRE_READER_H_
228