1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #ifndef UPB_WIRE_READER_H_
29 #define UPB_WIRE_READER_H_
30 
31 #include "upb/wire/eps_copy_input_stream.h"
32 #include "upb/wire/swap_internal.h"
33 #include "upb/wire/types.h"
34 
35 // Must be last.
36 #include "upb/port/def.inc"
37 
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41 
42 // The upb_WireReader interface is suitable for general-purpose parsing of
43 // protobuf binary wire format.  It is designed to be used along with
44 // upb_EpsCopyInputStream for buffering, and all parsing routines in this file
45 // assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is
46 // available to read without any bounds checks.
47 
// Number of low-order tag bits that hold the wire type; the field number
// occupies the bits above them.
#define kUpb_WireReader_WireTypeBits 3
// Mask that isolates the wire-type bits of a tag.
#define kUpb_WireReader_WireTypeMask 0x7
50 
// Result of _upb_WireReader_ReadLongVarint(): the decoded value together with
// the input position reached by the decoder.
typedef struct {
  const char* ptr;  // Position past the varint; NULL is treated as an error.
  uint64_t val;     // Decoded varint value.
} _upb_WireReader_ReadLongVarintRet;
55 
56 _upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint(
57     const char* ptr, uint64_t val);
58 
_upb_WireReader_ReadVarint(const char * ptr,uint64_t * val,int maxlen,uint64_t maxval)59 static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr,
60                                                               uint64_t* val,
61                                                               int maxlen,
62                                                               uint64_t maxval) {
63   uint64_t byte = (uint8_t)*ptr;
64   if (UPB_LIKELY((byte & 0x80) == 0)) {
65     *val = (uint32_t)byte;
66     return ptr + 1;
67   }
68   const char* start = ptr;
69   _upb_WireReader_ReadLongVarintRet res =
70       _upb_WireReader_ReadLongVarint(ptr, byte);
71   if (!res.ptr || (maxlen < 10 && res.ptr - start > maxlen) ||
72       res.val > maxval) {
73     return NULL;  // Malformed.
74   }
75   *val = res.val;
76   return res.ptr;
77 }
78 
79 // Parses a tag into `tag`, and returns a pointer past the end of the tag, or
80 // NULL if there was an error in the tag data.
81 //
82 // REQUIRES: there must be at least 10 bytes of data available at `ptr`.
83 // Bounds checks must be performed before calling this function, preferably
84 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_ReadTag(const char * ptr,uint32_t * tag)85 static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr,
86                                                           uint32_t* tag) {
87   uint64_t val;
88   ptr = _upb_WireReader_ReadVarint(ptr, &val, 5, UINT32_MAX);
89   if (!ptr) return NULL;
90   *tag = val;
91   return ptr;
92 }
93 
94 // Given a tag, returns the field number.
upb_WireReader_GetFieldNumber(uint32_t tag)95 UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) {
96   return tag >> kUpb_WireReader_WireTypeBits;
97 }
98 
99 // Given a tag, returns the wire type.
upb_WireReader_GetWireType(uint32_t tag)100 UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) {
101   return tag & kUpb_WireReader_WireTypeMask;
102 }
103 
upb_WireReader_ReadVarint(const char * ptr,uint64_t * val)104 UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr,
105                                                  uint64_t* val) {
106   return _upb_WireReader_ReadVarint(ptr, val, 10, UINT64_MAX);
107 }
108 
109 // Skips data for a varint, returning a pointer past the end of the varint, or
110 // NULL if there was an error in the varint data.
111 //
112 // REQUIRES: there must be at least 10 bytes of data available at `ptr`.
113 // Bounds checks must be performed before calling this function, preferably
114 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_SkipVarint(const char * ptr)115 UPB_INLINE const char* upb_WireReader_SkipVarint(const char* ptr) {
116   uint64_t val;
117   return upb_WireReader_ReadVarint(ptr, &val);
118 }
119 
// Reads a varint indicating the size of a delimited field into `size`, and
// returns a pointer past the end of the varint, or NULL if there was an error
// in the varint data or the size is INT32_MAX or larger.
122 //
123 // REQUIRES: there must be at least 10 bytes of data available at `ptr`.
124 // Bounds checks must be performed before calling this function, preferably
125 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_ReadSize(const char * ptr,int * size)126 UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) {
127   uint64_t size64;
128   ptr = upb_WireReader_ReadVarint(ptr, &size64);
129   if (!ptr || size64 >= INT32_MAX) return NULL;
130   *size = size64;
131   return ptr;
132 }
133 
134 // Reads a fixed32 field, performing byte swapping if necessary.
135 //
136 // REQUIRES: there must be at least 4 bytes of data available at `ptr`.
137 // Bounds checks must be performed before calling this function, preferably
138 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_ReadFixed32(const char * ptr,void * val)139 UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) {
140   uint32_t uval;
141   memcpy(&uval, ptr, 4);
142   uval = _upb_BigEndian_Swap32(uval);
143   memcpy(val, &uval, 4);
144   return ptr + 4;
145 }
146 
147 // Reads a fixed64 field, performing byte swapping if necessary.
148 //
// REQUIRES: there must be at least 8 bytes of data available at `ptr`.
150 // Bounds checks must be performed before calling this function, preferably
151 // by calling upb_EpsCopyInputStream_IsDone().
upb_WireReader_ReadFixed64(const char * ptr,void * val)152 UPB_INLINE const char* upb_WireReader_ReadFixed64(const char* ptr, void* val) {
153   uint64_t uval;
154   memcpy(&uval, ptr, 8);
155   uval = _upb_BigEndian_Swap64(uval);
156   memcpy(val, &uval, 8);
157   return ptr + 8;
158 }
159 
160 const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag,
161                                       int depth_limit,
162                                       upb_EpsCopyInputStream* stream);
163 
164 // Skips data for a group, returning a pointer past the end of the group, or
165 // NULL if there was an error parsing the group.  The `tag` argument should be
166 // the start group tag that begins the group.  The `depth_limit` argument
167 // indicates how many levels of recursion the group is allowed to have before
168 // reporting a parse error (this limit exists to protect against stack
169 // overflow).
170 //
171 // TODO: evaluate how the depth_limit should be specified. Do users need
172 // control over this?
upb_WireReader_SkipGroup(const char * ptr,uint32_t tag,upb_EpsCopyInputStream * stream)173 UPB_INLINE const char* upb_WireReader_SkipGroup(
174     const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) {
175   return _upb_WireReader_SkipGroup(ptr, tag, 100, stream);
176 }
177 
_upb_WireReader_SkipValue(const char * ptr,uint32_t tag,int depth_limit,upb_EpsCopyInputStream * stream)178 UPB_INLINE const char* _upb_WireReader_SkipValue(
179     const char* ptr, uint32_t tag, int depth_limit,
180     upb_EpsCopyInputStream* stream) {
181   switch (upb_WireReader_GetWireType(tag)) {
182     case kUpb_WireType_Varint:
183       return upb_WireReader_SkipVarint(ptr);
184     case kUpb_WireType_32Bit:
185       return ptr + 4;
186     case kUpb_WireType_64Bit:
187       return ptr + 8;
188     case kUpb_WireType_Delimited: {
189       int size;
190       ptr = upb_WireReader_ReadSize(ptr, &size);
191       if (!ptr) return NULL;
192       ptr += size;
193       return ptr;
194     }
195     case kUpb_WireType_StartGroup:
196       return _upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream);
197     case kUpb_WireType_EndGroup:
198       return NULL;  // Should be handled before now.
199     default:
200       return NULL;  // Unknown wire type.
201   }
202 }
203 
204 // Skips data for a wire value of any type, returning a pointer past the end of
// the data, or NULL if there was an error parsing the value. The `tag` argument
206 // should be the tag that was just parsed. The `depth_limit` argument indicates
207 // how many levels of recursion a group is allowed to have before reporting a
208 // parse error (this limit exists to protect against stack overflow).
209 //
210 // REQUIRES: there must be at least 10 bytes of data available at `ptr`.
211 // Bounds checks must be performed before calling this function, preferably
212 // by calling upb_EpsCopyInputStream_IsDone().
213 //
214 // TODO: evaluate how the depth_limit should be specified. Do users need
215 // control over this?
upb_WireReader_SkipValue(const char * ptr,uint32_t tag,upb_EpsCopyInputStream * stream)216 UPB_INLINE const char* upb_WireReader_SkipValue(
217     const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) {
218   return _upb_WireReader_SkipValue(ptr, tag, 100, stream);
219 }
220 
221 #ifdef __cplusplus
222 } /* extern "C" */
223 #endif
224 
225 #include "upb/port/undef.inc"
226 
227 #endif  // UPB_WIRE_READER_H_
228