1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
29 #define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
30
#include <limits.h>
#include <string.h>

#include "upb/mem/arena.h"

// Must be last.
#include "upb/port/def.inc"
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41
// Upper bound on the wire-format size of a single protobuf field.  The decoder
// wants a single bounds check per field, so once
// upb_EpsCopyInputStream_IsDone() has been called the stream promises that
// this many bytes may be read without checking again.  When the underlying
// buffer cannot provide that, the stream copies data into its patch buffer.
#define kUpb_EpsCopyInputStream_SlopBytes 16

// Reserved values of upb_EpsCopyInputStream.aliasing.  Any value above
// kUpb_EpsCopyInputStream_NoDelta is a byte delta that converts a parsing
// pointer into a pointer in the original input buffer.
enum {
  // Aliasing was not requested when the stream was initialized.
  kUpb_EpsCopyInputStream_NoAliasing = 0,
  // NOTE(review): not referenced in this header; presumably marks "aliasing
  // enabled but currently unavailable while on the patch buffer" -- confirm
  // against the .c file.
  kUpb_EpsCopyInputStream_OnPatch = 1,
  // Aliasing is enabled and the parsing pointer already points into the input
  // buffer, so no delta needs to be applied.
  kUpb_EpsCopyInputStream_NoDelta = 2
};
55
56 typedef struct {
57 const char* end; // Can read up to SlopBytes bytes beyond this.
58 const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0)
59 uintptr_t aliasing;
60 int limit; // Submessage limit relative to end
61 bool error; // To distinguish between EOF and error.
62 char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
63 } upb_EpsCopyInputStream;
64
65 // Returns true if the stream is in the error state. A stream enters the error
66 // state when the user reads past a limit (caught in IsDone()) or the
67 // ZeroCopyInputStream returns an error.
upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream * e)68 UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) {
69 return e->error;
70 }
71
72 typedef const char* upb_EpsCopyInputStream_BufferFlipCallback(
73 upb_EpsCopyInputStream* e, const char* old_end, const char* new_start);
74
75 typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc(
76 upb_EpsCopyInputStream* e, const char* ptr, int overrun);
77
78 // Initializes a upb_EpsCopyInputStream using the contents of the buffer
79 // [*ptr, size]. Updates `*ptr` as necessary to guarantee that at least
80 // kUpb_EpsCopyInputStream_SlopBytes are available to read.
upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream * e,const char ** ptr,size_t size,bool enable_aliasing)81 UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
82 const char** ptr, size_t size,
83 bool enable_aliasing) {
84 if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
85 memset(&e->patch, 0, 32);
86 if (size) memcpy(&e->patch, *ptr, size);
87 e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch
88 : kUpb_EpsCopyInputStream_NoAliasing;
89 *ptr = e->patch;
90 e->end = *ptr + size;
91 e->limit = 0;
92 } else {
93 e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
94 e->limit = kUpb_EpsCopyInputStream_SlopBytes;
95 e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta
96 : kUpb_EpsCopyInputStream_NoAliasing;
97 }
98 e->limit_ptr = e->end;
99 e->error = false;
100 }
101
// Result of the fast "are we done?" check performed by
// upb_EpsCopyInputStream_IsDoneStatus().
typedef enum {
  // The parsing position sits exactly on the current limit.
  kUpb_IsDoneStatus_Done,

  // The parsing position is before the limit; parsing may continue.
  kUpb_IsDoneStatus_NotDone,

  // The fast check could not decide: the position is past the fast-check
  // boundary but not exactly on the limit.  The fallback function must run to
  // either move to a new buffer or report an error.
  kUpb_IsDoneStatus_NeedFallback
} upb_IsDoneStatus;
113
114 // Returns the status of the current stream position. This is a low-level
115 // function, it is simpler to call upb_EpsCopyInputStream_IsDone() if possible.
upb_EpsCopyInputStream_IsDoneStatus(upb_EpsCopyInputStream * e,const char * ptr,int * overrun)116 UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus(
117 upb_EpsCopyInputStream* e, const char* ptr, int* overrun) {
118 *overrun = ptr - e->end;
119 if (UPB_LIKELY(ptr < e->limit_ptr)) {
120 return kUpb_IsDoneStatus_NotDone;
121 } else if (UPB_LIKELY(*overrun == e->limit)) {
122 return kUpb_IsDoneStatus_Done;
123 } else {
124 return kUpb_IsDoneStatus_NeedFallback;
125 }
126 }
127
128 // Returns true if the stream has hit a limit, either the current delimited
129 // limit or the overall end-of-stream. As a side effect, this function may flip
130 // the pointer to a new buffer if there are less than
131 // kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer.
132 //
133 // Postcondition: if the function returns false, there are at least
134 // kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr.
upb_EpsCopyInputStream_IsDoneWithCallback(upb_EpsCopyInputStream * e,const char ** ptr,upb_EpsCopyInputStream_IsDoneFallbackFunc * func)135 UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback(
136 upb_EpsCopyInputStream* e, const char** ptr,
137 upb_EpsCopyInputStream_IsDoneFallbackFunc* func) {
138 int overrun;
139 switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) {
140 case kUpb_IsDoneStatus_Done:
141 return true;
142 case kUpb_IsDoneStatus_NotDone:
143 return false;
144 case kUpb_IsDoneStatus_NeedFallback:
145 *ptr = func(e, *ptr, overrun);
146 return *ptr == NULL;
147 }
148 UPB_UNREACHABLE();
149 }
150
151 const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback(
152 upb_EpsCopyInputStream* e, const char* ptr, int overrun);
153
154 // A simpler version of IsDoneWithCallback() that does not support a buffer flip
155 // callback. Useful in cases where we do not need to insert custom logic at
156 // every buffer flip.
157 //
158 // If this returns true, the user must call upb_EpsCopyInputStream_IsError()
159 // to distinguish between EOF and error.
upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream * e,const char ** ptr)160 UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e,
161 const char** ptr) {
162 return upb_EpsCopyInputStream_IsDoneWithCallback(
163 e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback);
164 }
165
166 // Returns the total number of bytes that are safe to read from the current
167 // buffer without reading uninitialized or unallocated memory.
168 //
169 // Note that this check does not respect any semantic limits on the stream,
170 // either limits from PushLimit() or the overall stream end, so some of these
171 // bytes may have unpredictable, nonsense values in them. The guarantee is only
172 // that the bytes are valid to read from the perspective of the C language
173 // (ie. you can read without triggering UBSAN or ASAN).
upb_EpsCopyInputStream_BytesAvailable(upb_EpsCopyInputStream * e,const char * ptr)174 UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable(
175 upb_EpsCopyInputStream* e, const char* ptr) {
176 return (e->end - ptr) + kUpb_EpsCopyInputStream_SlopBytes;
177 }
178
179 // Returns true if the given delimited field size is valid (it does not extend
180 // beyond any previously-pushed limits). `ptr` should point to the beginning
181 // of the field data, after the delimited size.
182 //
183 // Note that this does *not* guarantee that all of the data for this field is in
184 // the current buffer.
upb_EpsCopyInputStream_CheckSize(const upb_EpsCopyInputStream * e,const char * ptr,int size)185 UPB_INLINE bool upb_EpsCopyInputStream_CheckSize(
186 const upb_EpsCopyInputStream* e, const char* ptr, int size) {
187 UPB_ASSERT(size >= 0);
188 return ptr - e->end + size <= e->limit;
189 }
190
_upb_EpsCopyInputStream_CheckSizeAvailable(upb_EpsCopyInputStream * e,const char * ptr,int size,bool submessage)191 UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable(
192 upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) {
193 // This is one extra branch compared to the more normal:
194 // return (size_t)(end - ptr) < size;
195 // However it is one less computation if we are just about to use "ptr + len":
196 // https://godbolt.org/z/35YGPz
197 // In microbenchmarks this shows a small improvement.
198 uintptr_t uptr = (uintptr_t)ptr;
199 uintptr_t uend = (uintptr_t)e->limit_ptr;
200 uintptr_t res = uptr + (size_t)size;
201 if (!submessage) uend += kUpb_EpsCopyInputStream_SlopBytes;
202 // NOTE: this check depends on having a linear address space. This is not
203 // technically guaranteed by uintptr_t.
204 bool ret = res >= uptr && res <= uend;
205 if (size < 0) UPB_ASSERT(!ret);
206 return ret;
207 }
208
209 // Returns true if the given delimited field size is valid (it does not extend
210 // beyond any previously-pushed limited) *and* all of the data for this field is
211 // available to be read in the current buffer.
212 //
213 // If the size is negative, this function will always return false. This
214 // property can be useful in some cases.
upb_EpsCopyInputStream_CheckDataSizeAvailable(upb_EpsCopyInputStream * e,const char * ptr,int size)215 UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable(
216 upb_EpsCopyInputStream* e, const char* ptr, int size) {
217 return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, false);
218 }
219
220 // Returns true if the given sub-message size is valid (it does not extend
221 // beyond any previously-pushed limited) *and* all of the data for this
222 // sub-message is available to be parsed in the current buffer.
223 //
224 // This implies that all fields from the sub-message can be parsed from the
225 // current buffer while maintaining the invariant that we always have at least
226 // kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of
227 // any individual field start.
228 //
229 // If the size is negative, this function will always return false. This
230 // property can be useful in some cases.
upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(upb_EpsCopyInputStream * e,const char * ptr,int size)231 UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(
232 upb_EpsCopyInputStream* e, const char* ptr, int size) {
233 return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true);
234 }
235
236 // Returns true if aliasing_enabled=true was passed to
237 // upb_EpsCopyInputStream_Init() when this stream was initialized.
upb_EpsCopyInputStream_AliasingEnabled(upb_EpsCopyInputStream * e)238 UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
239 upb_EpsCopyInputStream* e) {
240 return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing;
241 }
242
243 // Returns true if aliasing_enabled=true was passed to
244 // upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can
245 // alias into the region [ptr, size] in an input buffer.
upb_EpsCopyInputStream_AliasingAvailable(upb_EpsCopyInputStream * e,const char * ptr,size_t size)246 UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable(
247 upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
248 // When EpsCopyInputStream supports streaming, this will need to become a
249 // runtime check.
250 return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) &&
251 e->aliasing >= kUpb_EpsCopyInputStream_NoDelta;
252 }
253
254 // Returns a pointer into an input buffer that corresponds to the parsing
255 // pointer `ptr`. The returned pointer may be the same as `ptr`, but also may
256 // be different if we are currently parsing out of the patch buffer.
257 //
258 // REQUIRES: Aliasing must be available for the given pointer. If the input is a
259 // flat buffer and aliasing is enabled, then aliasing will always be available.
upb_EpsCopyInputStream_GetAliasedPtr(upb_EpsCopyInputStream * e,const char * ptr)260 UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr(
261 upb_EpsCopyInputStream* e, const char* ptr) {
262 UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0));
263 uintptr_t delta =
264 e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing;
265 return (const char*)((uintptr_t)ptr + delta);
266 }
267
268 // Reads string data from the input, aliasing into the input buffer instead of
269 // copying. The parsing pointer is passed in `*ptr`, and will be updated if
270 // necessary to point to the actual input buffer. Returns the new parsing
271 // pointer, which will be advanced past the string data.
272 //
273 // REQUIRES: Aliasing must be available for this data region (test with
274 // upb_EpsCopyInputStream_AliasingAvailable().
upb_EpsCopyInputStream_ReadStringAliased(upb_EpsCopyInputStream * e,const char ** ptr,size_t size)275 UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased(
276 upb_EpsCopyInputStream* e, const char** ptr, size_t size) {
277 UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size));
278 const char* ret = *ptr + size;
279 *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr);
280 UPB_ASSUME(ret != NULL);
281 return ret;
282 }
283
284 // Skips `size` bytes of data from the input and returns a pointer past the end.
285 // Returns NULL on end of stream or error.
upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream * e,const char * ptr,int size)286 UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e,
287 const char* ptr, int size) {
288 if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
289 return ptr + size;
290 }
291
292 // Copies `size` bytes of data from the input `ptr` into the buffer `to`, and
293 // returns a pointer past the end. Returns NULL on end of stream or error.
upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream * e,const char * ptr,void * to,int size)294 UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e,
295 const char* ptr, void* to,
296 int size) {
297 if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL;
298 memcpy(to, ptr, size);
299 return ptr + size;
300 }
301
302 // Reads string data from the stream and advances the pointer accordingly.
303 // If aliasing was enabled when the stream was initialized, then the returned
304 // pointer will point into the input buffer if possible, otherwise new data
305 // will be allocated from arena and copied into. We may be forced to copy even
306 // if aliasing was enabled if the input data spans input buffers.
307 //
308 // Returns NULL if memory allocation failed, or we reached a premature EOF.
upb_EpsCopyInputStream_ReadString(upb_EpsCopyInputStream * e,const char ** ptr,size_t size,upb_Arena * arena)309 UPB_INLINE const char* upb_EpsCopyInputStream_ReadString(
310 upb_EpsCopyInputStream* e, const char** ptr, size_t size,
311 upb_Arena* arena) {
312 if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) {
313 return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size);
314 } else {
315 // We need to allocate and copy.
316 if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) {
317 return NULL;
318 }
319 UPB_ASSERT(arena);
320 char* data = (char*)upb_Arena_Malloc(arena, size);
321 if (!data) return NULL;
322 const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size);
323 *ptr = data;
324 return ret;
325 }
326 }
327
_upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream * e)328 UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
329 UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
330 }
331
332 // Pushes a limit onto the stack of limits for the current stream. The limit
333 // will extend for `size` bytes beyond the position in `ptr`. Future calls to
334 // upb_EpsCopyInputStream_IsDone() will return `true` when the stream position
335 // reaches this limit.
336 //
337 // Returns a delta that the caller must store and supply to PopLimit() below.
upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream * e,const char * ptr,int size)338 UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e,
339 const char* ptr, int size) {
340 int limit = size + (int)(ptr - e->end);
341 int delta = e->limit - limit;
342 _upb_EpsCopyInputStream_CheckLimit(e);
343 UPB_ASSERT(limit <= e->limit);
344 e->limit = limit;
345 e->limit_ptr = e->end + UPB_MIN(0, limit);
346 _upb_EpsCopyInputStream_CheckLimit(e);
347 return delta;
348 }
349
350 // Pops the last limit that was pushed on this stream. This may only be called
351 // once IsDone() returns true. The user must pass the delta that was returned
352 // from PushLimit().
upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream * e,const char * ptr,int saved_delta)353 UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e,
354 const char* ptr,
355 int saved_delta) {
356 UPB_ASSERT(ptr - e->end == e->limit);
357 _upb_EpsCopyInputStream_CheckLimit(e);
358 e->limit += saved_delta;
359 e->limit_ptr = e->end + UPB_MIN(0, e->limit);
360 _upb_EpsCopyInputStream_CheckLimit(e);
361 }
362
_upb_EpsCopyInputStream_IsDoneFallbackInline(upb_EpsCopyInputStream * e,const char * ptr,int overrun,upb_EpsCopyInputStream_BufferFlipCallback * callback)363 UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline(
364 upb_EpsCopyInputStream* e, const char* ptr, int overrun,
365 upb_EpsCopyInputStream_BufferFlipCallback* callback) {
366 if (overrun < e->limit) {
367 // Need to copy remaining data into patch buffer.
368 UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes);
369 const char* old_end = ptr;
370 const char* new_start = &e->patch[0] + overrun;
371 memset(e->patch + kUpb_EpsCopyInputStream_SlopBytes, 0,
372 kUpb_EpsCopyInputStream_SlopBytes);
373 memcpy(e->patch, e->end, kUpb_EpsCopyInputStream_SlopBytes);
374 ptr = new_start;
375 e->end = &e->patch[kUpb_EpsCopyInputStream_SlopBytes];
376 e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
377 e->limit_ptr = e->end + e->limit;
378 UPB_ASSERT(ptr < e->limit_ptr);
379 if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) {
380 e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start;
381 }
382 return callback(e, old_end, new_start);
383 } else {
384 UPB_ASSERT(overrun > e->limit);
385 e->error = true;
386 return callback(e, NULL, NULL);
387 }
388 }
389
390 typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc(
391 upb_EpsCopyInputStream* e, const char* ptr, void* ctx);
392
393 // Tries to perform a fast-path handling of the given delimited message data.
394 // If the sub-message beginning at `*ptr` and extending for `len` is short and
395 // fits within this buffer, calls `func` with `ctx` as a parameter, where the
396 // pushing and popping of limits is handled automatically and with lower cost
397 // than the normal PushLimit()/PopLimit() sequence.
upb_EpsCopyInputStream_TryParseDelimitedFast(upb_EpsCopyInputStream * e,const char ** ptr,int len,upb_EpsCopyInputStream_ParseDelimitedFunc * func,void * ctx)398 static UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast(
399 upb_EpsCopyInputStream* e, const char** ptr, int len,
400 upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
401 if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) {
402 return false;
403 }
404
405 // Fast case: Sub-message is <128 bytes and fits in the current buffer.
406 // This means we can preserve limit/limit_ptr verbatim.
407 const char* saved_limit_ptr = e->limit_ptr;
408 int saved_limit = e->limit;
409 e->limit_ptr = *ptr + len;
410 e->limit = e->limit_ptr - e->end;
411 UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
412 *ptr = func(e, *ptr, ctx);
413 e->limit_ptr = saved_limit_ptr;
414 e->limit = saved_limit;
415 UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
416 return true;
417 }
418
419 #ifdef __cplusplus
420 } /* extern "C" */
421 #endif
422
423 #include "upb/port/undef.inc"
424
425 #endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
426