1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * Internal implementation details of the decoder that are shared between
30 * decode.c and decode_fast.c.
31 */
32
33 #ifndef UPB_WIRE_DECODE_INTERNAL_H_
34 #define UPB_WIRE_DECODE_INTERNAL_H_
35
36 #include "upb/mem/arena_internal.h"
37 #include "upb/message/internal.h"
38 #include "upb/wire/decode.h"
39 #include "upb/wire/eps_copy_input_stream.h"
40 #include "utf8_range.h"
41
42 // Must be last.
43 #include "upb/port/def.inc"
44
// Sentinel stored in upb_Decoder.end_group when no END_GROUP tag has been
// seen.  The whole expansion is parenthesized so that it behaves as a single
// operand in any context (for example, `sizeof DECODE_NOGROUP`).
#define DECODE_NOGROUP ((uint32_t)-1)
46
47 typedef struct upb_Decoder {
48 upb_EpsCopyInputStream input;
49 const upb_ExtensionRegistry* extreg;
50 const char* unknown; // Start of unknown data, preserve at buffer flip
51 upb_Message* unknown_msg; // Pointer to preserve data to
52 int depth; // Tracks recursion depth to bound stack usage.
53 uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP.
54 uint16_t options;
55 bool missing_required;
56 upb_Arena arena;
57 upb_DecodeStatus status;
58 jmp_buf err;
59
60 #ifndef NDEBUG
61 const char* debug_tagstart;
62 const char* debug_valstart;
63 #endif
64 } upb_Decoder;
65
/* Error function that will abort decoding with longjmp(). We can't declare this
 * UPB_NORETURN, even though it is appropriate, because if we do then compilers
 * will "helpfully" refuse to tailcall to it
 * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
 * of our optimizations. That is also why we must define it in a separate file,
 * otherwise the compiler will see that it calls longjmp() and deduce that it is
 * noreturn. */
73 const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status);
74
75 extern const uint8_t upb_utf8_offsets[];
76
77 UPB_INLINE
_upb_Decoder_VerifyUtf8Inline(const char * ptr,int len)78 bool _upb_Decoder_VerifyUtf8Inline(const char* ptr, int len) {
79 const char* end = ptr + len;
80
81 // Check 8 bytes at a time for any non-ASCII char.
82 while (end - ptr >= 8) {
83 uint64_t data;
84 memcpy(&data, ptr, 8);
85 if (data & 0x8080808080808080) goto non_ascii;
86 ptr += 8;
87 }
88
89 // Check one byte at a time for non-ASCII.
90 while (ptr < end) {
91 if (*ptr & 0x80) goto non_ascii;
92 ptr++;
93 }
94
95 return true;
96
97 non_ascii:
98 return utf8_range2((const unsigned char*)ptr, end - ptr) == 0;
99 }
100
101 const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,
102 const upb_Message* msg,
103 const upb_MiniTable* l);
104
105 /* x86-64 pointers always have the high 16 bits matching. So we can shift
106 * left 8 and right 8 without loss of information. */
decode_totable(const upb_MiniTable * tablep)107 UPB_INLINE intptr_t decode_totable(const upb_MiniTable* tablep) {
108 return ((intptr_t)tablep << 8) | tablep->table_mask;
109 }
110
decode_totablep(intptr_t table)111 UPB_INLINE const upb_MiniTable* decode_totablep(intptr_t table) {
112 return (const upb_MiniTable*)(table >> 8);
113 }
114
115 const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
116 const char* ptr, int overrun);
117
_upb_Decoder_IsDone(upb_Decoder * d,const char ** ptr)118 UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) {
119 return upb_EpsCopyInputStream_IsDoneWithCallback(
120 &d->input, ptr, &_upb_Decoder_IsDoneFallback);
121 }
122
_upb_Decoder_BufferFlipCallback(upb_EpsCopyInputStream * e,const char * old_end,const char * new_start)123 UPB_INLINE const char* _upb_Decoder_BufferFlipCallback(
124 upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
125 upb_Decoder* d = (upb_Decoder*)e;
126 if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
127
128 if (d->unknown) {
129 if (!_upb_Message_AddUnknown(d->unknown_msg, d->unknown,
130 old_end - d->unknown, &d->arena)) {
131 _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
132 }
133 d->unknown = new_start;
134 }
135 return new_start;
136 }
137
#if UPB_FASTTABLE
// Selects the fast-table entry for `tag` and tail-calls its parser.
//
// `table` is a packed value as produced by decode_totable(): its low 8 bits
// are used as the mask applied to `tag`, and the remaining bits give the
// mini-table pointer.  The masked tag divided by 8 is the fast-table index.
// The parser receives the entry's field_data XOR-ed with `tag`.
UPB_INLINE
const char* _upb_FastDecoder_TagDispatch(upb_Decoder* d, const char* ptr,
                                         upb_Message* msg, intptr_t table,
                                         uint64_t hasbits, uint64_t tag) {
  const upb_MiniTable* mini_table = decode_totablep(table);
  const uint8_t tag_mask = (uint8_t)table;
  size_t slot = tag & tag_mask;
  uint64_t parser_data;

  // The masked tag is expected to be a multiple of 8; tell the optimizer so.
  UPB_ASSUME((slot & 7) == 0);
  slot >>= 3;

  parser_data = mini_table->fasttable[slot].field_data ^ tag;
  UPB_MUSTTAIL return mini_table->fasttable[slot].field_parser(
      d, ptr, msg, table, hasbits, parser_data);
}
#endif
154
_upb_FastDecoder_LoadTag(const char * ptr)155 UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) {
156 uint16_t tag;
157 memcpy(&tag, ptr, 2);
158 return tag;
159 }
160
161 #include "upb/port/undef.inc"
162
163 #endif /* UPB_WIRE_DECODE_INTERNAL_H_ */
164