1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * Internal implementation details of the decoder that are shared between
30  * decode.c and decode_fast.c.
31  */
32 
33 #ifndef UPB_WIRE_DECODE_INTERNAL_H_
34 #define UPB_WIRE_DECODE_INTERNAL_H_
35 
36 #include "upb/mem/arena_internal.h"
37 #include "upb/message/internal.h"
38 #include "upb/wire/decode.h"
39 #include "upb/wire/eps_copy_input_stream.h"
40 #include "utf8_range.h"
41 
42 // Must be last.
43 #include "upb/port/def.inc"
44 
// Sentinel for upb_Decoder::end_group meaning "no END_GROUP tag".
// The expansion is fully parenthesized so that neighboring operators cannot
// split the cast from its operand (e.g. `sizeof DECODE_NOGROUP`).
#define DECODE_NOGROUP ((uint32_t)-1)
46 
47 typedef struct upb_Decoder {
48   upb_EpsCopyInputStream input;
49   const upb_ExtensionRegistry* extreg;
50   const char* unknown;       // Start of unknown data, preserve at buffer flip
51   upb_Message* unknown_msg;  // Pointer to preserve data to
52   int depth;                 // Tracks recursion depth to bound stack usage.
53   uint32_t end_group;  // field number of END_GROUP tag, else DECODE_NOGROUP.
54   uint16_t options;
55   bool missing_required;
56   upb_Arena arena;
57   upb_DecodeStatus status;
58   jmp_buf err;
59 
60 #ifndef NDEBUG
61   const char* debug_tagstart;
62   const char* debug_valstart;
63 #endif
64 } upb_Decoder;
65 
66 /* Error function that will abort decoding with longjmp(). We can't declare this
67  * UPB_NORETURN, even though it is appropriate, because if we do then compilers
68  * will "helpfully" refuse to tailcall to it
69  * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
70  * of our optimizations. That is also why we must declare it in a separate file,
71  * otherwise the compiler will see that it calls longjmp() and deduce that it is
72  * noreturn. */
73 const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status);
74 
75 extern const uint8_t upb_utf8_offsets[];
76 
77 UPB_INLINE
_upb_Decoder_VerifyUtf8Inline(const char * ptr,int len)78 bool _upb_Decoder_VerifyUtf8Inline(const char* ptr, int len) {
79   const char* end = ptr + len;
80 
81   // Check 8 bytes at a time for any non-ASCII char.
82   while (end - ptr >= 8) {
83     uint64_t data;
84     memcpy(&data, ptr, 8);
85     if (data & 0x8080808080808080) goto non_ascii;
86     ptr += 8;
87   }
88 
89   // Check one byte at a time for non-ASCII.
90   while (ptr < end) {
91     if (*ptr & 0x80) goto non_ascii;
92     ptr++;
93   }
94 
95   return true;
96 
97 non_ascii:
98   return utf8_range2((const unsigned char*)ptr, end - ptr) == 0;
99 }
100 
101 const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,
102                                        const upb_Message* msg,
103                                        const upb_MiniTable* l);
104 
105 /* x86-64 pointers always have the high 16 bits matching. So we can shift
106  * left 8 and right 8 without loss of information. */
decode_totable(const upb_MiniTable * tablep)107 UPB_INLINE intptr_t decode_totable(const upb_MiniTable* tablep) {
108   return ((intptr_t)tablep << 8) | tablep->table_mask;
109 }
110 
decode_totablep(intptr_t table)111 UPB_INLINE const upb_MiniTable* decode_totablep(intptr_t table) {
112   return (const upb_MiniTable*)(table >> 8);
113 }
114 
115 const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
116                                         const char* ptr, int overrun);
117 
_upb_Decoder_IsDone(upb_Decoder * d,const char ** ptr)118 UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) {
119   return upb_EpsCopyInputStream_IsDoneWithCallback(
120       &d->input, ptr, &_upb_Decoder_IsDoneFallback);
121 }
122 
_upb_Decoder_BufferFlipCallback(upb_EpsCopyInputStream * e,const char * old_end,const char * new_start)123 UPB_INLINE const char* _upb_Decoder_BufferFlipCallback(
124     upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
125   upb_Decoder* d = (upb_Decoder*)e;
126   if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
127 
128   if (d->unknown) {
129     if (!_upb_Message_AddUnknown(d->unknown_msg, d->unknown,
130                                  old_end - d->unknown, &d->arena)) {
131       _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
132     }
133     d->unknown = new_start;
134   }
135   return new_start;
136 }
137 
#if UPB_FASTTABLE
/* Dispatches to the fast-table parser selected by `tag`.
 *
 * `table` is a packed value as produced by decode_totable(): its low byte is
 * used as the mask applied to the tag, and decode_totablep() recovers the
 * mini-table pointer from the rest. The selected entry's `field_data` is
 * XOR-ed with the tag and handed to that entry's `field_parser` as a tail
 * call. */
UPB_INLINE
const char* _upb_FastDecoder_TagDispatch(upb_Decoder* d, const char* ptr,
                                         upb_Message* msg, intptr_t table,
                                         uint64_t hasbits, uint64_t tag) {
  const upb_MiniTable* mini_table = decode_totablep(table);
  const uint8_t tag_mask = table;  // Truncates to the low 8 bits of `table`.

  size_t slot = tag & tag_mask;
  UPB_ASSUME((slot & 7) == 0);  // The masked tag is a multiple of 8.
  slot >>= 3;

  const uint64_t data = mini_table->fasttable[slot].field_data ^ tag;
  UPB_MUSTTAIL return mini_table->fasttable[slot].field_parser(
      d, ptr, msg, table, hasbits, data);
}
#endif
154 
_upb_FastDecoder_LoadTag(const char * ptr)155 UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) {
156   uint16_t tag;
157   memcpy(&tag, ptr, 2);
158   return tag;
159 }
160 
161 #include "upb/port/undef.inc"
162 
163 #endif /* UPB_WIRE_DECODE_INTERNAL_H_ */
164