1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "python/unknown_fields.h"
29
30 #include "python/message.h"
31 #include "python/protobuf.h"
32 #include "upb/wire/eps_copy_input_stream.h"
33 #include "upb/wire/reader.h"
34 #include "upb/wire/types.h"
35
36 // -----------------------------------------------------------------------------
37 // UnknownFieldSet
38 // -----------------------------------------------------------------------------
39
40 typedef struct {
41 PyObject_HEAD;
42 PyObject* fields;
43 } PyUpb_UnknownFieldSet;
44
// tp_dealloc slot: drops the reference to the field list (if one was ever
// built) and then hands the object to PyUpb_Dealloc().
static void PyUpb_UnknownFieldSet_Dealloc(PyObject* _self) {
  PyUpb_UnknownFieldSet* set = (PyUpb_UnknownFieldSet*)_self;
  PyObject* list = set->fields;
  Py_XDECREF(list);
  PyUpb_Dealloc(set);
}
50
PyUpb_UnknownFieldSet_NewBare(void)51 PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare(void) {
52 PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
53 PyUpb_UnknownFieldSet* self =
54 (void*)PyType_GenericAlloc(s->unknown_fields_type, 0);
55 return self;
56 }
57
58 // For MessageSet the established behavior is for UnknownFieldSet to interpret
59 // the MessageSet wire format:
60 // message MessageSet {
61 // repeated group Item = 1 {
62 // required int32 type_id = 2;
63 // required bytes message = 3;
64 // }
65 // }
66 //
67 // And create unknown fields like:
68 // UnknownField(type_id, WIRE_TYPE_DELIMITED, message)
69 //
70 // For any unknown fields that are unexpected per the wire format defined above,
71 // we drop them on the floor.
72
73 enum {
74 kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup,
75 kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup,
76 kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint,
77 kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited,
78 };
79
// Parses the body of one MessageSet `Item` group, starting at `ptr` (just past
// the item's start-group tag).
//
// The first non-zero type_id and the first message payload seen are kept;
// later duplicates and any other fields inside the item are skipped. If both a
// type_id and a payload were found, an unknown-field tuple
// (type_id, kUpb_WireType_Delimited, payload) is appended to self->fields.
// An item missing either part is silently dropped.
//
// Returns the position after the consumed data, or NULL on failure. A failure
// caused by malformed wire data does not set a Python exception; failures
// coming from the Python C API leave that API's exception set.
static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem(
    PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
    const char* ptr) {
  PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
  int type_id = 0;
  PyObject* msg = NULL;
  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    ptr = upb_WireReader_ReadTag(ptr, &tag);
    if (!ptr) goto err;
    switch (tag) {
      case kUpb_MessageSet_EndItemTag:
        goto done;
      case kUpb_MessageSet_TypeIdTag: {
        uint64_t tmp;
        ptr = upb_WireReader_ReadVarint(ptr, &tmp);
        if (!ptr) goto err;
        // Keep only the first non-zero type_id.
        if (!type_id) type_id = (int)tmp;
        break;
      }
      case kUpb_MessageSet_MessageTag: {
        int size;
        ptr = upb_WireReader_ReadSize(ptr, &size);
        // Bail out before touching `size`: it is only meaningful when the
        // length prefix was read successfully.
        if (!ptr) goto err;
        if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
          goto err;
        }
        const char* str = ptr;
        ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
        if (!msg) {
          msg = PyBytes_FromStringAndSize(str, size);
          if (!msg) goto err;
        } else {
          // already saw a message here so deliberately skipping the duplicate
        }
        break;
      }
      default:
        // Anything else inside the item is not part of the MessageSet format.
        ptr = upb_WireReader_SkipValue(ptr, tag, stream);
        if (!ptr) goto err;
    }
  }

done:
  if (type_id && msg) {
    PyObject* field = PyObject_CallFunction(
        s->unknown_field_type, "iiO", type_id, kUpb_WireType_Delimited, msg);
    if (!field) goto err;
    // The list takes its own reference, so ours is dropped either way; a
    // failed append must be reported instead of being silently ignored.
    int rc = PyList_Append(self->fields, field);
    Py_DECREF(field);
    if (rc < 0) goto err;
  }
  Py_XDECREF(msg);
  return ptr;

err:
  Py_XDECREF(msg);
  return NULL;
}
137
// Builds self->fields from unknown data that is in MessageSet wire format.
//
// Every top-level `Item` group is handed to
// PyUpb_UnknownFieldSet_BuildMessageSetItem(); any other top-level field is
// skipped. Returns the final stream position on success. On failure returns
// NULL and leaves self->fields set to NULL.
static const char* PyUpb_UnknownFieldSet_BuildMessageSet(
    PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
    const char* ptr) {
  self->fields = PyList_New(0);
  // Without this check an allocation failure would reach the item builder and
  // the error path below with a NULL list.
  if (!self->fields) return NULL;
  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    ptr = upb_WireReader_ReadTag(ptr, &tag);
    if (!ptr) goto err;
    if (tag == kUpb_MessageSet_StartItemTag) {
      ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, stream, ptr);
    } else {
      ptr = upb_WireReader_SkipValue(ptr, tag, stream);
    }
    if (!ptr) goto err;
  }
  if (upb_EpsCopyInputStream_IsError(stream)) goto err;
  return ptr;

err:
  Py_DECREF(self->fields);
  self->fields = NULL;
  return NULL;
}
161
162 static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
163 upb_EpsCopyInputStream* stream,
164 const char* ptr,
165 int group_number);
166
// Reads the value of a single unknown field and converts it to a Python
// object stored in *data:
//   varint / fixed64 / fixed32 -> int (read as unsigned)
//   delimited                  -> bytes
//   start-group                -> nested UnknownFieldSet, built recursively
//
// `field_number` is passed to the recursive build as the number of the group
// being parsed. `self` and `group_number` are not used here.
//
// Returns the position after the value, or NULL on failure. On the group path
// *data may be non-NULL even when NULL is returned (the partially built
// sub-set); the caller owns that reference and must release it. On every other
// failure path *data is NULL.
static const char* PyUpb_UnknownFieldSet_BuildValue(
    PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
    const char* ptr, int field_number, int wire_type, int group_number,
    PyObject** data) {
  *data = NULL;
  switch (wire_type) {
    case kUpb_WireType_Varint: {
      uint64_t val;
      ptr = upb_WireReader_ReadVarint(ptr, &val);
      if (!ptr) return NULL;
      *data = PyLong_FromUnsignedLongLong(val);
      // Report object-creation failure instead of returning a position that
      // comes with no value.
      return *data ? ptr : NULL;
    }
    case kUpb_WireType_64Bit: {
      uint64_t val;
      ptr = upb_WireReader_ReadFixed64(ptr, &val);
      *data = PyLong_FromUnsignedLongLong(val);
      return *data ? ptr : NULL;
    }
    case kUpb_WireType_32Bit: {
      uint32_t val;
      ptr = upb_WireReader_ReadFixed32(ptr, &val);
      *data = PyLong_FromUnsignedLongLong(val);
      return *data ? ptr : NULL;
    }
    case kUpb_WireType_Delimited: {
      int size;
      ptr = upb_WireReader_ReadSize(ptr, &size);
      // `size` is only meaningful when the length prefix was read
      // successfully.
      if (!ptr) return NULL;
      if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
        return NULL;
      }
      const char* str = ptr;
      ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
      *data = PyBytes_FromStringAndSize(str, size);
      return *data ? ptr : NULL;
    }
    case kUpb_WireType_StartGroup: {
      PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare();
      if (!sub) return NULL;
      // Publish the sub-set before recursing so the caller can release it if
      // the nested build fails.
      *data = &sub->ob_base;
      return PyUpb_UnknownFieldSet_Build(sub, stream, ptr, field_number);
    }
    default:
      // End-group tags are handled by the caller before it gets here.
      assert(0);
      *data = NULL;
      return NULL;
  }
}
214
215 // For non-MessageSet we just build the unknown fields exactly as they exist on
216 // the wire.
PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet * self,upb_EpsCopyInputStream * stream,const char * ptr,int group_number)217 static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
218 upb_EpsCopyInputStream* stream,
219 const char* ptr,
220 int group_number) {
221 PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
222 self->fields = PyList_New(0);
223 while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
224 uint32_t tag;
225 ptr = upb_WireReader_ReadTag(ptr, &tag);
226 if (!ptr) goto err;
227 PyObject* data = NULL;
228 int field_number = upb_WireReader_GetFieldNumber(tag);
229 int wire_type = upb_WireReader_GetWireType(tag);
230 if (wire_type == kUpb_WireType_EndGroup) {
231 if (field_number != group_number) return NULL;
232 return ptr;
233 }
234 ptr = PyUpb_UnknownFieldSet_BuildValue(self, stream, ptr, field_number,
235 wire_type, group_number, &data);
236 if (!ptr) {
237 Py_XDECREF(data);
238 goto err;
239 }
240 assert(data);
241 PyObject* field = PyObject_CallFunction(s->unknown_field_type, "iiN",
242 field_number, wire_type, data);
243 PyList_Append(self->fields, field);
244 Py_DECREF(field);
245 }
246 if (upb_EpsCopyInputStream_IsError(stream)) goto err;
247 return ptr;
248
249 err:
250 Py_DECREF(self->fields);
251 self->fields = NULL;
252 return NULL;
253 }
254
// tp_new slot: UnknownFieldSet(message).
//
// Builds a new set from the unknown-field bytes stored on `message`. If the
// message is not reified, or it has no unknown bytes, the returned set has no
// field list and behaves as an empty sequence. MessageSet messages are
// decoded with the MessageSet-specific builder; all others are mirrored as
// they appear on the wire.
//
// Returns a new reference, or NULL with a Python exception set.
static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args,
                                           PyObject* kwargs) {
  char* kwlist[] = {"message", 0};
  PyObject* py_msg = NULL;

  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &py_msg)) {
    return NULL;
  }

  if (!PyUpb_Message_Verify(py_msg)) return NULL;
  PyUpb_UnknownFieldSet* self = PyUpb_UnknownFieldSet_NewBare();
  // Allocation can fail; everything below dereferences `self`.
  if (!self) return NULL;
  upb_Message* msg = PyUpb_Message_GetIfReified(py_msg);
  if (!msg) return &self->ob_base;

  size_t size;
  const char* ptr = upb_Message_GetUnknown(msg, &size);
  if (size == 0) return &self->ob_base;

  upb_EpsCopyInputStream stream;
  upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
  const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(py_msg);

  bool ok;
  if (upb_MessageDef_IsMessageSet(msgdef)) {
    ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, &stream, ptr) != NULL;
  } else {
    ok = PyUpb_UnknownFieldSet_Build(self, &stream, ptr, -1) != NULL;
  }

  if (!ok) {
    // Wire-format failures in the builders do not set a Python exception, and
    // returning NULL without one violates the C API contract. SystemError is
    // what the interpreter itself reports for that situation.
    if (!PyErr_Occurred()) {
      PyErr_SetString(PyExc_SystemError,
                      "failed to parse unknown fields of message");
    }
    Py_DECREF(&self->ob_base);
    return NULL;
  }

  return &self->ob_base;
}
291
PyUpb_UnknownFieldSet_Length(PyObject * _self)292 static Py_ssize_t PyUpb_UnknownFieldSet_Length(PyObject* _self) {
293 PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
294 return self->fields ? PyObject_Length(self->fields) : 0;
295 }
296
// sq_item slot: returns a new reference to the unknown field at `index`.
// Without a field list every index is out of range, so IndexError is raised;
// otherwise the lookup (and any error it raises) is delegated to
// PyList_GetItem().
static PyObject* PyUpb_UnknownFieldSet_GetItem(PyObject* _self,
                                               Py_ssize_t index) {
  PyObject* list = ((PyUpb_UnknownFieldSet*)_self)->fields;
  if (list) {
    PyObject* item = PyList_GetItem(list, index);
    if (!item) return NULL;
    // Take a reference of our own before handing the item to the caller.
    Py_INCREF(item);
    return item;
  }
  PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
  return NULL;
}
308
309 static PyType_Slot PyUpb_UnknownFieldSet_Slots[] = {
310 {Py_tp_new, &PyUpb_UnknownFieldSet_New},
311 {Py_tp_dealloc, &PyUpb_UnknownFieldSet_Dealloc},
312 {Py_sq_length, PyUpb_UnknownFieldSet_Length},
313 {Py_sq_item, PyUpb_UnknownFieldSet_GetItem},
314 {Py_tp_hash, PyObject_HashNotImplemented},
315 {0, NULL},
316 };
317
318 static PyType_Spec PyUpb_UnknownFieldSet_Spec = {
319 PYUPB_MODULE_NAME ".UnknownFieldSet", // tp_name
320 sizeof(PyUpb_UnknownFieldSet), // tp_basicsize
321 0, // tp_itemsize
322 Py_TPFLAGS_DEFAULT, // tp_flags
323 PyUpb_UnknownFieldSet_Slots,
324 };
325
326 // -----------------------------------------------------------------------------
327 // Top Level
328 // -----------------------------------------------------------------------------
329
PyUpb_UnknownFieldSet_CreateNamedTuple(void)330 PyObject* PyUpb_UnknownFieldSet_CreateNamedTuple(void) {
331 PyObject* mod = NULL;
332 PyObject* namedtuple = NULL;
333 PyObject* ret = NULL;
334
335 mod = PyImport_ImportModule("collections");
336 if (!mod) goto done;
337 namedtuple = PyObject_GetAttrString(mod, "namedtuple");
338 if (!namedtuple) goto done;
339 ret = PyObject_CallFunction(namedtuple, "s[sss]", "PyUnknownField",
340 "field_number", "wire_type", "data");
341
342 done:
343 Py_XDECREF(mod);
344 Py_XDECREF(namedtuple);
345 return ret;
346 }
347
// Registers the UnknownFieldSet class on module `m` and creates the
// PyUnknownField named tuple, storing both in the module state. Returns true
// only if both were created.
bool PyUpb_UnknownFields_Init(PyObject* m) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);

  // Both slots are assigned before either result is examined.
  state->unknown_fields_type = PyUpb_AddClass(m, &PyUpb_UnknownFieldSet_Spec);
  state->unknown_field_type = PyUpb_UnknownFieldSet_CreateNamedTuple();

  if (!state->unknown_fields_type) return false;
  return state->unknown_field_type != NULL;
}
356