1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "python/unknown_fields.h"
29 
30 #include "python/message.h"
31 #include "python/protobuf.h"
32 #include "upb/wire/eps_copy_input_stream.h"
33 #include "upb/wire/reader.h"
34 #include "upb/wire/types.h"
35 
36 // -----------------------------------------------------------------------------
37 // UnknownFieldSet
38 // -----------------------------------------------------------------------------
39 
40 typedef struct {
41   PyObject_HEAD;
42   PyObject* fields;
43 } PyUpb_UnknownFieldSet;
44 
// tp_dealloc slot: drops the reference to the field list (which may be NULL)
// and then hands the object itself to PyUpb_Dealloc().
static void PyUpb_UnknownFieldSet_Dealloc(PyObject* _self) {
  PyUpb_UnknownFieldSet* set = (PyUpb_UnknownFieldSet*)_self;
  PyObject* fields = set->fields;
  Py_XDECREF(fields);
  PyUpb_Dealloc(set);
}
50 
PyUpb_UnknownFieldSet_NewBare(void)51 PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare(void) {
52   PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
53   PyUpb_UnknownFieldSet* self =
54       (void*)PyType_GenericAlloc(s->unknown_fields_type, 0);
55   return self;
56 }
57 
58 // For MessageSet the established behavior is for UnknownFieldSet to interpret
59 // the MessageSet wire format:
60 //    message MessageSet {
61 //      repeated group Item = 1 {
62 //        required int32 type_id = 2;
63 //        required bytes message = 3;
64 //      }
65 //    }
66 //
67 // And create unknown fields like:
68 //   UnknownField(type_id, WIRE_TYPE_DELIMITED, message)
69 //
70 // For any unknown fields that are unexpected per the wire format defined above,
71 // we drop them on the floor.
72 
73 enum {
74   kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup,
75   kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup,
76   kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint,
77   kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited,
78 };
79 
// Parses the body of one MessageSet Item group, starting right after its
// start-group tag, and appends at most one
//   UnknownField(type_id, kUpb_WireType_Delimited, message)
// entry to self->fields.
//
// Only the first non-zero type_id and the first message payload are kept; any
// other field found inside the group is skipped.  An item that lacks either
// part produces no entry.
//
// Returns the read position after the item, or NULL if the input could not be
// read or a Python call failed.
static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem(
    PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
    const char* ptr) {
  PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
  int type_id = 0;
  PyObject* msg = NULL;
  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    ptr = upb_WireReader_ReadTag(ptr, &tag);
    if (!ptr) goto err;
    switch (tag) {
      case kUpb_MessageSet_EndItemTag:
        goto done;
      case kUpb_MessageSet_TypeIdTag: {
        uint64_t tmp;
        ptr = upb_WireReader_ReadVarint(ptr, &tmp);
        if (!ptr) goto err;
        // Keep only the first non-zero type_id.
        if (!type_id) type_id = (int)tmp;
        break;
      }
      case kUpb_MessageSet_MessageTag: {
        int size;
        ptr = upb_WireReader_ReadSize(ptr, &size);
        // `size` is only trustworthy when the read succeeded, so bail out
        // before it is used.
        if (!ptr) goto err;
        if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
          goto err;
        }
        const char* str = ptr;
        ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
        if (!msg) {
          msg = PyBytes_FromStringAndSize(str, size);
          if (!msg) goto err;
        } else {
          // already saw a message here so deliberately skipping the duplicate
        }
        break;
      }
      default:
        ptr = upb_WireReader_SkipValue(ptr, tag, stream);
        if (!ptr) goto err;
        break;
    }
  }

done:
  if (type_id && msg) {
    PyObject* field = PyObject_CallFunction(
        s->unknown_field_type, "iiO", type_id, kUpb_WireType_Delimited, msg);
    if (!field) goto err;
    // The list takes its own reference; ours is released either way.
    int status = PyList_Append(self->fields, field);
    Py_DECREF(field);
    if (status < 0) goto err;
  }
  Py_XDECREF(msg);
  return ptr;

err:
  Py_XDECREF(msg);
  return NULL;
}
137 
// Builds self->fields from unknown data that uses the MessageSet wire format.
// Every Item group is handed to PyUpb_UnknownFieldSet_BuildMessageSetItem();
// any other top-level field is skipped.
//
// Returns the final read position, or NULL on failure.  On failure
// self->fields is released and reset to NULL.
static const char* PyUpb_UnknownFieldSet_BuildMessageSet(
    PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
    const char* ptr) {
  self->fields = PyList_New(0);
  // Without a list there is nothing to append to and nothing to clean up.
  if (!self->fields) return NULL;
  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    ptr = upb_WireReader_ReadTag(ptr, &tag);
    if (!ptr) goto err;
    if (tag == kUpb_MessageSet_StartItemTag) {
      ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, stream, ptr);
    } else {
      ptr = upb_WireReader_SkipValue(ptr, tag, stream);
    }
    if (!ptr) goto err;
  }
  if (upb_EpsCopyInputStream_IsError(stream)) goto err;
  return ptr;

err:
  Py_CLEAR(self->fields);
  return NULL;
}
161 
162 static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
163                                                upb_EpsCopyInputStream* stream,
164                                                const char* ptr,
165                                                int group_number);
166 
PyUpb_UnknownFieldSet_BuildValue(PyUpb_UnknownFieldSet * self,upb_EpsCopyInputStream * stream,const char * ptr,int field_number,int wire_type,int group_number,PyObject ** data)167 static const char* PyUpb_UnknownFieldSet_BuildValue(
168     PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream,
169     const char* ptr, int field_number, int wire_type, int group_number,
170     PyObject** data) {
171   switch (wire_type) {
172     case kUpb_WireType_Varint: {
173       uint64_t val;
174       ptr = upb_WireReader_ReadVarint(ptr, &val);
175       if (!ptr) return NULL;
176       *data = PyLong_FromUnsignedLongLong(val);
177       return ptr;
178     }
179     case kUpb_WireType_64Bit: {
180       uint64_t val;
181       ptr = upb_WireReader_ReadFixed64(ptr, &val);
182       *data = PyLong_FromUnsignedLongLong(val);
183       return ptr;
184     }
185     case kUpb_WireType_32Bit: {
186       uint32_t val;
187       ptr = upb_WireReader_ReadFixed32(ptr, &val);
188       *data = PyLong_FromUnsignedLongLong(val);
189       return ptr;
190     }
191     case kUpb_WireType_Delimited: {
192       int size;
193       ptr = upb_WireReader_ReadSize(ptr, &size);
194       if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) {
195         return NULL;
196       }
197       const char* str = ptr;
198       ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size);
199       *data = PyBytes_FromStringAndSize(str, size);
200       return ptr;
201     }
202     case kUpb_WireType_StartGroup: {
203       PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare();
204       if (!sub) return NULL;
205       *data = &sub->ob_base;
206       return PyUpb_UnknownFieldSet_Build(sub, stream, ptr, field_number);
207     }
208     default:
209       assert(0);
210       *data = NULL;
211       return NULL;
212   }
213 }
214 
215 // For non-MessageSet we just build the unknown fields exactly as they exist on
216 // the wire.
PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet * self,upb_EpsCopyInputStream * stream,const char * ptr,int group_number)217 static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
218                                                upb_EpsCopyInputStream* stream,
219                                                const char* ptr,
220                                                int group_number) {
221   PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
222   self->fields = PyList_New(0);
223   while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
224     uint32_t tag;
225     ptr = upb_WireReader_ReadTag(ptr, &tag);
226     if (!ptr) goto err;
227     PyObject* data = NULL;
228     int field_number = upb_WireReader_GetFieldNumber(tag);
229     int wire_type = upb_WireReader_GetWireType(tag);
230     if (wire_type == kUpb_WireType_EndGroup) {
231       if (field_number != group_number) return NULL;
232       return ptr;
233     }
234     ptr = PyUpb_UnknownFieldSet_BuildValue(self, stream, ptr, field_number,
235                                            wire_type, group_number, &data);
236     if (!ptr) {
237       Py_XDECREF(data);
238       goto err;
239     }
240     assert(data);
241     PyObject* field = PyObject_CallFunction(s->unknown_field_type, "iiN",
242                                             field_number, wire_type, data);
243     PyList_Append(self->fields, field);
244     Py_DECREF(field);
245   }
246   if (upb_EpsCopyInputStream_IsError(stream)) goto err;
247   return ptr;
248 
249 err:
250   Py_DECREF(self->fields);
251   self->fields = NULL;
252   return NULL;
253 }
254 
// tp_new slot: UnknownFieldSet(message).
//
// Verifies `message` with PyUpb_Message_Verify() and builds a set from the
// message's unknown-field bytes.  A message that is not reified, or that has
// no unknown bytes, yields a set with no field list.  MessageSet messages are
// parsed with the MessageSet-specific builder.
//
// The `type` argument is ignored; the object is always created with the type
// stored in the module state.
//
// Returns a new reference, or NULL with a Python exception set.
static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args,
                                           PyObject* kwargs) {
  (void)type;
  char* kwlist[] = {"message", 0};
  PyObject* py_msg = NULL;

  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &py_msg)) {
    return NULL;
  }

  if (!PyUpb_Message_Verify(py_msg)) return NULL;
  PyUpb_UnknownFieldSet* self = PyUpb_UnknownFieldSet_NewBare();
  // Allocation failed; `self` must not be dereferenced.
  if (!self) return NULL;
  upb_Message* msg = PyUpb_Message_GetIfReified(py_msg);
  if (!msg) return &self->ob_base;

  size_t size;
  const char* ptr = upb_Message_GetUnknown(msg, &size);
  if (size == 0) return &self->ob_base;

  upb_EpsCopyInputStream stream;
  upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
  const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(py_msg);

  bool ok;
  if (upb_MessageDef_IsMessageSet(msgdef)) {
    ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, &stream, ptr) != NULL;
  } else {
    ok = PyUpb_UnknownFieldSet_Build(self, &stream, ptr, -1) != NULL;
  }

  if (!ok) {
    Py_DECREF(&self->ob_base);
    // Wire-format failures do not raise on their own.  A tp_new that returns
    // NULL must leave an exception set, so supply one explicitly.
    if (!PyErr_Occurred()) {
      PyErr_SetString(PyExc_SystemError,
                      "UnknownFieldSet: malformed unknown field data");
    }
    return NULL;
  }

  return &self->ob_base;
}
291 
PyUpb_UnknownFieldSet_Length(PyObject * _self)292 static Py_ssize_t PyUpb_UnknownFieldSet_Length(PyObject* _self) {
293   PyUpb_UnknownFieldSet* self = (PyUpb_UnknownFieldSet*)_self;
294   return self->fields ? PyObject_Length(self->fields) : 0;
295 }
296 
// sq_item slot: returns a new reference to the field at `index`.
//
// A set without a field list raises IndexError for every index; otherwise the
// lookup (and any error it raises) is delegated to PyList_GetItem().
static PyObject* PyUpb_UnknownFieldSet_GetItem(PyObject* _self,
                                               Py_ssize_t index) {
  PyObject* fields = ((PyUpb_UnknownFieldSet*)_self)->fields;
  if (fields) {
    // PyList_GetItem() returns a borrowed reference (or NULL on error).
    PyObject* item = PyList_GetItem(fields, index);
    Py_XINCREF(item);
    return item;
  }
  PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
  return NULL;
}
308 
309 static PyType_Slot PyUpb_UnknownFieldSet_Slots[] = {
310     {Py_tp_new, &PyUpb_UnknownFieldSet_New},
311     {Py_tp_dealloc, &PyUpb_UnknownFieldSet_Dealloc},
312     {Py_sq_length, PyUpb_UnknownFieldSet_Length},
313     {Py_sq_item, PyUpb_UnknownFieldSet_GetItem},
314     {Py_tp_hash, PyObject_HashNotImplemented},
315     {0, NULL},
316 };
317 
318 static PyType_Spec PyUpb_UnknownFieldSet_Spec = {
319     PYUPB_MODULE_NAME ".UnknownFieldSet",  // tp_name
320     sizeof(PyUpb_UnknownFieldSet),         // tp_basicsize
321     0,                                     // tp_itemsize
322     Py_TPFLAGS_DEFAULT,                    // tp_flags
323     PyUpb_UnknownFieldSet_Slots,
324 };
325 
326 // -----------------------------------------------------------------------------
327 // Top Level
328 // -----------------------------------------------------------------------------
329 
PyUpb_UnknownFieldSet_CreateNamedTuple(void)330 PyObject* PyUpb_UnknownFieldSet_CreateNamedTuple(void) {
331   PyObject* mod = NULL;
332   PyObject* namedtuple = NULL;
333   PyObject* ret = NULL;
334 
335   mod = PyImport_ImportModule("collections");
336   if (!mod) goto done;
337   namedtuple = PyObject_GetAttrString(mod, "namedtuple");
338   if (!namedtuple) goto done;
339   ret = PyObject_CallFunction(namedtuple, "s[sss]", "PyUnknownField",
340                               "field_number", "wire_type", "data");
341 
342 done:
343   Py_XDECREF(mod);
344   Py_XDECREF(namedtuple);
345   return ret;
346 }
347 
// Registers the UnknownFieldSet class on module `m` and creates the
// PyUnknownField named tuple, storing both in the module state.
//
// Returns true only if both objects were created.
bool PyUpb_UnknownFields_Init(PyObject* m) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);

  // Both assignments always run, even if the first one fails.
  state->unknown_fields_type = PyUpb_AddClass(m, &PyUpb_UnknownFieldSet_Spec);
  state->unknown_field_type = PyUpb_UnknownFieldSet_CreateNamedTuple();

  if (!state->unknown_fields_type) return false;
  return state->unknown_field_type != NULL;
}
356