1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "python/convert.h"
29 
30 #include "python/message.h"
31 #include "python/protobuf.h"
32 #include "upb/map.h"
33 #include "upb/reflection/message.h"
34 #include "upb/util/compare.h"
35 
36 // Must be last.
37 #include "upb/port/def.inc"
38 
// Builds a Python object holding the upb value `val`, interpreted according
// to the C type of field `f`.
//
// `arena` is only used for message-typed fields, where it is handed through
// to PyUpb_Message_Get().
//
// Returns whatever the underlying Python constructor returns, or NULL with
// SystemError set when the field's C type is not one of the handled cases.
PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f,
                        PyObject* arena) {
  switch (upb_FieldDef_CType(f)) {
    // Integral types.  Enums are surfaced as plain integers.
    case kUpb_CType_Int32:
    case kUpb_CType_Enum:
      return PyLong_FromLong(val.int32_val);
    case kUpb_CType_UInt32:
      return PyLong_FromSize_t(val.uint32_val);
    case kUpb_CType_Int64:
      return PyLong_FromLongLong(val.int64_val);
    case kUpb_CType_UInt64:
      return PyLong_FromUnsignedLongLong(val.uint64_val);

    case kUpb_CType_Bool:
      return PyBool_FromLong(val.bool_val);

    // Both floating-point widths become a Python float.
    case kUpb_CType_Double:
      return PyFloat_FromDouble(val.double_val);
    case kUpb_CType_Float:
      return PyFloat_FromDouble(val.float_val);

    case kUpb_CType_String: {
      const char* data = val.str_val.data;
      size_t size = val.str_val.size;
      PyObject* text = PyUnicode_DecodeUTF8(data, size, NULL);
      if (text != NULL) {
        return text;
      }
      // Not valid UTF-8: fall back to a bytes object with the raw contents.
      // Values assigned through the Python message object are always valid,
      // but data parsed from the binary wire format may not be.
      PyErr_Clear();
      return PyBytes_FromStringAndSize(data, size);
    }
    case kUpb_CType_Bytes:
      return PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);

    case kUpb_CType_Message: {
      const upb_MessageDef* sub_def = upb_FieldDef_MessageSubDef(f);
      return PyUpb_Message_Get((upb_Message*)val.msg_val, sub_def, arena);
    }

    default:
      break;
  }

  PyErr_Format(PyExc_SystemError,
               "Getting a value from a field of unknown type %d",
               upb_FieldDef_CType(f));
  return NULL;
}
82 
// Converts `obj` to a signed 64-bit integer stored in `*val`.
//
// Returns true on success.  Returns false with a Python exception set when
// `obj` cannot be converted by PyNumber_Index(), or with ValueError set when
// the integer does not fit.
static bool PyUpb_GetInt64(PyObject* obj, int64_t* val) {
  // Only objects accepted by PyNumber_Index() (integers, or anything with an
  // __index__ conversion) are allowed.
  PyObject* index = PyNumber_Index(obj);
  if (index == NULL) {
    return false;
  }

  *val = PyLong_AsLongLong(index);
  if (!PyErr_Occurred()) {
    Py_DECREF(index);
    return true;
  }

  // The only failure expected here is an overflow; report it to the caller as
  // a ValueError instead.
  assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  PyErr_Clear();
  PyErr_Format(PyExc_ValueError, "Value out of range: %S", index);
  Py_DECREF(index);
  return false;
}
101 
// Converts `obj` to an unsigned 64-bit integer stored in `*val`.
//
// Returns true on success.  Returns false with a Python exception set when
// `obj` cannot be converted by PyNumber_Index(), or with ValueError set when
// PyLong_AsUnsignedLongLong() reports an overflow.
static bool PyUpb_GetUint64(PyObject* obj, uint64_t* val) {
  // Only objects accepted by PyNumber_Index() (integers, or anything with an
  // __index__ conversion) are allowed.
  PyObject* index = PyNumber_Index(obj);
  if (index == NULL) {
    return false;
  }

  *val = PyLong_AsUnsignedLongLong(index);
  if (!PyErr_Occurred()) {
    Py_DECREF(index);
    return true;
  }

  // The only failure expected here is an overflow; report it to the caller as
  // a ValueError instead.
  assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  PyErr_Clear();
  PyErr_Format(PyExc_ValueError, "Value out of range: %S", index);
  Py_DECREF(index);
  return false;
}
118 
// Converts `obj` to a signed 32-bit integer stored in `*val`.
//
// The value is first read as a 64-bit integer via PyUpb_GetInt64() and then
// range-checked.  Returns false with a Python exception set on failure;
// `*val` is only written on success.
static bool PyUpb_GetInt32(PyObject* obj, int32_t* val) {
  int64_t wide;
  if (!PyUpb_GetInt64(obj, &wide)) {
    return false;
  }

  if (wide >= INT32_MIN && wide <= INT32_MAX) {
    *val = (int32_t)wide;
    return true;
  }

  PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  return false;
}
129 
// Converts `obj` to an unsigned 32-bit integer stored in `*val`.
//
// The value is first read as an unsigned 64-bit integer via
// PyUpb_GetUint64() and then range-checked.  Returns false with a Python
// exception set on failure; `*val` is only written on success.
static bool PyUpb_GetUint32(PyObject* obj, uint32_t* val) {
  uint64_t wide;
  if (!PyUpb_GetUint64(obj, &wide)) {
    return false;
  }

  if (wide <= UINT32_MAX) {
    *val = (uint32_t)wide;
    return true;
  }

  PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  return false;
}
140 
141 // If `arena` is specified, copies the string data into the given arena.
142 // Otherwise aliases the given data.
PyUpb_MaybeCopyString(const char * ptr,size_t size,upb_Arena * arena)143 static upb_MessageValue PyUpb_MaybeCopyString(const char* ptr, size_t size,
144                                               upb_Arena* arena) {
145   upb_MessageValue ret;
146   ret.str_val.size = size;
147   if (arena) {
148     char* buf = upb_Arena_Malloc(arena, size);
149     memcpy(buf, ptr, size);
150     ret.str_val.data = buf;
151   } else {
152     ret.str_val.data = ptr;
153   }
154   return ret;
155 }
156 
upb_FieldDef_TypeString(const upb_FieldDef * f)157 const char* upb_FieldDef_TypeString(const upb_FieldDef* f) {
158   switch (upb_FieldDef_CType(f)) {
159     case kUpb_CType_Double:
160       return "double";
161     case kUpb_CType_Float:
162       return "float";
163     case kUpb_CType_Int64:
164       return "int64";
165     case kUpb_CType_Int32:
166       return "int32";
167     case kUpb_CType_UInt64:
168       return "uint64";
169     case kUpb_CType_UInt32:
170       return "uint32";
171     case kUpb_CType_Enum:
172       return "enum";
173     case kUpb_CType_Bool:
174       return "bool";
175     case kUpb_CType_String:
176       return "string";
177     case kUpb_CType_Bytes:
178       return "bytes";
179     case kUpb_CType_Message:
180       return "message";
181   }
182   UPB_UNREACHABLE();
183 }
184 
// Converts the Python object `obj` into a value of enum `e`, storing the
// enum number in `val->int32_val`.
//
// Two kinds of input are accepted:
//   * a str, which is looked up as an enum value name in `e`;
//   * anything PyUpb_GetInt32() accepts, taken as the enum number.  When the
//     enum's file uses proto2 syntax, the number must also pass
//     upb_EnumDef_CheckNumber().
//
// Returns true on success.  Returns false with a Python exception set on
// failure; `*val` is left untouched in that case.
static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e,
                              upb_MessageValue* val) {
  if (PyUnicode_Check(obj)) {
    Py_ssize_t size;
    const char* name = PyUnicode_AsUTF8AndSize(obj, &size);
    // PyUnicode_AsUTF8AndSize() returns NULL with an exception set when the
    // string cannot be encoded as UTF-8.  Bail out before `name` and `size`
    // reach the lookup or the "%s" formatter below.
    if (name == NULL) {
      return false;
    }

    const upb_EnumValueDef* ev =
        upb_EnumDef_FindValueByNameWithSize(e, name, size);
    if (ev == NULL) {
      PyErr_Format(PyExc_ValueError, "unknown enum label \"%s\"", name);
      return false;
    }
    val->int32_val = upb_EnumValueDef_Number(ev);
    return true;
  }

  int32_t i32;
  if (!PyUpb_GetInt32(obj, &i32)) {
    return false;
  }

  // The membership check is only applied to enums from proto2 files.
  if (upb_FileDef_Syntax(upb_EnumDef_File(e)) == kUpb_Syntax_Proto2 &&
      !upb_EnumDef_CheckNumber(e, i32)) {
    PyErr_Format(PyExc_ValueError, "invalid enumerator %d", (int)i32);
    return false;
  }

  val->int32_val = i32;
  return true;
}
210 
// Reports whether the type of `obj` is named "ndarray".
//
// When it is, a TypeError naming the expected type of field `f` is set and
// true is returned, so callers can fail the conversion straight away.
// Otherwise false is returned with no exception left pending by this
// function.
//
// Detection is best-effort: if the type's `__name__` cannot be read as a C
// string, the lookup error is cleared and `obj` is treated as "not an
// ndarray", so the caller's normal conversion path decides the outcome.
bool PyUpb_IsNumpyNdarray(PyObject* obj, const upb_FieldDef* f) {
  PyObject* type_name_obj =
      PyObject_GetAttrString((PyObject*)Py_TYPE(obj), "__name__");
  if (type_name_obj == NULL) {
    // The attribute lookup failed; without this check the NULL would be
    // passed to PyUpb_GetStrData() and Py_DECREF().
    PyErr_Clear();
    return false;
  }

  bool is_ndarray = false;
  // PyUpb_GetStrData() is declared elsewhere; it is assumed to return NULL
  // when it cannot produce a C string -- TODO confirm.
  const char* type_name = PyUpb_GetStrData(type_name_obj);
  if (type_name == NULL) {
    PyErr_Clear();
  } else if (strcmp(type_name, "ndarray") == 0) {
    PyErr_Format(PyExc_TypeError,
                 "%S has type ndarray, but expected one of: %s", obj,
                 upb_FieldDef_TypeString(f));
    is_ndarray = true;
  }

  Py_DECREF(type_name_obj);
  return is_ndarray;
}
224 
// Converts the Python object `obj` into a upb value for field `f`, writing
// the result to `*val`.
//
// For string and bytes fields, `arena` is forwarded to
// PyUpb_MaybeCopyString(): a non-NULL arena receives a copy of the data,
// while a NULL arena makes `*val` point into the Python object's own buffer.
//
// Returns true on success, false with a Python exception set on failure.
// Message-typed fields always fail with ValueError.
bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
                   upb_Arena* arena) {
  switch (upb_FieldDef_CType(f)) {
    // Integer-like types delegate to the range-checking helpers.
    case kUpb_CType_Int32:
      return PyUpb_GetInt32(obj, &val->int32_val);
    case kUpb_CType_UInt32:
      return PyUpb_GetUint32(obj, &val->uint32_val);
    case kUpb_CType_Int64:
      return PyUpb_GetInt64(obj, &val->int64_val);
    case kUpb_CType_UInt64:
      return PyUpb_GetUint64(obj, &val->uint64_val);
    case kUpb_CType_Enum: {
      const upb_EnumDef* enum_def = upb_FieldDef_EnumSubDef(f);
      return PyUpb_PyToUpbEnum(obj, enum_def, val);
    }

    // float/double/bool reject ndarray inputs up front;
    // PyUpb_IsNumpyNdarray() has already set the TypeError when it returns
    // true.
    case kUpb_CType_Float: {
      if (PyUpb_IsNumpyNdarray(obj, f)) {
        return false;
      }
      val->float_val = PyFloat_AsDouble(obj);
      return PyErr_Occurred() == NULL;
    }
    case kUpb_CType_Double: {
      if (PyUpb_IsNumpyNdarray(obj, f)) {
        return false;
      }
      val->double_val = PyFloat_AsDouble(obj);
      return PyErr_Occurred() == NULL;
    }
    case kUpb_CType_Bool: {
      if (PyUpb_IsNumpyNdarray(obj, f)) {
        return false;
      }
      val->bool_val = PyLong_AsLong(obj);
      return PyErr_Occurred() == NULL;
    }

    case kUpb_CType_Bytes: {
      Py_ssize_t len;
      char* data;
      if (PyBytes_AsStringAndSize(obj, &data, &len) < 0) {
        return false;
      }
      *val = PyUpb_MaybeCopyString(data, len, arena);
      return true;
    }

    case kUpb_CType_String: {
      // A bytes object is accepted if it decodes as UTF-8; the temporary str
      // created for that purpose is released before returning.
      PyObject* decoded = NULL;
      if (PyBytes_Check(obj)) {
        decoded = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
        if (decoded == NULL) {
          return false;
        }
        obj = decoded;
      }

      Py_ssize_t len;
      const char* utf8 = PyUnicode_AsUTF8AndSize(obj, &len);
      bool ok = !PyErr_Occurred();
      if (ok) {
        // NOTE(review): with a NULL arena the value aliases `utf8`; when
        // `decoded` is the temporary created above, that buffer presumably
        // does not outlive the Py_XDECREF below -- confirm callers always
        // pass an arena in that situation.
        *val = PyUpb_MaybeCopyString(utf8, len, arena);
      }
      Py_XDECREF(decoded);
      return ok;
    }

    case kUpb_CType_Message: {
      PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");
      return false;
    }

    default:
      break;
  }

  PyErr_Format(PyExc_SystemError,
               "Getting a value from a field of unknown type %d",
               upb_FieldDef_CType(f));
  return false;
}
284 
// Forward declaration: PyUpb_ValueEq() below calls upb_Message_IsEqual() for
// message-typed values, but the definition only appears further down in this
// file.
bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
                         const upb_MessageDef* m);
287 
288 // -----------------------------------------------------------------------------
289 // Equal
290 // -----------------------------------------------------------------------------
291 
// Compares two single (non-repeated, non-map) values of field `f`.
//
//   * Scalars are compared with `==`, so floating-point values follow the
//     usual C rules for that operator.
//   * Strings and bytes are equal when they have the same length and the
//     same contents.
//   * Messages are compared with upb_Message_IsEqual().
//
// Returns false for any C type that is not handled.
bool PyUpb_ValueEq(upb_MessageValue val1, upb_MessageValue val2,
                   const upb_FieldDef* f) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Message: {
      const upb_MessageDef* sub_def = upb_FieldDef_MessageSubDef(f);
      return upb_Message_IsEqual(val1.msg_val, val2.msg_val, sub_def);
    }

    case kUpb_CType_Bytes:
    case kUpb_CType_String: {
      size_t len = val1.str_val.size;
      if (len != val2.str_val.size) {
        return false;
      }
      return memcmp(val1.str_val.data, val2.str_val.data, len) == 0;
    }

    case kUpb_CType_Double:
      return val1.double_val == val2.double_val;
    case kUpb_CType_Float:
      return val1.float_val == val2.float_val;

    // Unsigned values are compared through the signed member of the same
    // width (presumably upb_MessageValue is a union, so the same bits are
    // compared -- confirm).
    case kUpb_CType_UInt64:
    case kUpb_CType_Int64:
      return val1.int64_val == val2.int64_val;
    case kUpb_CType_Enum:
    case kUpb_CType_UInt32:
    case kUpb_CType_Int32:
      return val1.int32_val == val2.int32_val;

    case kUpb_CType_Bool:
      return val1.bool_val == val2.bool_val;

    default:
      break;
  }
  return false;
}
320 
PyUpb_Map_IsEqual(const upb_Map * map1,const upb_Map * map2,const upb_FieldDef * f)321 bool PyUpb_Map_IsEqual(const upb_Map* map1, const upb_Map* map2,
322                        const upb_FieldDef* f) {
323   assert(upb_FieldDef_IsMap(f));
324   if (map1 == map2) return true;
325 
326   size_t size1 = map1 ? upb_Map_Size(map1) : 0;
327   size_t size2 = map2 ? upb_Map_Size(map2) : 0;
328   if (size1 != size2) return false;
329   if (size1 == 0) return true;
330 
331   const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
332   const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
333   size_t iter = kUpb_Map_Begin;
334 
335   upb_MessageValue key, val1;
336   while (upb_Map_Next(map1, &key, &val1, &iter)) {
337     upb_MessageValue val2;
338     if (!upb_Map_Get(map2, key, &val2)) return false;
339     if (!PyUpb_ValueEq(val1, val2, val_f)) return false;
340   }
341 
342   return true;
343 }
344 
PyUpb_ArrayElem_IsEqual(const upb_Array * arr1,const upb_Array * arr2,size_t i,const upb_FieldDef * f)345 static bool PyUpb_ArrayElem_IsEqual(const upb_Array* arr1,
346                                     const upb_Array* arr2, size_t i,
347                                     const upb_FieldDef* f) {
348   assert(i < upb_Array_Size(arr1));
349   assert(i < upb_Array_Size(arr2));
350   upb_MessageValue val1 = upb_Array_Get(arr1, i);
351   upb_MessageValue val2 = upb_Array_Get(arr2, i);
352   return PyUpb_ValueEq(val1, val2, f);
353 }
354 
// Compares the contents of two arrays belonging to repeated (non-map) field
// `f`.
//
// A NULL array is treated as empty.  Two arrays are equal when they have the
// same length and every pair of elements at the same index compares equal.
bool PyUpb_Array_IsEqual(const upb_Array* arr1, const upb_Array* arr2,
                         const upb_FieldDef* f) {
  assert(upb_FieldDef_IsRepeated(f) && !upb_FieldDef_IsMap(f));
  if (arr1 == arr2) {
    return true;
  }

  const size_t n1 = (arr1 != NULL) ? upb_Array_Size(arr1) : 0;
  const size_t n2 = (arr2 != NULL) ? upb_Array_Size(arr2) : 0;
  if (n1 != n2) {
    return false;
  }

  // Walk inwards from both ends at once, on the expectation that differences
  // show up near the ends sooner than in the middle.  `half` is 0 for an
  // empty array, so the loop body never runs in that case.
  const size_t half = n1 / 2;
  size_t front = 0;
  size_t back = n1;
  while (front < half) {
    back--;
    if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, front, f)) {
      return false;
    }
    if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, back, f)) {
      return false;
    }
    front++;
  }

  // The two cursors never reach the middle element of an odd-length array,
  // so check it separately.
  if ((n1 % 2) != 0 && !PyUpb_ArrayElem_IsEqual(arr1, arr2, half, f)) {
    return false;
  }

  return true;
}
382 
upb_Message_IsEqual(const upb_Message * msg1,const upb_Message * msg2,const upb_MessageDef * m)383 bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
384                          const upb_MessageDef* m) {
385   if (msg1 == msg2) return true;
386   if (upb_Message_ExtensionCount(msg1) != upb_Message_ExtensionCount(msg2))
387     return false;
388 
389   // Compare messages field-by-field.  This is slightly tricky, because while
390   // we can iterate over normal fields in a predictable order, the extension
391   // order is unpredictable and may be different between msg1 and msg2.
392   // So we use the following strategy:
393   //   1. Iterate over all msg1 fields (including extensions).
394   //   2. For non-extension fields, we find the corresponding field by simply
395   //      using upb_Message_Next(msg2).  If the two messages have the same set
396   //      of fields, this will yield the same field.
397   //   3. For extension fields, we have to actually search for the corresponding
398   //      field, which we do with upb_Message_GetFieldByDef(msg2, ext_f1).
399   //   4. Once iteration over msg1 is complete, we call upb_Message_Next(msg2)
400   //   one
401   //      final time to verify that we have visited all of msg2's regular fields
402   //      (we pass NULL for ext_dict so that iteration will *not* return
403   //      extensions).
404   //
405   // We don't need to visit all of msg2's extensions, because we verified up
406   // front that both messages have the same number of extensions.
407   const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
408   const upb_FieldDef *f1, *f2;
409   upb_MessageValue val1, val2;
410   size_t iter1 = kUpb_Message_Begin;
411   size_t iter2 = kUpb_Message_Begin;
412   while (upb_Message_Next(msg1, m, symtab, &f1, &val1, &iter1)) {
413     if (upb_FieldDef_IsExtension(f1)) {
414       val2 = upb_Message_GetFieldByDef(msg2, f1);
415     } else {
416       if (!upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2) || f1 != f2) {
417         return false;
418       }
419     }
420 
421     if (upb_FieldDef_IsMap(f1)) {
422       if (!PyUpb_Map_IsEqual(val1.map_val, val2.map_val, f1)) return false;
423     } else if (upb_FieldDef_IsRepeated(f1)) {
424       if (!PyUpb_Array_IsEqual(val1.array_val, val2.array_val, f1)) {
425         return false;
426       }
427     } else {
428       if (!PyUpb_ValueEq(val1, val2, f1)) return false;
429     }
430   }
431 
432   if (upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2)) return false;
433 
434   size_t usize1, usize2;
435   const char* uf1 = upb_Message_GetUnknown(msg1, &usize1);
436   const char* uf2 = upb_Message_GetUnknown(msg2, &usize2);
437   // 100 is arbitrary, we're trying to prevent stack overflow but it's not
438   // obvious how deep we should allow here.
439   return upb_Message_UnknownFieldsAreEqual(uf1, usize1, uf2, usize2, 100) ==
440          kUpb_UnknownCompareResult_Equal;
441 }
442 
443 #include "upb/port/undef.inc"
444