1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "python/convert.h"
29
30 #include "python/message.h"
31 #include "python/protobuf.h"
32 #include "upb/map.h"
33 #include "upb/reflection/message.h"
34 #include "upb/util/compare.h"
35
36 // Must be last.
37 #include "upb/port/def.inc"
38
// Converts the upb value `val`, interpreted according to the C type of field
// `f`, into a Python object.
//
// `arena` is only used for message-typed fields, where it is forwarded to
// PyUpb_Message_Get() together with the field's message definition.
//
// Returns NULL on failure.  For an unrecognized C type a SystemError is set.
PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f,
                        PyObject* arena) {
  PyObject* py = NULL;

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      py = PyBool_FromLong(val.bool_val);
      break;
    case kUpb_CType_Float:
      py = PyFloat_FromDouble(val.float_val);
      break;
    case kUpb_CType_Double:
      py = PyFloat_FromDouble(val.double_val);
      break;
    case kUpb_CType_Enum:
    case kUpb_CType_Int32:
      // Enum values are surfaced as plain integers.
      py = PyLong_FromLong(val.int32_val);
      break;
    case kUpb_CType_UInt32:
      py = PyLong_FromSize_t(val.uint32_val);
      break;
    case kUpb_CType_Int64:
      py = PyLong_FromLongLong(val.int64_val);
      break;
    case kUpb_CType_UInt64:
      py = PyLong_FromUnsignedLongLong(val.uint64_val);
      break;
    case kUpb_CType_Bytes:
      py = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
      break;
    case kUpb_CType_String:
      py = PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL);
      if (!py) {
        // The payload is not valid UTF-8.  Values assigned through the Python
        // message object are always valid, but data parsed from the binary
        // wire format may not be; fall back to exposing the raw bytes.
        PyErr_Clear();
        py = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
      }
      break;
    case kUpb_CType_Message:
      py = PyUpb_Message_Get((upb_Message*)val.msg_val,
                             upb_FieldDef_MessageSubDef(f), arena);
      break;
    default:
      PyErr_Format(PyExc_SystemError,
                   "Getting a value from a field of unknown type %d",
                   upb_FieldDef_CType(f));
      break;
  }

  return py;
}
82
// Extracts a signed 64-bit integer from `obj` into `*val`.
//
// `obj` must be an integer or provide an __index__ conversion; otherwise the
// error raised by PyNumber_Index() is left set.  A value that does not fit in
// a C `long long` is reported as ValueError.
//
// Returns true on success, false with a Python exception set on failure.
static bool PyUpb_GetInt64(PyObject* obj, int64_t* val) {
  PyObject* index = PyNumber_Index(obj);
  if (!index) return false;

  *val = PyLong_AsLongLong(index);
  if (!PyErr_Occurred()) {
    Py_DECREF(index);
    return true;
  }

  // Replace the OverflowError with the ValueError that callers expect.
  assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  PyErr_Clear();
  PyErr_Format(PyExc_ValueError, "Value out of range: %S", index);
  Py_DECREF(index);
  return false;
}
101
// Extracts an unsigned 64-bit integer from `obj` into `*val`.
//
// `obj` must be an integer or provide an __index__ conversion; otherwise the
// error raised by PyNumber_Index() is left set.  A value that cannot be
// represented as a C `unsigned long long` is reported as ValueError.
//
// Returns true on success, false with a Python exception set on failure.
static bool PyUpb_GetUint64(PyObject* obj, uint64_t* val) {
  PyObject* index = PyNumber_Index(obj);
  if (!index) return false;

  *val = PyLong_AsUnsignedLongLong(index);
  if (!PyErr_Occurred()) {
    Py_DECREF(index);
    return true;
  }

  // Replace the OverflowError with the ValueError that callers expect.
  assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  PyErr_Clear();
  PyErr_Format(PyExc_ValueError, "Value out of range: %S", index);
  Py_DECREF(index);
  return false;
}
118
// Extracts a signed 32-bit integer from `obj` into `*val`.
//
// The value is first read as a 64-bit integer and then range-checked.
// Returns true on success; on failure returns false with a Python exception
// set (ValueError when the value lies outside [INT32_MIN, INT32_MAX]).
static bool PyUpb_GetInt32(PyObject* obj, int32_t* val) {
  int64_t wide;
  if (!PyUpb_GetInt64(obj, &wide)) return false;

  if (wide >= INT32_MIN && wide <= INT32_MAX) {
    *val = wide;
    return true;
  }

  PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  return false;
}
129
// Extracts an unsigned 32-bit integer from `obj` into `*val`.
//
// The value is first read as an unsigned 64-bit integer and then
// range-checked.  Returns true on success; on failure returns false with a
// Python exception set (ValueError when the value exceeds UINT32_MAX).
static bool PyUpb_GetUint32(PyObject* obj, uint32_t* val) {
  uint64_t wide;
  if (!PyUpb_GetUint64(obj, &wide)) return false;

  if (wide <= UINT32_MAX) {
    *val = wide;
    return true;
  }

  PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  return false;
}
140
141 // If `arena` is specified, copies the string data into the given arena.
142 // Otherwise aliases the given data.
PyUpb_MaybeCopyString(const char * ptr,size_t size,upb_Arena * arena)143 static upb_MessageValue PyUpb_MaybeCopyString(const char* ptr, size_t size,
144 upb_Arena* arena) {
145 upb_MessageValue ret;
146 ret.str_val.size = size;
147 if (arena) {
148 char* buf = upb_Arena_Malloc(arena, size);
149 memcpy(buf, ptr, size);
150 ret.str_val.data = buf;
151 } else {
152 ret.str_val.data = ptr;
153 }
154 return ret;
155 }
156
upb_FieldDef_TypeString(const upb_FieldDef * f)157 const char* upb_FieldDef_TypeString(const upb_FieldDef* f) {
158 switch (upb_FieldDef_CType(f)) {
159 case kUpb_CType_Double:
160 return "double";
161 case kUpb_CType_Float:
162 return "float";
163 case kUpb_CType_Int64:
164 return "int64";
165 case kUpb_CType_Int32:
166 return "int32";
167 case kUpb_CType_UInt64:
168 return "uint64";
169 case kUpb_CType_UInt32:
170 return "uint32";
171 case kUpb_CType_Enum:
172 return "enum";
173 case kUpb_CType_Bool:
174 return "bool";
175 case kUpb_CType_String:
176 return "string";
177 case kUpb_CType_Bytes:
178 return "bytes";
179 case kUpb_CType_Message:
180 return "message";
181 }
182 UPB_UNREACHABLE();
183 }
184
// Converts the Python object `obj` into a value of enum `e`, storing the
// number in `val->int32_val`.
//
// Two input forms are accepted:
//   * a `str`, which is looked up as an enum label; an unknown label raises
//     ValueError.
//   * anything accepted by PyUpb_GetInt32(); for enums defined in a proto2
//     file the number must additionally be a declared value of `e`, otherwise
//     ValueError is raised.
//
// Returns true on success, false with a Python exception set on failure.
static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e,
                              upb_MessageValue* val) {
  if (PyUnicode_Check(obj)) {
    Py_ssize_t size;
    const char* name = PyUnicode_AsUTF8AndSize(obj, &size);
    // The UTF-8 conversion itself can fail (e.g. a str containing lone
    // surrogates).  In that case `name` is NULL, `size` is unset and an
    // exception is already pending, so bail out before using either.
    if (!name) return false;
    const upb_EnumValueDef* ev =
        upb_EnumDef_FindValueByNameWithSize(e, name, size);
    if (!ev) {
      PyErr_Format(PyExc_ValueError, "unknown enum label \"%s\"", name);
      return false;
    }
    val->int32_val = upb_EnumValueDef_Number(ev);
    return true;
  } else {
    int32_t i32;
    if (!PyUpb_GetInt32(obj, &i32)) return false;
    // Only proto2 enums reject numbers that are not declared values.
    if (upb_FileDef_Syntax(upb_EnumDef_File(e)) == kUpb_Syntax_Proto2 &&
        !upb_EnumDef_CheckNumber(e, i32)) {
      PyErr_Format(PyExc_ValueError, "invalid enumerator %d", (int)i32);
      return false;
    }
    val->int32_val = i32;
    return true;
  }
}
210
// Reports whether `obj` is an instance of a type whose `__name__` is
// "ndarray".
//
// When it is, a TypeError naming the expected type of field `f` is set and
// true is returned, so callers can reject the value immediately.  Otherwise
// returns false with no exception set.
//
// The check is best-effort: if the type name cannot be obtained, the object is
// treated as "not an ndarray" and any error raised by the lookup is cleared so
// the caller's normal conversion path runs with a clean error state.
bool PyUpb_IsNumpyNdarray(PyObject* obj, const upb_FieldDef* f) {
  PyObject* type_name_obj =
      PyObject_GetAttrString((PyObject*)Py_TYPE(obj), "__name__");
  if (!type_name_obj) {
    // Attribute lookup failed; never dereference or DECREF a NULL result.
    PyErr_Clear();
    return false;
  }

  bool is_ndarray = false;
  const char* type_name = PyUpb_GetStrData(type_name_obj);
  if (!type_name) {
    // The name could not be read as string data; strcmp() on NULL would be
    // undefined behavior.
    PyErr_Clear();
  } else if (!strcmp(type_name, "ndarray")) {
    PyErr_Format(PyExc_TypeError,
                 "%S has type ndarray, but expected one of: %s", obj,
                 upb_FieldDef_TypeString(f));
    is_ndarray = true;
  }

  Py_DECREF(type_name_obj);
  return is_ndarray;
}
224
// Converts the Python object `obj` into a upb value suitable for field `f`,
// storing the result in `*val`.
//
// For string and bytes fields the payload is copied into `arena` when it is
// non-NULL.  When `arena` is NULL the stored pointer aliases memory owned by
// `obj` itself, so the result is valid only while the caller keeps `obj`
// alive.
//
// Message-typed fields cannot be converted and raise ValueError.
//
// Returns true on success, false with a Python exception set on failure.
bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
                   upb_Arena* arena) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Enum:
      return PyUpb_PyToUpbEnum(obj, upb_FieldDef_EnumSubDef(f), val);
    case kUpb_CType_Int32:
      return PyUpb_GetInt32(obj, &val->int32_val);
    case kUpb_CType_Int64:
      return PyUpb_GetInt64(obj, &val->int64_val);
    case kUpb_CType_UInt32:
      return PyUpb_GetUint32(obj, &val->uint32_val);
    case kUpb_CType_UInt64:
      return PyUpb_GetUint64(obj, &val->uint64_val);
    case kUpb_CType_Float:
      if (PyUpb_IsNumpyNdarray(obj, f)) return false;
      val->float_val = PyFloat_AsDouble(obj);
      return !PyErr_Occurred();
    case kUpb_CType_Double:
      if (PyUpb_IsNumpyNdarray(obj, f)) return false;
      val->double_val = PyFloat_AsDouble(obj);
      return !PyErr_Occurred();
    case kUpb_CType_Bool:
      if (PyUpb_IsNumpyNdarray(obj, f)) return false;
      val->bool_val = PyLong_AsLong(obj);
      return !PyErr_Occurred();
    case kUpb_CType_Bytes: {
      char* ptr;
      Py_ssize_t size;
      if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
      *val = PyUpb_MaybeCopyString(ptr, size, arena);
      return true;
    }
    case kUpb_CType_String: {
      Py_ssize_t size;
      if (PyBytes_Check(obj)) {
        // Bytes are accepted for string fields only if they are valid UTF-8.
        // Decode purely for validation (and to raise the usual Python error
        // on bad input), then drop the temporary.
        PyObject* decoded = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
        if (!decoded) return false;
        Py_XDECREF(decoded);
        // Use the bytes object's own buffer rather than the temporary's:
        // strictly-decoded UTF-8 re-encodes to the identical byte sequence,
        // and this buffer stays alive as long as `obj` does.  Aliasing the
        // temporary would leave a dangling pointer when `arena` is NULL.
        char* raw;
        if (PyBytes_AsStringAndSize(obj, &raw, &size) < 0) return false;
        *val = PyUpb_MaybeCopyString(raw, size, arena);
        return true;
      }
      const char* ptr = PyUnicode_AsUTF8AndSize(obj, &size);
      if (PyErr_Occurred()) return false;
      *val = PyUpb_MaybeCopyString(ptr, size, arena);
      return true;
    }
    case kUpb_CType_Message:
      PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");
      return false;
    default:
      PyErr_Format(PyExc_SystemError,
                   "Getting a value from a field of unknown type %d",
                   upb_FieldDef_CType(f));
      return false;
  }
}
284
285 bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
286 const upb_MessageDef* m);
287
288 // -----------------------------------------------------------------------------
289 // Equal
290 // -----------------------------------------------------------------------------
291
// Returns true if `val1` and `val2`, both interpreted according to the C type
// of field `f`, are equal.
//
// Scalars are compared by value (32-bit kinds through `int32_val`, 64-bit
// kinds through `int64_val`), strings and bytes by length and content, and
// sub-messages via upb_Message_IsEqual().  An unrecognized C type compares
// unequal.
bool PyUpb_ValueEq(upb_MessageValue val1, upb_MessageValue val2,
                   const upb_FieldDef* f) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      return val1.bool_val == val2.bool_val;
    case kUpb_CType_Int32:
    case kUpb_CType_UInt32:
    case kUpb_CType_Enum:
      return val1.int32_val == val2.int32_val;
    case kUpb_CType_Int64:
    case kUpb_CType_UInt64:
      return val1.int64_val == val2.int64_val;
    case kUpb_CType_Float:
      return val1.float_val == val2.float_val;
    case kUpb_CType_Double:
      return val1.double_val == val2.double_val;
    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      if (val1.str_val.size != val2.str_val.size) return false;
      // Two empty strings are equal regardless of their data pointers.
      // Skipping memcmp() here matters because passing it a NULL pointer is
      // undefined behavior even when the length is zero.
      if (val1.str_val.size == 0) return true;
      return memcmp(val1.str_val.data, val2.str_val.data,
                    val1.str_val.size) == 0;
    case kUpb_CType_Message:
      return upb_Message_IsEqual(val1.msg_val, val2.msg_val,
                                 upb_FieldDef_MessageSubDef(f));
    default:
      return false;
  }
}
320
PyUpb_Map_IsEqual(const upb_Map * map1,const upb_Map * map2,const upb_FieldDef * f)321 bool PyUpb_Map_IsEqual(const upb_Map* map1, const upb_Map* map2,
322 const upb_FieldDef* f) {
323 assert(upb_FieldDef_IsMap(f));
324 if (map1 == map2) return true;
325
326 size_t size1 = map1 ? upb_Map_Size(map1) : 0;
327 size_t size2 = map2 ? upb_Map_Size(map2) : 0;
328 if (size1 != size2) return false;
329 if (size1 == 0) return true;
330
331 const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
332 const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
333 size_t iter = kUpb_Map_Begin;
334
335 upb_MessageValue key, val1;
336 while (upb_Map_Next(map1, &key, &val1, &iter)) {
337 upb_MessageValue val2;
338 if (!upb_Map_Get(map2, key, &val2)) return false;
339 if (!PyUpb_ValueEq(val1, val2, val_f)) return false;
340 }
341
342 return true;
343 }
344
// Returns true if element `i` of `arr1` equals element `i` of `arr2`, with
// both interpreted according to field `f`.  `i` must be a valid index into
// both arrays.
static bool PyUpb_ArrayElem_IsEqual(const upb_Array* arr1,
                                    const upb_Array* arr2, size_t i,
                                    const upb_FieldDef* f) {
  assert(i < upb_Array_Size(arr1));
  assert(i < upb_Array_Size(arr2));
  return PyUpb_ValueEq(upb_Array_Get(arr1, i), upb_Array_Get(arr2, i), f);
}
354
// Returns true if the two arrays stored in repeated (non-map) field `f` have
// the same length and equal elements at every index.  A NULL array is treated
// as an empty array.
bool PyUpb_Array_IsEqual(const upb_Array* arr1, const upb_Array* arr2,
                         const upb_FieldDef* f) {
  assert(upb_FieldDef_IsRepeated(f) && !upb_FieldDef_IsMap(f));
  if (arr1 == arr2) return true;

  size_t len1 = 0;
  size_t len2 = 0;
  if (arr1) len1 = upb_Array_Size(arr1);
  if (arr2) len2 = upb_Array_Size(arr2);
  if (len1 != len2) return false;

  // Nothing to compare; also keeps `len1 - 1` below from wrapping around.
  if (len1 == 0) return true;

  // Walk inward from both ends, on the expectation that differences show up
  // near the ends sooner than in the middle.
  size_t front = 0;
  size_t back = len1 - 1;
  while (front < back) {
    if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, front, f)) return false;
    if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, back, f)) return false;
    front++;
    back--;
  }

  // For an odd length the cursors meet on the middle element, which the loop
  // above has not visited yet.
  if (front == back) {
    if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, front, f)) return false;
  }

  return true;
}
382
upb_Message_IsEqual(const upb_Message * msg1,const upb_Message * msg2,const upb_MessageDef * m)383 bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
384 const upb_MessageDef* m) {
385 if (msg1 == msg2) return true;
386 if (upb_Message_ExtensionCount(msg1) != upb_Message_ExtensionCount(msg2))
387 return false;
388
389 // Compare messages field-by-field. This is slightly tricky, because while
390 // we can iterate over normal fields in a predictable order, the extension
391 // order is unpredictable and may be different between msg1 and msg2.
392 // So we use the following strategy:
393 // 1. Iterate over all msg1 fields (including extensions).
394 // 2. For non-extension fields, we find the corresponding field by simply
395 // using upb_Message_Next(msg2). If the two messages have the same set
396 // of fields, this will yield the same field.
397 // 3. For extension fields, we have to actually search for the corresponding
398 // field, which we do with upb_Message_GetFieldByDef(msg2, ext_f1).
399 // 4. Once iteration over msg1 is complete, we call upb_Message_Next(msg2)
400 // one
401 // final time to verify that we have visited all of msg2's regular fields
402 // (we pass NULL for ext_dict so that iteration will *not* return
403 // extensions).
404 //
405 // We don't need to visit all of msg2's extensions, because we verified up
406 // front that both messages have the same number of extensions.
407 const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
408 const upb_FieldDef *f1, *f2;
409 upb_MessageValue val1, val2;
410 size_t iter1 = kUpb_Message_Begin;
411 size_t iter2 = kUpb_Message_Begin;
412 while (upb_Message_Next(msg1, m, symtab, &f1, &val1, &iter1)) {
413 if (upb_FieldDef_IsExtension(f1)) {
414 val2 = upb_Message_GetFieldByDef(msg2, f1);
415 } else {
416 if (!upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2) || f1 != f2) {
417 return false;
418 }
419 }
420
421 if (upb_FieldDef_IsMap(f1)) {
422 if (!PyUpb_Map_IsEqual(val1.map_val, val2.map_val, f1)) return false;
423 } else if (upb_FieldDef_IsRepeated(f1)) {
424 if (!PyUpb_Array_IsEqual(val1.array_val, val2.array_val, f1)) {
425 return false;
426 }
427 } else {
428 if (!PyUpb_ValueEq(val1, val2, f1)) return false;
429 }
430 }
431
432 if (upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2)) return false;
433
434 size_t usize1, usize2;
435 const char* uf1 = upb_Message_GetUnknown(msg1, &usize1);
436 const char* uf2 = upb_Message_GetUnknown(msg2, &usize2);
437 // 100 is arbitrary, we're trying to prevent stack overflow but it's not
438 // obvious how deep we should allow here.
439 return upb_Message_UnknownFieldsAreEqual(uf1, usize1, uf2, usize2, 100) ==
440 kUpb_UnknownCompareResult_Equal;
441 }
442
443 #include "upb/port/undef.inc"
444