1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "python/message.h"
29 
30 #include "python/convert.h"
31 #include "python/descriptor.h"
32 #include "python/extension_dict.h"
33 #include "python/map.h"
34 #include "python/repeated.h"
35 #include "upb/reflection/def.h"
36 #include "upb/reflection/message.h"
37 #include "upb/text/encode.h"
38 #include "upb/util/required_fields.h"
39 #include "upb/wire/common.h"
40 
41 static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls);
42 static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name);
43 
44 // -----------------------------------------------------------------------------
45 // CPythonBits
46 // -----------------------------------------------------------------------------
47 
48 // This struct contains a few things that are not exposed directly through the
49 // limited API, but that we can get at in somewhat more roundabout ways. The
50 // roundabout ways are slower, so we cache the values here.
51 //
52 // These values are valid to cache in a global, even across sub-interpreters,
53 // because they are not pointers to interpreter state.  They are process
54 // globals that will be the same for any interpreter in this process.
// Cache of CPython `type` internals; populated by PyUpb_CPythonBits_Init().
typedef struct {
  // For each member, we note the equivalent expression that we could use in the
  // full (non-limited) API.
  newfunc type_new;            // PyTypeObject.tp_new
  destructor type_dealloc;     // PyTypeObject.tp_dealloc
  getattrofunc type_getattro;  // PyTypeObject.tp_getattro
  setattrofunc type_setattro;  // PyTypeObject.tp_setattro
  // Read at runtime from the `__basicsize__` attribute of `type`.
  size_t type_basicsize;       // sizeof(PyHeapTypeObject)

  // While we can refer to PY_VERSION_HEX in the limited API, this will give us
  // the version of Python we were compiled against, which may be different
  // than the version we are dynamically linked against.  Here we want the
  // version that is actually running in this process.
  //
  // Read at runtime from `sys.hexversion`.
  long python_version_hex;  // PY_VERSION_HEX
} PyUpb_CPythonBits;

// A global containing the values for this process.
PyUpb_CPythonBits cpython_bits;
73 
upb_Pre310_PyType_GetDeallocSlot(PyTypeObject * type_subclass)74 destructor upb_Pre310_PyType_GetDeallocSlot(PyTypeObject* type_subclass) {
75   // This is a bit desperate.  We need type_dealloc(), but PyType_GetSlot(type,
76   // Py_tp_dealloc) will return subtype_dealloc().  There appears to be no way
77   // whatsoever to fetch type_dealloc() through the limited API until Python
78   // 3.10.
79   //
80   // To work around this so we attempt to find it by looking for the offset of
81   // tp_dealloc in PyTypeObject, then memcpy() it directly.  This should always
82   // work in practice.
83   //
84   // Starting with Python 3.10 on you can call PyType_GetSlot() on non-heap
85   // types.  We will be able to replace all this hack with just:
86   //
87   //   PyType_GetSlot(&PyType_Type, Py_tp_dealloc)
88   //
89   destructor subtype_dealloc = PyType_GetSlot(type_subclass, Py_tp_dealloc);
90   for (size_t i = 0; i < 2000; i += sizeof(uintptr_t)) {
91     destructor maybe_subtype_dealloc;
92     memcpy(&maybe_subtype_dealloc, (char*)type_subclass + i,
93            sizeof(destructor));
94     if (maybe_subtype_dealloc == subtype_dealloc) {
95       destructor type_dealloc;
96       memcpy(&type_dealloc, (char*)&PyType_Type + i, sizeof(destructor));
97       return type_dealloc;
98     }
99   }
100   assert(false);
101   return NULL;
102 }
103 
// Fills in `bits` with values queried from the running interpreter.
//
// Returns true on success.  Returns false on failure; every failure path goes
// through a CPython call that failed or sets an exception explicitly.
static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) {
  PyObject* bases = NULL;
  PyTypeObject* type = NULL;
  PyObject* size = NULL;
  PyObject* sys = NULL;
  PyObject* hex_version = NULL;
  long basicsize = 0;
  bool ret = false;

  // PyType_GetSlot() only works on heap types, so we cannot use it on
  // &PyType_Type directly. Instead we create our own (temporary) type derived
  // from PyType_Type: this will inherit all of the slots from PyType_Type, but
  // as a heap type it can be queried with PyType_GetSlot().
  static PyType_Slot dummy_slots[] = {{0, NULL}};

  static PyType_Spec dummy_spec = {
      "module.DummyClass",  // tp_name
      0,                    // tp_basicsize: to be filled in by size of base
      0,                    // tp_itemsize
      Py_TPFLAGS_DEFAULT,   // tp_flags
      dummy_slots,
  };

  bases = Py_BuildValue("(O)", &PyType_Type);
  if (!bases) goto err;
  type = (PyTypeObject*)PyType_FromSpecWithBases(&dummy_spec, bases);
  if (!type) goto err;

  bits->type_new = PyType_GetSlot(type, Py_tp_new);
  bits->type_dealloc = upb_Pre310_PyType_GetDeallocSlot(type);
  bits->type_getattro = PyType_GetSlot(type, Py_tp_getattro);
  bits->type_setattro = PyType_GetSlot(type, Py_tp_setattro);

  size = PyObject_GetAttrString((PyObject*)&PyType_Type, "__basicsize__");
  if (!size) goto err;
  // Convert through a signed temporary so that the error sentinel (-1) and any
  // other negative value are rejected before being stored in a size_t.
  basicsize = PyLong_AsLong(size);
  if (basicsize < 0) {
    if (!PyErr_Occurred()) {
      PyErr_SetString(PyExc_SystemError, "type.__basicsize__ is negative");
    }
    goto err;
  }
  bits->type_basicsize = (size_t)basicsize;

  assert(bits->type_new);
  assert(bits->type_dealloc);
  assert(bits->type_getattro);
  assert(bits->type_setattro);

#ifndef Py_LIMITED_API
  // With the full API available we can cross-check against the real slots.
  assert(bits->type_new == PyType_Type.tp_new);
  assert(bits->type_dealloc == PyType_Type.tp_dealloc);
  assert(bits->type_getattro == PyType_Type.tp_getattro);
  assert(bits->type_setattro == PyType_Type.tp_setattro);
  assert(bits->type_basicsize == sizeof(PyHeapTypeObject));
#endif

  // Each of these calls can fail and return NULL / -1 with an exception set;
  // bail out instead of passing NULL on to the next call.
  sys = PyImport_ImportModule("sys");
  if (!sys) goto err;
  hex_version = PyObject_GetAttrString(sys, "hexversion");
  if (!hex_version) goto err;
  bits->python_version_hex = PyLong_AsLong(hex_version);
  if (bits->python_version_hex == -1 && PyErr_Occurred()) goto err;
  ret = true;

err:
  Py_XDECREF(bases);
  Py_XDECREF(type);
  Py_XDECREF(size);
  Py_XDECREF(sys);
  Py_XDECREF(hex_version);
  return ret;
}
167 
168 // -----------------------------------------------------------------------------
169 // Message
170 // -----------------------------------------------------------------------------
171 
172 // The main message object.  The type of the object (PyUpb_Message.ob_type)
173 // will be an instance of the PyUpb_MessageMeta type (defined below).  So the
174 // chain is:
175 //   FooMessage = MessageMeta(...)
176 //   foo = FooMessage()
177 //
178 // Which becomes:
179 //   Object             C Struct Type        Python type (ob_type)
180 //   -----------------  -----------------    ---------------------
181 //   foo                PyUpb_Message        FooMessage
182 //   FooMessage         PyUpb_MessageMeta    message_meta_type
183 //   message_meta_type  PyTypeObject         'type' in Python
184 //
185 // A message object can be in one of two states: present or non-present.  When
186 // a message is non-present, it stores a reference to its parent, and a write
187 // to any attribute will trigger the message to become present in its parent.
188 // The parent may also be non-present, in which case a mutation will trigger a
189 // chain reaction.
190 typedef struct PyUpb_Message {
191   PyObject_HEAD;
192   PyObject* arena;
193   uintptr_t def;  // Tagged, low bit 1 == upb_FieldDef*, else upb_MessageDef*
194   union {
195     // when def is msgdef, the data for this msg.
196     upb_Message* msg;
197     // when def is fielddef, owning pointer to parent
198     struct PyUpb_Message* parent;
199   } ptr;
200   PyObject* ext_dict;  // Weak pointer to extension dict, if any.
201   // name->obj dict for non-present msg/map/repeated, NULL if none.
202   PyUpb_WeakMap* unset_subobj_map;
203   int version;
204 } PyUpb_Message;
205 
206 static PyObject* PyUpb_Message_GetAttr(PyObject* _self, PyObject* attr);
207 
// Returns true if `msg` is a stub, i.e. the low tag bit of `def` is set and
// `def` therefore holds a upb_FieldDef* rather than a upb_MessageDef*.
bool PyUpb_Message_IsStub(PyUpb_Message* msg) {
  const uintptr_t tag = msg->def & (uintptr_t)1;
  return tag != 0;
}
209 
PyUpb_Message_GetFieldDef(PyUpb_Message * msg)210 const upb_FieldDef* PyUpb_Message_GetFieldDef(PyUpb_Message* msg) {
211   assert(PyUpb_Message_IsStub(msg));
212   return (void*)(msg->def & ~(uintptr_t)1);
213 }
214 
_PyUpb_Message_GetMsgdef(PyUpb_Message * msg)215 static const upb_MessageDef* _PyUpb_Message_GetMsgdef(PyUpb_Message* msg) {
216   return PyUpb_Message_IsStub(msg)
217              ? upb_FieldDef_MessageSubDef(PyUpb_Message_GetFieldDef(msg))
218              : (void*)msg->def;
219 }
220 
PyUpb_Message_GetMsgdef(PyObject * self)221 const upb_MessageDef* PyUpb_Message_GetMsgdef(PyObject* self) {
222   return _PyUpb_Message_GetMsgdef((PyUpb_Message*)self);
223 }
224 
PyUpb_Message_GetMsg(PyUpb_Message * self)225 static upb_Message* PyUpb_Message_GetMsg(PyUpb_Message* self) {
226   assert(!PyUpb_Message_IsStub(self));
227   return self->ptr.msg;
228 }
229 
// Returns true if `self` is a message object, i.e. the type of its type is
// the module's message metaclass.  Never sets an exception itself.
bool PyUpb_Message_TryCheck(PyObject* self) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
  PyObject* cls = (PyObject*)Py_TYPE(self);
  const bool is_message = (Py_TYPE(cls) == state->message_meta_type);
  return is_message;
}
235 
// Like PyUpb_Message_TryCheck(), but sets a TypeError when `self` is not a
// message object.
bool PyUpb_Message_Verify(PyObject* self) {
  if (PyUpb_Message_TryCheck(self)) return true;

  PyErr_Format(PyExc_TypeError, "Expected a message object, but got %R.",
               self);
  return false;
}
244 
245 // If the message is reified, returns it.  Otherwise, returns NULL.
246 // If NULL is returned, the object is empty and has no underlying data.
PyUpb_Message_GetIfReified(PyObject * _self)247 upb_Message* PyUpb_Message_GetIfReified(PyObject* _self) {
248   PyUpb_Message* self = (void*)_self;
249   return PyUpb_Message_IsStub(self) ? NULL : self->ptr.msg;
250 }
251 
// Allocates a new, present (non-stub) message object of class `cls`.
//
// The object gets its own arena and a fresh upb_Message, and is registered in
// the object cache under that upb_Message.  The positional and keyword
// arguments are ignored here.
//
// Returns a new reference, or NULL if the Python object could not be
// allocated.
static PyObject* PyUpb_Message_New(PyObject* cls, PyObject* unused_args,
                                   PyObject* unused_kwargs) {
  const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(cls);
  const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
  PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
  // PyType_GenericAlloc() returns NULL on allocation failure; do not write
  // through a NULL pointer.
  if (!msg) return NULL;
  msg->def = (uintptr_t)msgdef;
  // NOTE(review): the result of PyUpb_Arena_New() is not checked here; confirm
  // whether it can fail and how the deallocator copes with a NULL arena.
  msg->arena = PyUpb_Arena_New();
  msg->ptr.msg = upb_Message_New(layout, PyUpb_Arena_Get(msg->arena));
  msg->unset_subobj_map = NULL;
  msg->ext_dict = NULL;
  msg->version = 0;

  PyObject* ret = &msg->ob_base;
  PyUpb_ObjCache_Add(msg->ptr.msg, ret);
  return ret;
}
268 
269 /*
270  * PyUpb_Message_LookupName()
271  *
272  * Tries to find a field or oneof named `py_name` in the message object `self`.
273  * The user must pass `f` and/or `o` to indicate whether a field or a oneof name
274  * is expected.  If the name is found and it has an expected type, the function
275  * sets `*f` or `*o` respectively and returns true.  Otherwise returns false
276  * and sets an exception of type `exc_type` if provided.
277  */
PyUpb_Message_LookupName(PyUpb_Message * self,PyObject * py_name,const upb_FieldDef ** f,const upb_OneofDef ** o,PyObject * exc_type)278 static bool PyUpb_Message_LookupName(PyUpb_Message* self, PyObject* py_name,
279                                      const upb_FieldDef** f,
280                                      const upb_OneofDef** o,
281                                      PyObject* exc_type) {
282   assert(f || o);
283   Py_ssize_t size;
284   const char* name = NULL;
285   if (PyUnicode_Check(py_name)) {
286     name = PyUnicode_AsUTF8AndSize(py_name, &size);
287   } else if (PyBytes_Check(py_name)) {
288     PyBytes_AsStringAndSize(py_name, (char**)&name, &size);
289   }
290   if (!name) {
291     PyErr_Format(exc_type,
292                  "Expected a field name, but got non-string argument %S.",
293                  py_name);
294     return false;
295   }
296   const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
297 
298   if (!upb_MessageDef_FindByNameWithSize(msgdef, name, size, f, o)) {
299     if (exc_type) {
300       PyErr_Format(exc_type, "Protocol message %s has no \"%s\" field.",
301                    upb_MessageDef_Name(msgdef), name);
302     }
303     return false;
304   }
305 
306   if (!o && !*f) {
307     if (exc_type) {
308       PyErr_Format(exc_type, "Expected a field name, but got oneof name %s.",
309                    name);
310     }
311     return false;
312   }
313 
314   if (!f && !*o) {
315     if (exc_type) {
316       PyErr_Format(exc_type, "Expected a oneof name, but got field name %s.",
317                    name);
318     }
319     return false;
320   }
321 
322   return true;
323 }
324 
// Makes `dst` a copy of `src` by calling dst.Clear() and then
// dst.MergeFrom(src).  Returns false if either argument is NULL or if either
// method call fails.
static bool PyUpb_Message_InitMessageMapEntry(PyObject* dst, PyObject* src) {
  if (src == NULL || dst == NULL) return false;

  // TODO(haberman): Currently we are doing Clear()+MergeFrom().  Replace with
  // CopyFrom() once that is implemented.
  PyObject* cleared = PyObject_CallMethod(dst, "Clear", NULL);
  if (cleared == NULL) return false;
  Py_DECREF(cleared);

  PyObject* merged = PyObject_CallMethod(dst, "MergeFrom", "O", src);
  if (merged == NULL) return false;
  Py_DECREF(merged);

  return true;
}
339 
// Populates the map container `map` (for map field `f`) from `value`.
//
// If the map's value type is a message, `value` is iterated and, for each key,
// map[key] is overwritten with a copy of value[key].  Otherwise the work is
// delegated to map.update(value).
//
// Returns 0 on success, or -1 with a Python exception set on failure.
int PyUpb_Message_InitMapAttributes(PyObject* map, PyObject* value,
                                    const upb_FieldDef* f) {
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
  // Index 1 of the map-entry message is used as the value field.
  const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
  PyObject* it = NULL;
  PyObject* tmp = NULL;
  int ret = -1;
  if (upb_FieldDef_IsSubMessage(val_f)) {
    it = PyObject_GetIter(value);
    if (it == NULL) {
      PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
                   upb_FieldDef_FullName(f));
      goto err;
    }
    PyObject* e;
    while ((e = PyIter_Next(it)) != NULL) {
      PyObject* src = PyObject_GetItem(value, e);
      PyObject* dst = PyObject_GetItem(map, e);
      Py_DECREF(e);
      // InitMessageMapEntry() tolerates NULL src/dst and reports failure.
      bool ok = PyUpb_Message_InitMessageMapEntry(dst, src);
      Py_XDECREF(src);
      Py_XDECREF(dst);
      if (!ok) goto err;
    }
    // PyIter_Next() returns NULL both when the iterator is exhausted and when
    // it raised; only the former is success.
    if (PyErr_Occurred()) goto err;
  } else {
    tmp = PyObject_CallMethod(map, "update", "O", value);
    if (!tmp) goto err;
  }
  ret = 0;

err:
  Py_XDECREF(it);
  Py_XDECREF(tmp);
  return ret;
}
375 
376 void PyUpb_Message_EnsureReified(PyUpb_Message* self);
377 
// Initializes the map field `f` (attribute `name`) of `_self` from `value`.
// Returns false on failure.
static bool PyUpb_Message_InitMapAttribute(PyObject* _self, PyObject* name,
                                           const upb_FieldDef* f,
                                           PyObject* value) {
  PyObject* map = PyUpb_Message_GetAttr(_self, name);
  // The attribute lookup can fail; bail out rather than passing NULL on and
  // calling Py_DECREF() on it below.
  if (!map) return false;
  int ok = PyUpb_Message_InitMapAttributes(map, value, f);
  Py_DECREF(map);
  return ok >= 0;
}
386 
// Appends one new message to `repeated` for every element of the iterable
// `value`.  A dict element is passed as initializer keyword arguments for the
// new message; any other element is merged into a freshly added message.
//
// Returns true if no Python exception is pending when the function finishes.
static bool PyUpb_Message_InitRepeatedMessageAttribute(PyObject* _self,
                                                       PyObject* repeated,
                                                       PyObject* value,
                                                       const upb_FieldDef* f) {
  PyObject* iter = PyObject_GetIter(value);
  if (iter == NULL) {
    PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
                 upb_FieldDef_FullName(f));
    return false;
  }

  PyObject* item = NULL;
  PyObject* added = NULL;
  for (;;) {
    item = PyIter_Next(iter);
    if (item == NULL) break;  // Exhausted, or the iterator raised.

    // Dicts initialize the new element directly; everything else is merged.
    PyObject* init_kwargs = PyDict_Check(item) ? item : NULL;
    added = PyUpb_RepeatedCompositeContainer_Add(repeated, NULL, init_kwargs);
    if (added == NULL) break;

    if (init_kwargs == NULL) {
      PyObject* merged = PyUpb_Message_MergeFrom(added, item);
      if (merged == NULL) break;
      Py_DECREF(merged);
    }

    Py_DECREF(item);
    Py_DECREF(added);
    added = NULL;
  }

  // On an early exit `item` and/or `added` may still hold references.
  Py_XDECREF(iter);
  Py_XDECREF(item);
  Py_XDECREF(added);
  return !PyErr_Occurred();  // Check PyIter_Next() exit.
}
421 
// Initializes the repeated field called `name` on `_self` from the iterable
// `value`.  An unknown name raises AttributeError.  Returns false on failure.
static bool PyUpb_Message_InitRepeatedAttribute(PyObject* _self, PyObject* name,
                                                PyObject* value) {
  PyUpb_Message* self = (void*)_self;
  const upb_FieldDef* field;
  if (!PyUpb_Message_LookupName(self, name, &field, NULL,
                                PyExc_AttributeError)) {
    return false;
  }

  PyObject* repeated = PyUpb_Message_GetFieldValue(_self, field);
  if (repeated == NULL) return false;

  bool ok;
  if (upb_FieldDef_IsSubMessage(field)) {
    ok = PyUpb_Message_InitRepeatedMessageAttribute(_self, repeated, value,
                                                    field);
  } else {
    // The result of Extend() is only needed to detect failure.
    PyObject* extended = PyUpb_RepeatedContainer_Extend(repeated, value);
    ok = (extended != NULL);
    Py_XDECREF(extended);
  }

  Py_DECREF(repeated);
  return ok;
}
450 
451 static PyObject* PyUpb_Message_MergePartialFrom(PyObject*, PyObject*);
452 
// Initializes the sub-message attribute `name` of `_self` from `value`, which
// must be either a message object (merged into the sub-message) or a dict
// (used as initializer keyword arguments for the sub-message).
//
// Returns true on success, false with a Python exception set on failure.
static bool PyUpb_Message_InitMessageAttribute(PyObject* _self, PyObject* name,
                                               PyObject* value) {
  PyObject* submsg = PyUpb_Message_GetAttr(_self, name);
  // This function returns bool: a failure must be reported as false.  (-1
  // would convert to true and be read by the caller as success.)
  if (!submsg) return false;
  assert(!PyErr_Occurred());
  bool ok;
  if (PyUpb_Message_TryCheck(value)) {
    PyObject* tmp = PyUpb_Message_MergePartialFrom(submsg, value);
    ok = tmp != NULL;
    Py_XDECREF(tmp);
  } else if (PyDict_Check(value)) {
    assert(!PyErr_Occurred());
    ok = PyUpb_Message_InitAttributes(submsg, NULL, value) >= 0;
  } else {
    const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
    PyErr_Format(PyExc_TypeError, "Message must be initialized with a dict: %s",
                 upb_MessageDef_FullName(m));
    ok = false;
  }
  Py_DECREF(submsg);
  return ok;
}
475 
// Converts the Python object `value` to a upb value for field `f` and stores
// it in `msg`.  Returns false if the conversion fails; the field is left
// untouched in that case.
static bool PyUpb_Message_InitScalarAttribute(upb_Message* msg,
                                              const upb_FieldDef* f,
                                              PyObject* value,
                                              upb_Arena* arena) {
  assert(!PyErr_Occurred());

  upb_MessageValue converted;
  const bool ok = PyUpb_PyToUpb(value, f, &converted, arena);
  if (ok) {
    upb_Message_SetFieldByDef(msg, f, converted, arena);
  }
  return ok;
}
486 
// Initializes the fields of message `_self` from the keyword dict `kwargs`.
//
// Positional arguments are rejected with TypeError.  Each key must name a
// field of the message (ValueError otherwise).  Values of None are ignored.
// The message is reified before any field is written.
//
// Returns 0 on success, or -1 with a Python exception set.
int PyUpb_Message_InitAttributes(PyObject* _self, PyObject* args,
                                 PyObject* kwargs) {
  assert(!PyErr_Occurred());

  const bool has_positional = (args != NULL) && (PyTuple_Size(args) != 0);
  if (has_positional) {
    PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
    return -1;
  }

  if (kwargs == NULL) return 0;

  PyUpb_Message* self = (void*)_self;
  PyUpb_Message_EnsureReified(self);
  upb_Message* msg = PyUpb_Message_GetMsg(self);
  upb_Arena* arena = PyUpb_Arena_Get(self->arena);

  Py_ssize_t pos = 0;
  PyObject* name;
  PyObject* value;
  while (PyDict_Next(kwargs, &pos, &name, &value)) {
    assert(!PyErr_Occurred());

    const upb_FieldDef* f;
    if (!PyUpb_Message_LookupName(self, name, &f, NULL, PyExc_ValueError)) {
      return -1;
    }

    if (value == Py_None) continue;  // Ignored.

    assert(!PyErr_Occurred());

    // Dispatch on the kind of field; each helper reports success as bool.
    bool ok;
    if (upb_FieldDef_IsMap(f)) {
      ok = PyUpb_Message_InitMapAttribute(_self, name, f, value);
    } else if (upb_FieldDef_IsRepeated(f)) {
      ok = PyUpb_Message_InitRepeatedAttribute(_self, name, value);
    } else if (upb_FieldDef_IsSubMessage(f)) {
      ok = PyUpb_Message_InitMessageAttribute(_self, name, value);
    } else {
      ok = PyUpb_Message_InitScalarAttribute(msg, f, value, arena);
    }
    if (!ok) return -1;

    // A helper may have reported success with an exception still pending.
    if (PyErr_Occurred()) return -1;
  }

  return PyErr_Occurred() ? -1 : 0;
}
533 
// Initializer for message objects: rejects positional arguments with
// TypeError, then applies `kwargs` via PyUpb_Message_InitAttributes().
// Returns 0 on success and -1 on failure.
static int PyUpb_Message_Init(PyObject* _self, PyObject* args,
                              PyObject* kwargs) {
  const bool has_positional = (args != NULL) && (PyTuple_Size(args) != 0);
  if (has_positional) {
    PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
    return -1;
  }

  return PyUpb_Message_InitAttributes(_self, args, kwargs);
}
543 
// Creates a stub (non-present) message object for the sub-message field `f`
// of `parent`.  The stub stores `f` in its tagged `def` word and takes a new
// reference on both `parent` and `arena`.
//
// Returns a new reference, or NULL if the message class could not be obtained
// or the object could not be allocated.
static PyObject* PyUpb_Message_NewStub(PyObject* parent, const upb_FieldDef* f,
                                       PyObject* arena) {
  const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f);
  PyObject* cls = PyUpb_Descriptor_GetClass(sub_m);
  // NOTE(review): presumably an exception is set when this returns NULL;
  // confirm against PyUpb_Descriptor_GetClass().
  if (!cls) return NULL;

  PyUpb_Message* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
  if (!msg) {
    // Allocation failed: release our reference on the class and propagate.
    Py_DECREF(cls);
    return NULL;
  }
  msg->def = (uintptr_t)f | 1;  // Low bit set marks this object as a stub.
  msg->arena = arena;
  msg->ptr.parent = (PyUpb_Message*)parent;
  msg->unset_subobj_map = NULL;
  msg->ext_dict = NULL;
  msg->version = 0;

  Py_DECREF(cls);
  Py_INCREF(parent);
  Py_INCREF(arena);
  return &msg->ob_base;
}
562 
PyUpb_Message_IsEmpty(const upb_Message * msg,const upb_MessageDef * m,const upb_DefPool * ext_pool)563 static bool PyUpb_Message_IsEmpty(const upb_Message* msg,
564                                   const upb_MessageDef* m,
565                                   const upb_DefPool* ext_pool) {
566   if (!msg) return true;
567 
568   size_t iter = kUpb_Message_Begin;
569   const upb_FieldDef* f;
570   upb_MessageValue val;
571   if (upb_Message_Next(msg, m, ext_pool, &f, &val, &iter)) return false;
572 
573   size_t len;
574   (void)upb_Message_GetUnknown(msg, &len);
575   return len == 0;
576 }
577 
// Compares message `m1` with the Python object `_m2`.
//
// Returns true if they are the same object.  Returns false if `_m2` is not an
// instance of m1's type.  Otherwise the underlying messages are compared; an
// empty message (including a stub) only equals another empty message.
static bool PyUpb_Message_IsEqual(PyUpb_Message* m1, PyObject* _m2) {
  PyUpb_Message* m2 = (void*)_m2;
  if (m1 == m2) return true;
  if (!PyObject_TypeCheck(_m2, m1->ob_base.ob_type)) return false;

  const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(m1);
#ifndef NDEBUG
  // Having passed the type check, both objects must share one message def.
  const upb_MessageDef* other_msgdef = _PyUpb_Message_GetMsgdef(m2);
  assert(msgdef == other_msgdef);
#endif

  const upb_Message* msg1 = PyUpb_Message_GetIfReified((PyObject*)m1);
  const upb_Message* msg2 = PyUpb_Message_GetIfReified(_m2);
  const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(msgdef));

  const bool empty1 = PyUpb_Message_IsEmpty(msg1, msgdef, symtab);
  const bool empty2 = PyUpb_Message_IsEmpty(msg2, msgdef, symtab);
  if (empty1 != empty2) return false;  // Exactly one side is empty.
  if (empty1) return true;             // Both sides are empty.

  return upb_Message_IsEqual(msg1, msg2, msgdef);
}
599 
PyUpb_Message_InitAsMsg(PyUpb_Message * m,upb_Arena * arena)600 static const upb_FieldDef* PyUpb_Message_InitAsMsg(PyUpb_Message* m,
601                                                    upb_Arena* arena) {
602   const upb_FieldDef* f = PyUpb_Message_GetFieldDef(m);
603   const upb_MessageDef* m2 = upb_FieldDef_MessageSubDef(f);
604   m->ptr.msg = upb_Message_New(upb_MessageDef_MiniTable(m2), arena);
605   m->def = (uintptr_t)m2;
606   PyUpb_ObjCache_Add(m->ptr.msg, &m->ob_base);
607   return f;
608 }
609 
// Stores child's upb_Message into field `f` of parent's upb_Message, removes
// `f` from parent's map of unset sub-objects, and drops one reference on
// `child`.  Both `parent` and `child` must be non-stubs.
static void PyUpb_Message_SetField(PyUpb_Message* parent, const upb_FieldDef* f,
                                   PyUpb_Message* child, upb_Arena* arena) {
  upb_MessageValue child_val = {.msg_val = PyUpb_Message_GetMsg(child)};
  upb_Message* parent_msg = PyUpb_Message_GetMsg(parent);
  upb_Message_SetFieldByDef(parent_msg, f, child_val, arena);

  PyUpb_WeakMap_Delete(parent->unset_subobj_map, f);

  // Releases a ref previously owned by child->ptr.parent of our child.
  Py_DECREF(child);
}
618 
619 /*
620  * PyUpb_Message_EnsureReified()
621  *
622  * This implements the "expando" behavior of Python protos:
623  *   foo = FooProto()
624  *
625  *   # The intermediate messages don't really exist, and won't be serialized.
626  *   x = foo.bar.bar.bar.bar.bar.baz
627  *
628  *   # Now all the intermediate objects are created.
629  *   foo.bar.bar.bar.bar.bar.baz = 5
630  *
631  * This function should be called before performing any mutation of a protobuf
632  * object.
633  *
634  * Post-condition:
635  *   PyUpb_Message_IsStub(self) is false
636  */
PyUpb_Message_EnsureReified(PyUpb_Message * self)637 void PyUpb_Message_EnsureReified(PyUpb_Message* self) {
638   if (!PyUpb_Message_IsStub(self)) return;
639   upb_Arena* arena = PyUpb_Arena_Get(self->arena);
640 
641   // This is a non-present message. We need to create a real upb_Message for
642   // this object and every parent until we reach a present message.
643   PyUpb_Message* child = self;
644   PyUpb_Message* parent = self->ptr.parent;
645   const upb_FieldDef* child_f = PyUpb_Message_InitAsMsg(child, arena);
646   Py_INCREF(child);  // To avoid a special-case in PyUpb_Message_SetField().
647 
648   do {
649     PyUpb_Message* next_parent = parent->ptr.parent;
650     const upb_FieldDef* parent_f = NULL;
651     if (PyUpb_Message_IsStub(parent)) {
652       parent_f = PyUpb_Message_InitAsMsg(parent, arena);
653     }
654     PyUpb_Message_SetField(parent, child_f, child, arena);
655     child = parent;
656     child_f = parent_f;
657     parent = next_parent;
658   } while (child_f);
659 
660   // Releases ref previously owned by child->ptr.parent of our child.
661   Py_DECREF(child);
662   self->version++;
663 }
664 
665 static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self);
666 
667 /*
668  * PyUpb_Message_Reify()
669  *
670  * The message equivalent of PyUpb_*Container_Reify(), this transitions
671  * the wrapper from the unset state (owning a reference on self->ptr.parent) to
672  * the set state (having a non-owning pointer to self->ptr.msg).
673  */
PyUpb_Message_Reify(PyUpb_Message * self,const upb_FieldDef * f,upb_Message * msg)674 static void PyUpb_Message_Reify(PyUpb_Message* self, const upb_FieldDef* f,
675                                 upb_Message* msg) {
676   assert(f == PyUpb_Message_GetFieldDef(self));
677   if (!msg) {
678     const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef((PyObject*)self);
679     const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
680     msg = upb_Message_New(layout, PyUpb_Arena_Get(self->arena));
681   }
682   PyUpb_ObjCache_Add(msg, &self->ob_base);
683   Py_DECREF(&self->ptr.parent->ob_base);
684   self->ptr.msg = msg;  // Overwrites self->ptr.parent
685   self->def = (uintptr_t)upb_FieldDef_MessageSubDef(f);
686   PyUpb_Message_SyncSubobjs(self);
687 }
688 
689 /*
690  * PyUpb_Message_SyncSubobjs()
691  *
692  * This operation must be invoked whenever the underlying upb_Message has been
693  * mutated directly in C.  This will attach any newly-present field data
694  * to previously returned stub wrapper objects.
695  *
696  * For example:
697  *   foo = FooMessage()
698  *   sub = foo.submsg  # Empty, unset sub-message
699  *
700  *   # SyncSubobjs() is required to connect our existing 'sub' wrapper to the
701  *   # newly created foo.submsg data in C.
702  *   foo.MergeFrom(FooMessage(submsg={}))
703  *
704  * This requires that all of the new sub-objects that have appeared are owned
705  * by `self`'s arena.
706  */
PyUpb_Message_SyncSubobjs(PyUpb_Message * self)707 static void PyUpb_Message_SyncSubobjs(PyUpb_Message* self) {
708   PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
709   if (!subobj_map) return;
710 
711   upb_Message* msg = PyUpb_Message_GetMsg(self);
712   intptr_t iter = PYUPB_WEAKMAP_BEGIN;
713   const void* key;
714   PyObject* obj;
715 
716   // The last ref to this message could disappear during iteration.
717   // When we call PyUpb_*Container_Reify() below, the container will drop
718   // its ref on `self`.  If that was the last ref on self, the object will be
719   // deleted, and `subobj_map` along with it.  We need it to live until we are
720   // done iterating.
721   Py_INCREF(&self->ob_base);
722 
723   while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
724     const upb_FieldDef* f = key;
725     if (upb_FieldDef_HasPresence(f) && !upb_Message_HasFieldByDef(msg, f))
726       continue;
727     upb_MessageValue msgval = upb_Message_GetFieldByDef(msg, f);
728     PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
729     if (upb_FieldDef_IsMap(f)) {
730       if (!msgval.map_val) continue;
731       PyUpb_MapContainer_Reify(obj, (upb_Map*)msgval.map_val);
732     } else if (upb_FieldDef_IsRepeated(f)) {
733       if (!msgval.array_val) continue;
734       PyUpb_RepeatedContainer_Reify(obj, (upb_Array*)msgval.array_val);
735     } else {
736       PyUpb_Message* sub = (void*)obj;
737       assert(self == sub->ptr.parent);
738       PyUpb_Message_Reify(sub, f, (upb_Message*)msgval.msg_val);
739     }
740   }
741 
742   Py_DECREF(&self->ob_base);
743 
744   // TODO(haberman): present fields need to be iterated too if they can reach
745   // a WeakMap.
746 }
747 
PyUpb_Message_ToString(PyUpb_Message * self)748 static PyObject* PyUpb_Message_ToString(PyUpb_Message* self) {
749   if (PyUpb_Message_IsStub(self)) {
750     return PyUnicode_FromStringAndSize(NULL, 0);
751   }
752   upb_Message* msg = PyUpb_Message_GetMsg(self);
753   const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
754   const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
755   char buf[1024];
756   int options = UPB_TXTENC_SKIPUNKNOWN;
757   size_t size = upb_TextEncode(msg, msgdef, symtab, options, buf, sizeof(buf));
758   if (size < sizeof(buf)) {
759     return PyUnicode_FromStringAndSize(buf, size);
760   } else {
761     char* buf2 = malloc(size + 1);
762     size_t size2 = upb_TextEncode(msg, msgdef, symtab, options, buf2, size + 1);
763     assert(size == size2);
764     PyObject* ret = PyUnicode_FromStringAndSize(buf2, size2);
765     free(buf2);
766     return ret;
767   }
768 }
769 
// Rich-comparison handler for messages.
//
// Only equality (Py_EQ) and inequality (Py_NE) are supported; every other
// operator yields NotImplemented.  Returns a new reference.
static PyObject* PyUpb_Message_RichCompare(PyObject* _self, PyObject* other,
                                           int opid) {
  switch (opid) {
    case Py_EQ:
    case Py_NE:
      break;
    default:
      Py_INCREF(Py_NotImplemented);
      return Py_NotImplemented;
  }

  PyUpb_Message* msg = (void*)_self;
  bool equal = PyUpb_Message_IsEqual(msg, other);
  // Py_NE is simply the negation of the equality result.
  return PyBool_FromLong(opid == Py_NE ? !equal : equal);
}
781 
// Removes the entry keyed by field `f` from this message's map of unset
// sub-objects.
void PyUpb_Message_CacheDelete(PyObject* _self, const upb_FieldDef* f) {
  PyUpb_Message* owner = (PyUpb_Message*)_self;
  PyUpb_WeakMap* unset_map = owner->unset_subobj_map;
  PyUpb_WeakMap_Delete(unset_map, f);
}
786 
// Stores `subobj` as the value of field `f` in this message.
//
// The message is reified first, and any cached unset sub-object for `f` is
// dropped from the cache before the value is written.
void PyUpb_Message_SetConcreteSubobj(PyObject* _self, const upb_FieldDef* f,
                                     upb_MessageValue subobj) {
  PyUpb_Message* owner = (PyUpb_Message*)_self;
  PyUpb_Message_EnsureReified(owner);
  PyUpb_Message_CacheDelete(_self, f);

  upb_Message* target = owner->ptr.msg;
  upb_Arena* arena = PyUpb_Arena_Get(owner->arena);
  upb_Message_SetFieldByDef(target, f, subobj, arena);
}
795 
// Deallocator for message objects.
//
// Unlinks the object from whichever cache refers to it, releases the resources
// it owns (the unset-subobject map and its reference on the arena), and then
// frees the object memory through the type's tp_free slot.
static void PyUpb_Message_Dealloc(PyObject* _self) {
  PyUpb_Message* self = (void*)_self;

  if (PyUpb_Message_IsStub(self)) {
    // A stub is tracked in its parent's cache under its field def; remove that
    // entry and drop the reference this stub holds on the parent.
    PyUpb_Message_CacheDelete((PyObject*)self->ptr.parent,
                              PyUpb_Message_GetFieldDef(self));
    Py_DECREF(self->ptr.parent);
  } else {
    // A reified message is tracked in the object cache under its upb message.
    PyUpb_ObjCache_Delete(self->ptr.msg);
  }

  if (self->unset_subobj_map) {
    PyUpb_WeakMap_Free(self->unset_subobj_map);
  }

  Py_DECREF(self->arena);

  // We do not use PyUpb_Dealloc() here because Message is a base type and for
  // base types there is a bug we have to work around in this case (see below).
  // The type is captured before tp_free() because it is still needed after
  // `self` has been released.
  PyTypeObject* tp = Py_TYPE(self);
  freefunc tp_free = PyType_GetSlot(tp, Py_tp_free);
  tp_free(self);

  if (cpython_bits.python_version_hex >= 0x03080000) {
    // Prior to Python 3.8 there is a bug where deallocating the type here would
    // lead to a double-decref: https://bugs.python.org/issue37879
    Py_DECREF(tp);
  }
}
825 
// Returns the Python wrapper object for the upb message `u_msg`.
//
// If the object cache already holds a wrapper for `u_msg`, that wrapper is
// returned.  Otherwise a new instance of the Python class for `m` is
// allocated, initialized as a reified message that holds a reference on
// `arena`, and registered in the object cache.
PyObject* PyUpb_Message_Get(upb_Message* u_msg, const upb_MessageDef* m,
                            PyObject* arena) {
  PyObject* cached = PyUpb_ObjCache_Get(u_msg);
  if (cached != NULL) return cached;

  PyObject* cls = PyUpb_Descriptor_GetClass(m);
  // It is not safe to use PyObject_{,GC}_New() due to:
  //    https://bugs.python.org/issue35810
  PyUpb_Message* wrapper = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);

  // Identity of the wrapped message.
  wrapper->def = (uintptr_t)m;
  wrapper->ptr.msg = u_msg;
  wrapper->arena = arena;

  // Lazily-populated state starts out empty.
  wrapper->unset_subobj_map = NULL;
  wrapper->ext_dict = NULL;
  wrapper->version = 0;

  PyObject* result = &wrapper->ob_base;
  Py_DECREF(cls);
  Py_INCREF(arena);  // The wrapper keeps the arena alive.
  PyUpb_ObjCache_Add(u_msg, result);
  return result;
}
847 
848 /* PyUpb_Message_GetStub()
849  *
850  * Non-present messages return "stub" objects that point to their parent, but
851  * will materialize into real upb objects if they are mutated.
852  *
853  * Note: we do *not* create stubs for repeated/map fields unless the parent
854  * is a stub:
855  *
856  *    msg = TestMessage()
857  *    msg.submessage                # (A) Creates a stub
858  *    msg.repeated_foo              # (B) Does *not* create a stub
859  *    msg.submessage.repeated_bar   # (C) Creates a stub
860  *
861  * In case (B) we have some freedom: we could either create a stub, or create
862  * a reified object with underlying data.  It appears that either could work
863  * equally well, with no observable change to users.  There isn't a clear
 * advantage to either choice.  We choose to match the pre-existing C++
 * implementation for consistency, but if it becomes apparent that there would
 * be some benefit to reversing this decision, it should be totally within the
 * realm of possibility.
868  */
PyUpb_Message_GetStub(PyUpb_Message * self,const upb_FieldDef * field)869 PyObject* PyUpb_Message_GetStub(PyUpb_Message* self,
870                                 const upb_FieldDef* field) {
871   PyObject* _self = (void*)self;
872   if (!self->unset_subobj_map) {
873     self->unset_subobj_map = PyUpb_WeakMap_New();
874   }
875   PyObject* subobj = PyUpb_WeakMap_Get(self->unset_subobj_map, field);
876 
877   if (subobj) return subobj;
878 
879   if (upb_FieldDef_IsMap(field)) {
880     subobj = PyUpb_MapContainer_NewStub(_self, field, self->arena);
881   } else if (upb_FieldDef_IsRepeated(field)) {
882     subobj = PyUpb_RepeatedContainer_NewStub(_self, field, self->arena);
883   } else {
884     subobj = PyUpb_Message_NewStub(&self->ob_base, field, self->arena);
885   }
886   PyUpb_WeakMap_Add(self->unset_subobj_map, field, subobj);
887 
888   assert(!PyErr_Occurred());
889   return subobj;
890 }
891 
PyUpb_Message_GetPresentWrapper(PyUpb_Message * self,const upb_FieldDef * field)892 PyObject* PyUpb_Message_GetPresentWrapper(PyUpb_Message* self,
893                                           const upb_FieldDef* field) {
894   assert(!PyUpb_Message_IsStub(self));
895   upb_MutableMessageValue mutval =
896       upb_Message_Mutable(self->ptr.msg, field, PyUpb_Arena_Get(self->arena));
897   if (upb_FieldDef_IsMap(field)) {
898     return PyUpb_MapContainer_GetOrCreateWrapper(mutval.map, field,
899                                                  self->arena);
900   } else {
901     return PyUpb_RepeatedContainer_GetOrCreateWrapper(mutval.array, field,
902                                                       self->arena);
903   }
904 }
905 
PyUpb_Message_GetScalarValue(PyUpb_Message * self,const upb_FieldDef * field)906 PyObject* PyUpb_Message_GetScalarValue(PyUpb_Message* self,
907                                        const upb_FieldDef* field) {
908   upb_MessageValue val;
909   if (PyUpb_Message_IsStub(self)) {
910     // Unset message always returns default values.
911     val = upb_FieldDef_Default(field);
912   } else {
913     val = upb_Message_GetFieldByDef(self->ptr.msg, field);
914   }
915   return PyUpb_UpbToPy(val, field, self->arena);
916 }
917 
918 /*
919  * PyUpb_Message_GetFieldValue()
920  *
921  * Implements the equivalent of getattr(msg, field), once `field` has
922  * already been resolved to a `upb_FieldDef*`.
923  *
924  * This may involve constructing a wrapper object for the given field, or
925  * returning one that was previously constructed.  If the field is not actually
926  * set, the wrapper object will be an "unset" object that is not actually
927  * connected to any C data.
928  */
PyObject* PyUpb_Message_GetFieldValue(PyObject* _self,
                                      const upb_FieldDef* field) {
  PyUpb_Message* self = (void*)_self;
  assert(upb_FieldDef_ContainingType(field) == PyUpb_Message_GetMsgdef(_self));
  const bool is_submsg = upb_FieldDef_IsSubMessage(field);
  const bool is_repeated = upb_FieldDef_IsRepeated(field);

  if (PyUpb_Message_IsStub(self)) {
    // On a stub, every non-scalar field is itself represented by a stub.
    if (is_submsg || is_repeated) return PyUpb_Message_GetStub(self, field);
  } else if (is_submsg && !is_repeated &&
             !upb_Message_HasFieldByDef(self->ptr.msg, field)) {
    // A singular sub-message that is not present also gets a stub.
    return PyUpb_Message_GetStub(self, field);
  }

  if (is_repeated) return PyUpb_Message_GetPresentWrapper(self, field);
  return PyUpb_Message_GetScalarValue(self, field);
}
945 
// Assigns the Python object `value` to `field` of this message.
//
// Only scalar fields may be assigned: sub-message, map and repeated fields
// raise `exc`.  The message is reified before the converted value is stored.
//
// Returns 0 on success, or -1 with a Python exception set.
int PyUpb_Message_SetFieldValue(PyObject* _self, const upb_FieldDef* field,
                                PyObject* value, PyObject* exc) {
  assert(value);

  const bool assignable =
      !upb_FieldDef_IsSubMessage(field) && !upb_FieldDef_IsRepeated(field);
  if (!assignable) {
    PyErr_Format(exc,
                 "Assignment not allowed to message, map, or repeated "
                 "field \"%s\" in protocol message object.",
                 upb_FieldDef_Name(field));
    return -1;
  }

  PyUpb_Message* self = (void*)_self;
  PyUpb_Message_EnsureReified(self);

  upb_Arena* arena = PyUpb_Arena_Get(self->arena);
  upb_MessageValue converted;
  if (!PyUpb_PyToUpb(value, field, &converted, arena)) return -1;

  upb_Message_SetFieldByDef(self->ptr.msg, field, converted, arena);
  return 0;
}
970 
// Returns the `version` counter stored on the message object.
int PyUpb_Message_GetVersion(PyObject* _self) {
  const PyUpb_Message* msg = (const PyUpb_Message*)_self;
  return msg->version;
}
975 
976 /*
977  * PyUpb_Message_GetAttr()
978  *
979  * Implements:
980  *   foo = msg.foo
981  *
982  * Attribute lookup must find both message fields and base class methods like
983  * msg.SerializeToString().
984  */
__attribute__((flatten)) static PyObject* PyUpb_Message_GetAttr(
    PyObject* _self, PyObject* attr) {
  PyUpb_Message* self = (void*)_self;

  // First choice: `attr` names a field of this message.
  const upb_FieldDef* field;
  if (PyUpb_Message_LookupName(self, attr, &field, NULL, NULL)) {
    return PyUpb_Message_GetFieldValue(_self, field);
  }

  // Second choice: ordinary attribute lookup (base class attributes).
  assert(!PyErr_Occurred());
  PyObject* found = PyObject_GenericGetAttr(_self, attr);
  if (found != NULL) return found;

  // Last resort: swallow an AttributeError and retry on the metaclass to pick
  // up class attributes.  Any other exception propagates unchanged.
  if (!PyErr_ExceptionMatches(PyExc_AttributeError)) return NULL;

  // "Extensions" is special-cased: it affirmatively raises AttributeError when
  // a message is not extendable, so that error must not be swallowed.
  const char* name = PyUpb_GetStrData(attr);
  if (name == NULL || strcmp(name, "Extensions") == 0) return NULL;

  PyErr_Clear();
  return PyUpb_MessageMeta_GetAttr((PyObject*)Py_TYPE(_self), attr);
}
1013 
1014 /*
1015  * PyUpb_Message_SetAttr()
1016  *
1017  * Implements:
1018  *   msg.foo = foo
1019  */
static int PyUpb_Message_SetAttr(PyObject* _self, PyObject* attr,
                                 PyObject* value) {
  // Resolve the attribute name to a field; an unknown name raises
  // AttributeError.
  const upb_FieldDef* target;
  const bool found = PyUpb_Message_LookupName((PyUpb_Message*)_self, attr,
                                              &target, NULL,
                                              PyExc_AttributeError);
  if (!found) return -1;

  return PyUpb_Message_SetFieldValue(_self, target, value,
                                     PyExc_AttributeError);
}
1031 
// Implements msg.HasField(name), where `name` is a field or a oneof.
//
// Raises ValueError if the name cannot be resolved or if it names a field
// without presence.  A stub message never has any field set.
static PyObject* PyUpb_Message_HasField(PyObject* _self, PyObject* arg) {
  PyUpb_Message* self = (void*)_self;
  const upb_FieldDef* field;
  const upb_OneofDef* oneof;

  if (!PyUpb_Message_LookupName(self, arg, &field, &oneof, PyExc_ValueError)) {
    return NULL;
  }

  if (field != NULL && !upb_FieldDef_HasPresence(field)) {
    PyErr_Format(PyExc_ValueError, "Field %s does not have presence.",
                 upb_FieldDef_FullName(field));
    return NULL;
  }

  if (PyUpb_Message_IsStub(self)) Py_RETURN_FALSE;

  bool present;
  if (field != NULL) {
    present = upb_Message_HasFieldByDef(self->ptr.msg, field);
  } else {
    // For a oneof, "has" means that some member of the oneof is set.
    present = upb_Message_WhichOneof(self->ptr.msg, oneof) != NULL;
  }
  return PyBool_FromLong(present);
}
1053 
1054 static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
1055                                                         PyObject* arg);
1056 
// Implements IsInitialized(errors): reports whether the message has no
// initialization errors, and extends `errors` with whatever
// PyUpb_Message_FindInitializationErrors() reported otherwise.
//
// Returns a new reference to a bool, or NULL with an exception set.
static PyObject* PyUpb_Message_IsInitializedAppendErrors(PyObject* _self,
                                                         PyObject* errors) {
  PyObject* missing = PyUpb_Message_FindInitializationErrors(_self, NULL);
  if (missing == NULL) return NULL;

  PyObject* result = NULL;
  if (PyList_Size(missing) == 0) {
    result = PyBool_FromLong(true);
  } else {
    PyObject* extended = PyObject_CallMethod(errors, "extend", "O", missing);
    if (extended != NULL) {
      result = PyBool_FromLong(false);
      Py_DECREF(extended);
    }
    // On failure `result` stays NULL and the exception from extend() stands.
  }

  Py_DECREF(missing);
  return result;
}
1075 
// Implements msg.IsInitialized([errors]).
//
// With an `errors` argument, the missing fields are also appended to it.
// Without one, only a bool is computed.
static PyObject* PyUpb_Message_IsInitialized(PyObject* _self, PyObject* args) {
  PyObject* errors = NULL;
  if (!PyArg_ParseTuple(args, "|O", &errors)) return NULL;

  if (errors != NULL) {
    // We need to collect a list of unset required fields and append it to
    // `errors`.
    return PyUpb_Message_IsInitializedAppendErrors(_self, errors);
  }

  // We just need to return a boolean "true" or "false" for whether all
  // required fields are set.
  upb_Message* msg = PyUpb_Message_GetIfReified(_self);
  const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(_self);
  const upb_DefPool* pool = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
  const bool has_unset = upb_util_HasUnsetRequired(msg, msgdef, pool, NULL);
  return PyBool_FromLong(!has_unset);
}
1095 
// Sort-key callback for ListFields() entries: given a tuple whose first item
// is a field descriptor, returns that field's number as a Python int.
static PyObject* PyUpb_Message_ListFieldsItemKey(PyObject* self,
                                                 PyObject* val) {
  assert(PyTuple_Check(val));
  PyObject* descriptor = PyTuple_GetItem(val, 0);
  const upb_FieldDef* def = PyUpb_FieldDescriptor_GetDef(descriptor);
  long number = upb_FieldDef_Number(def);
  return PyLong_FromLong(number);
}
1103 
// Unconditionally raises TypeError telling the user that descriptors cannot be
// created directly and how to work around out-of-date generated code.
//
// Both arguments are ignored.  Always returns NULL with the exception set.
static PyObject* PyUpb_Message_CheckCalledFromGeneratedFile(
    PyObject* unused, PyObject* unused_arg) {
  PyErr_SetString(
      PyExc_TypeError,
      "Descriptors cannot be created directly.\n"
      "If this call came from a _pb2.py file, your generated code is out of "
      "date and must be regenerated with protoc >= 3.19.0.\n"
      "If you cannot immediately regenerate your protos, some other possible "
      "workarounds are:\n"
      " 1. Downgrade the protobuf package to 3.20.x or lower.\n"
      " 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will "
      "use pure-Python parsing and will be much slower).\n"
      "\n"
      "More information: "
      "https://developers.google.com/protocol-buffers/docs/news/"
      "2022-05-06#python-updates");
  return NULL;
}
1122 
// Sorts `list` in place by calling list.sort(key=<listfields_item_key>), where
// the key callable comes from the module state.
//
// Returns true on success, or false with a Python exception set.
static bool PyUpb_Message_SortFieldList(PyObject* list) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
  PyObject* no_args = PyTuple_New(0);
  PyObject* kw = PyDict_New();
  PyObject* sort_fn = PyObject_GetAttrString(list, "sort");
  PyObject* sort_result = NULL;

  // Only make the call if every prerequisite object was created and the key
  // function was stored in the keyword dict.
  if (no_args && kw && sort_fn &&
      PyDict_SetItemString(kw, "key", state->listfields_item_key) >= 0) {
    sort_result = PyObject_Call(sort_fn, no_args, kw);
  }
  const bool sorted = sort_result != NULL;

  Py_XDECREF(sort_fn);
  Py_XDECREF(no_args);
  Py_XDECREF(kw);
  Py_XDECREF(sort_result);
  return sorted;
}
1145 
// Implements msg.ListFields(): returns a list of (field descriptor, value)
// tuples, one per field yielded by upb_Message_Next(), in field number order.
//
// A message that is not reified yields an empty list.  Returns a new
// reference, or NULL with a Python exception set on failure.
static PyObject* PyUpb_Message_ListFields(PyObject* _self, PyObject* arg) {
  PyObject* list = PyList_New(0);
  if (!list) return NULL;
  upb_Message* msg = PyUpb_Message_GetIfReified(_self);
  if (!msg) return list;

  size_t iter1 = kUpb_Message_Begin;
  const upb_MessageDef* m = PyUpb_Message_GetMsgdef(_self);
  const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
  const upb_FieldDef* f;
  // These three are owned references while non-NULL.  They are deliberately
  // declared once, outside the loop, so that the `err` cleanup below sees the
  // values assigned inside the loop body.
  PyObject* field_desc = NULL;
  PyObject* py_val = NULL;
  PyObject* tuple = NULL;
  upb_MessageValue val;
  uint32_t last_field = 0;
  bool in_order = true;
  while (upb_Message_Next(msg, m, symtab, &f, &val, &iter1)) {
    // Track whether iteration order already matches field number order so the
    // sort below can be skipped in the common case.
    const uint32_t field_number = upb_FieldDef_Number(f);
    if (field_number < last_field) in_order = false;
    last_field = field_number;
    field_desc = PyUpb_FieldDescriptor_Get(f);
    py_val = PyUpb_Message_GetFieldValue(_self, f);
    if (!field_desc || !py_val) goto err;
    // "N" hands both references over to Py_BuildValue, so they must not be
    // released again by the cleanup code.
    tuple = Py_BuildValue("(NN)", field_desc, py_val);
    field_desc = NULL;
    py_val = NULL;
    if (!tuple) goto err;
    if (PyList_Append(list, tuple)) goto err;
    Py_DECREF(tuple);
    tuple = NULL;
  }

  // Users rely on fields being returned in field number order.
  if (!in_order && !PyUpb_Message_SortFieldList(list)) goto err;

  return list;

err:
  Py_XDECREF(field_desc);
  Py_XDECREF(py_val);
  Py_XDECREF(tuple);
  Py_DECREF(list);
  return NULL;
}
1189 
// Shared implementation of the merge-from-message entry points: merges the
// message `arg` into `self` by serializing `arg` and parsing the bytes into
// `self`.
//
// `arg` must be an instance of exactly the same class as `self`, otherwise
// TypeError is raised.  `check_required` selects between the full and the
// partial serializer for `arg`.
//
// Returns None, or NULL with a Python exception set if the type check,
// serialization or parsing fails.
static PyObject* PyUpb_Message_MergeInternal(PyObject* self, PyObject* arg,
                                             bool check_required) {
  if (Py_TYPE(self) != Py_TYPE(arg)) {
    PyErr_Format(PyExc_TypeError,
                 "Parameter to MergeFrom() must be instance of same class: "
                 "expected %S got %S.",
                 Py_TYPE(self), Py_TYPE(arg));
    return NULL;
  }
  // OPT: exit if src is empty.
  PyObject* subargs = PyTuple_New(0);
  if (!subargs) return NULL;
  PyObject* serialized =
      check_required
          ? PyUpb_Message_SerializeToString(arg, subargs, NULL)
          : PyUpb_Message_SerializePartialToString(arg, subargs, NULL);
  Py_DECREF(subargs);
  if (!serialized) return NULL;
  PyObject* ret = PyUpb_Message_MergeFromString(self, serialized);
  Py_DECREF(serialized);
  // Propagate a parse failure instead of returning None with an exception
  // still pending.
  if (!ret) return NULL;
  Py_DECREF(ret);
  Py_RETURN_NONE;
}
1212 
// Merges message `arg` into `self`, serializing `arg` with the required-field
// check enabled.
PyObject* PyUpb_Message_MergeFrom(PyObject* self, PyObject* arg) {
  const bool check_required = true;
  return PyUpb_Message_MergeInternal(self, arg, check_required);
}
1216 
// Merges message `arg` into `self`, serializing `arg` with the required-field
// check disabled.
static PyObject* PyUpb_Message_MergePartialFrom(PyObject* self, PyObject* arg) {
  const bool check_required = false;
  return PyUpb_Message_MergeInternal(self, arg, check_required);
}
1220 
// Implements msg.SetInParent(): reifies the message.  `arg` is ignored.
// Always returns None.
static PyObject* PyUpb_Message_SetInParent(PyObject* _self, PyObject* arg) {
  PyUpb_Message_EnsureReified((PyUpb_Message*)_self);
  Py_RETURN_NONE;
}
1226 
// Accessor for unknown fields.  Not implemented yet: it always raises
// NotImplementedError and returns NULL.
static PyObject* PyUpb_Message_UnknownFields(PyObject* _self, PyObject* arg) {
  (void)_self;
  (void)arg;
  // TODO(haberman): re-enable when unknown fields are added.
  // return PyUpb_UnknownFields_New(_self);
  PyErr_SetString(PyExc_NotImplementedError, "unknown field accessor");
  return NULL;
}
1233 
// Implements msg.MergeFromString(data): parses the serialized bytes in `arg`
// and merges them into this message.
//
// `arg` may be a bytes object or a memoryview; a memoryview is first copied
// into a temporary bytes object.  The message is reified before decoding.
//
// Returns the number of bytes consumed as a Python int, or NULL with a Python
// exception set (the module's decode error class on a parse failure).
PyObject* PyUpb_Message_MergeFromString(PyObject* _self, PyObject* arg) {
  PyUpb_Message* self = (void*)_self;
  char* buf;
  Py_ssize_t size;
  PyObject* bytes = NULL;

  if (PyMemoryView_Check(arg)) {
    // Copying out of the memoryview can fail, so the result must be checked
    // before it is used.
    bytes = PyBytes_FromObject(arg);
    if (!bytes) return NULL;
    if (PyBytes_AsStringAndSize(bytes, &buf, &size) < 0) {
      Py_DECREF(bytes);
      return NULL;
    }
  } else if (PyBytes_AsStringAndSize(arg, &buf, &size) < 0) {
    return NULL;
  }

  PyUpb_Message_EnsureReified(self);
  const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
  const upb_FileDef* file = upb_MessageDef_File(msgdef);
  const upb_ExtensionRegistry* extreg =
      upb_DefPool_ExtensionRegistry(upb_FileDef_Pool(file));
  const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
  upb_Arena* arena = PyUpb_Arena_Get(self->arena);
  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
  // The depth limit is lifted when oversize protos are allowed.
  int options = upb_DecodeOptions_MaxDepth(
      state->allow_oversize_protos ? UINT16_MAX
                                   : kUpb_WireFormat_DefaultDepthLimit);
  upb_DecodeStatus status =
      upb_Decode(buf, size, self->ptr.msg, layout, extreg, options, arena);
  // `buf` may point into `bytes`, so the temporary is only released once
  // decoding has finished.
  Py_XDECREF(bytes);
  if (status != kUpb_DecodeStatus_Ok) {
    PyErr_Format(state->decode_error_class, "Error parsing message");
    return NULL;
  }
  PyUpb_Message_SyncSubobjs(self);
  return PyLong_FromSsize_t(size);
}
1271 
1272 static PyObject* PyUpb_Message_Clear(PyUpb_Message* self, PyObject* args);
1273 
// Implements msg.ParseFromString(data): clears the message, then merges the
// serialized bytes in `arg` into it.
static PyObject* PyUpb_Message_ParseFromString(PyObject* self, PyObject* arg) {
  PyUpb_Message* msg = (PyUpb_Message*)self;
  PyObject* cleared = PyUpb_Message_Clear(msg, NULL);
  Py_DECREF(cleared);

  return PyUpb_Message_MergeFromString(self, arg);
}
1279 
// Implements msg.ByteSize(): returns the length of the serialized message as
// a Python int, or NULL with an exception set if serialization fails.
static PyObject* PyUpb_Message_ByteSize(PyObject* self, PyObject* args) {
  // TODO(https://github.com/protocolbuffers/upb/issues/462): At the moment upb
  // does not have a "byte size" function, so we just serialize to string and
  // get the size of the string.
  PyObject* no_args = PyTuple_New(0);
  PyObject* encoded = PyUpb_Message_SerializeToString(self, no_args, NULL);
  Py_DECREF(no_args);
  if (encoded == NULL) return NULL;

  const size_t nbytes = PyBytes_Size(encoded);
  Py_DECREF(encoded);
  return PyLong_FromSize_t(nbytes);
}
1292 
// Implements msg.Clear(): reifies the message, reifies every cached unset
// sub-object with NULL data, and then clears all fields of the underlying upb
// message.  `args` is not used.  Always returns None.
static PyObject* PyUpb_Message_Clear(PyUpb_Message* self, PyObject* args) {
  PyUpb_Message_EnsureReified(self);
  const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
  PyUpb_WeakMap* subobj_map = self->unset_subobj_map;

  if (subobj_map) {
    upb_Message* msg = PyUpb_Message_GetMsg(self);
    (void)msg;  // Suppress unused warning when asserts are disabled.
    intptr_t iter = PYUPB_WEAKMAP_BEGIN;
    const void* key;
    PyObject* obj;

    // Every entry is removed from the map as it is visited, then reified with
    // NULL in place of underlying data.
    // NOTE(review): presumably reifying with NULL detaches the stub from this
    // message -- confirm against the *_Reify() implementations.
    while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
      const upb_FieldDef* f = key;
      PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
      // The asserts below check that a field with a cached unset sub-object
      // has no value in the underlying message.
      if (upb_FieldDef_IsMap(f)) {
        assert(upb_Message_GetFieldByDef(msg, f).map_val == NULL);
        PyUpb_MapContainer_Reify(obj, NULL);
      } else if (upb_FieldDef_IsRepeated(f)) {
        assert(upb_Message_GetFieldByDef(msg, f).array_val == NULL);
        PyUpb_RepeatedContainer_Reify(obj, NULL);
      } else {
        assert(!upb_Message_HasFieldByDef(msg, f));
        PyUpb_Message* sub = (void*)obj;
        assert(self == sub->ptr.parent);
        PyUpb_Message_Reify(sub, f, NULL);
      }
    }
  }

  upb_Message_ClearByDef(self->ptr.msg, msgdef);
  Py_RETURN_NONE;
}
1326 
// Clears field `f` of this message.
//
// The message is reified first.  Any wrapper object cached for `f` is reified
// as well before the field is cleared in the underlying upb message; map
// wrappers are additionally invalidated.
void PyUpb_Message_DoClearField(PyObject* _self, const upb_FieldDef* f) {
  PyUpb_Message* self = (void*)_self;
  PyUpb_Message_EnsureReified(self);

  // We must ensure that any stub object is reified so its parent no longer
  // points to us.
  PyObject* cached = NULL;
  if (self->unset_subobj_map) {
    cached = PyUpb_WeakMap_Get(self->unset_subobj_map, f);
  }

  if (upb_FieldDef_IsMap(f)) {
    // For maps we additionally have to invalidate any iterators.  So we need
    // to get an object even if it's reified.
    if (cached == NULL) cached = PyUpb_Message_GetFieldValue(_self, f);
    PyUpb_MapContainer_EnsureReified(cached);
    PyUpb_MapContainer_Invalidate(cached);
  } else if (cached != NULL) {
    if (upb_FieldDef_IsRepeated(f)) {
      PyUpb_RepeatedContainer_EnsureReified(cached);
    } else if (upb_FieldDef_IsSubMessage(f)) {
      PyUpb_Message_EnsureReified((PyUpb_Message*)cached);
    }
  }

  Py_XDECREF(cached);
  upb_Message_ClearFieldByDef(self->ptr.msg, f);
}
1358 
// Implements msg.ClearExtension(ext): reifies the message and clears the
// extension field described by `arg`.
//
// Returns None, or NULL with an exception set if `arg` does not resolve to an
// extension of this message.
static PyObject* PyUpb_Message_ClearExtension(PyObject* _self, PyObject* arg) {
  PyUpb_Message_EnsureReified((PyUpb_Message*)_self);

  const upb_FieldDef* ext = PyUpb_Message_GetExtensionDef(_self, arg);
  if (ext == NULL) return NULL;

  PyUpb_Message_DoClearField(_self, ext);
  Py_RETURN_NONE;
}
1367 
// Implements msg.ClearField(name), where `name` is a field or a oneof.
//
// For a oneof, whichever member is currently set (if any) is cleared.
// Returns None, or NULL with ValueError set if the name cannot be resolved.
static PyObject* PyUpb_Message_ClearField(PyObject* _self, PyObject* arg) {
  PyUpb_Message* self = (void*)_self;

  // We always need EnsureReified() here (even for an unset message) to
  // preserve behavior like:
  //   msg = FooMessage()
  //   msg.foo.Clear()
  //   assert msg.HasField("foo")
  PyUpb_Message_EnsureReified(self);

  const upb_FieldDef* field;
  const upb_OneofDef* oneof;
  if (!PyUpb_Message_LookupName(self, arg, &field, &oneof, PyExc_ValueError)) {
    return NULL;
  }

  // A oneof name resolves to its currently-set member, which may be none.
  if (oneof != NULL) field = upb_Message_WhichOneof(self->ptr.msg, oneof);
  if (field != NULL) PyUpb_Message_DoClearField(_self, field);
  Py_RETURN_NONE;
}
1388 
// Implements msg.DiscardUnknownFields(): reifies the message and asks upb to
// discard unknown data from it.  `arg` is ignored.  Always returns None.
static PyObject* PyUpb_Message_DiscardUnknownFields(PyUpb_Message* self,
                                                    PyObject* arg) {
  PyUpb_Message_EnsureReified(self);

  upb_Message* target = self->ptr.msg;
  const upb_MessageDef* def = _PyUpb_Message_GetMsgdef(self);
  upb_Message_DiscardUnknown(target, def, 64);
  Py_RETURN_NONE;
}
1396 
// Implements msg.FindInitializationErrors(): returns a list of strings, one
// per field path that upb_util_HasUnsetRequired() reports as missing.  The
// list is empty when nothing is missing.  `arg` is ignored.
//
// Returns a new reference, or NULL with a Python exception set on failure.
static PyObject* PyUpb_Message_FindInitializationErrors(PyObject* _self,
                                                        PyObject* arg) {
  PyUpb_Message* self = (void*)_self;
  upb_Message* msg = PyUpb_Message_GetIfReified(_self);
  const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
  const upb_DefPool* ext_pool = upb_FileDef_Pool(upb_MessageDef_File(msgdef));
  upb_FieldPathEntry* fields_base = NULL;
  PyObject* ret = PyList_New(0);
  if (!ret) return NULL;
  if (!upb_util_HasUnsetRequired(msg, msgdef, ext_pool, &fields_base)) {
    return ret;
  }

  // `fields` is a cursor that upb_FieldPath_ToText() advances past each path;
  // `fields_base` keeps the start of the array so it can be released when we
  // are done with it.
  upb_FieldPathEntry* fields = fields_base;
  char* buf = NULL;
  size_t size = 0;
  bool ok = true;
  assert(fields->field);
  while (fields->field) {
    upb_FieldPathEntry* field = fields;
    size_t need = upb_FieldPath_ToText(&fields, buf, size);
    if (need >= size) {
      // The text did not fit: rewind the cursor, grow the buffer by doubling
      // until it is large enough, and format the same path again.
      fields = field;
      size = size ? size * 2 : 16;
      while (size <= need) size *= 2;
      char* grown = realloc(buf, size);
      if (!grown) {
        PyErr_NoMemory();
        ok = false;
        break;
      }
      buf = grown;
      need = upb_FieldPath_ToText(&fields, buf, size);
      assert(size > need);
    }
    PyObject* str = PyUnicode_FromString(buf);
    if (!str) {
      ok = false;
      break;
    }
    int append_status = PyList_Append(ret, str);
    Py_DECREF(str);
    if (append_status < 0) {
      ok = false;
      break;
    }
  }
  free(buf);
  // The path array is heap-allocated for the caller, so release it here.
  free(fields_base);

  if (!ok) {
    Py_DECREF(ret);
    return NULL;
  }
  return ret;
}
1428 
// Implements cls.FromString(data): creates a new instance by calling `cls`
// with no arguments and merges the serialized bytes into it.
//
// Returns the new message, or NULL with a Python exception set.
static PyObject* PyUpb_Message_FromString(PyObject* cls, PyObject* serialized) {
  PyObject* msg = PyObject_CallObject(cls, NULL);
  if (msg == NULL) return NULL;

  PyObject* length = PyUpb_Message_MergeFromString(msg, serialized);
  if (length == NULL) {
    // Parsing failed: discard the partially-built message.
    Py_DECREF(msg);
    return NULL;
  }

  // Only the message itself is of interest; the byte count is dropped.
  Py_DECREF(length);
  return msg;
}
1447 
PyUpb_Message_GetExtensionDef(PyObject * _self,PyObject * key)1448 const upb_FieldDef* PyUpb_Message_GetExtensionDef(PyObject* _self,
1449                                                   PyObject* key) {
1450   const upb_FieldDef* f = PyUpb_FieldDescriptor_GetDef(key);
1451   if (!f) {
1452     PyErr_Clear();
1453     PyErr_Format(PyExc_KeyError, "Object %R is not a field descriptor\n", key);
1454     return NULL;
1455   }
1456   if (!upb_FieldDef_IsExtension(f)) {
1457     PyErr_Format(PyExc_KeyError, "Field %s is not an extension\n",
1458                  upb_FieldDef_FullName(f));
1459     return NULL;
1460   }
1461   const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(_self);
1462   if (upb_FieldDef_ContainingType(f) != msgdef) {
1463     PyErr_Format(PyExc_KeyError, "Extension doesn't match (%s vs %s)",
1464                  upb_MessageDef_FullName(msgdef), upb_FieldDef_FullName(f));
1465     return NULL;
1466   }
1467   return f;
1468 }
1469 
// Implements msg.HasExtension(ext) for singular extensions.
//
// Raises KeyError if `ext_desc` is not a valid extension of this message or
// if it is repeated.  A message that is not reified has no extensions set.
static PyObject* PyUpb_Message_HasExtension(PyObject* _self,
                                            PyObject* ext_desc) {
  upb_Message* msg = PyUpb_Message_GetIfReified(_self);
  const upb_FieldDef* ext = PyUpb_Message_GetExtensionDef(_self, ext_desc);
  if (ext == NULL) return NULL;

  if (upb_FieldDef_IsRepeated(ext)) {
    PyErr_SetString(PyExc_KeyError,
                    "Field is repeated. A singular method is required.");
    return NULL;
  }

  const bool present = msg != NULL && upb_Message_HasFieldByDef(msg, ext);
  return PyBool_FromLong(present);
}
1483 
// Raises `exc` with a message that names `msgdef` and lists the entries of
// `errors` joined by commas.
//
// Consumes the caller's reference to `errors`.  If building the message
// fails, the exception raised by that failure is left set instead.
void PyUpb_Message_ReportInitializationErrors(const upb_MessageDef* msgdef,
                                              PyObject* errors, PyObject* exc) {
  PyObject* separator = PyUnicode_FromString(",");
  PyObject* joined = separator ? PyUnicode_Join(separator, errors) : NULL;

  if (joined != NULL) {
    PyErr_Format(exc, "Message %s is missing required fields: %U",
                 upb_MessageDef_FullName(msgdef), joined);
  }

  Py_XDECREF(separator);
  Py_XDECREF(joined);
  Py_DECREF(errors);
}
1498 
// Shared implementation of the serialize-to-bytes entry points.
//
// Accepts one optional argument, `deterministic` (a bool), positionally or by
// keyword.  `check_required` adds kUpb_EncodeOption_CheckRequired to the
// encoder options.
//
// Returns a new bytes object, or NULL with a Python exception set.  Encoding
// failures raise the module's encode error class, naming the missing required
// fields when there are any.
PyObject* PyUpb_Message_SerializeInternal(PyObject* _self, PyObject* args,
                                          PyObject* kwargs,
                                          bool check_required) {
  PyUpb_Message* self = (void*)_self;
  if (!PyUpb_Message_Verify((PyObject*)self)) return NULL;
  static const char* kwlist[] = {"deterministic", NULL};
  int deterministic = 0;
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|p", (char**)(kwlist),
                                   &deterministic)) {
    return NULL;
  }

  const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
  if (PyUpb_Message_IsStub(self)) {
    // Nothing to serialize, but we do have to check whether the message is
    // initialized.
    // NOTE(review): this check runs even when check_required is false --
    // confirm that is intended.
    PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
    PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
    if (!errors) return NULL;
    if (PyList_Size(errors) == 0) {
      Py_DECREF(errors);
      return PyBytes_FromStringAndSize(NULL, 0);
    }
    // Consumes our reference to `errors`.
    PyUpb_Message_ReportInitializationErrors(msgdef, errors,
                                             state->encode_error_class);
    return NULL;
  }

  // The encoded bytes live in a temporary arena that is freed once they have
  // been copied into the Python bytes object.
  upb_Arena* arena = upb_Arena_New();
  if (!arena) return PyErr_NoMemory();
  const upb_MiniTable* layout = upb_MessageDef_MiniTable(msgdef);
  size_t size = 0;
  // Python does not currently have any effective limit on serialization depth.
  int options = upb_EncodeOptions_MaxDepth(UINT16_MAX);
  if (check_required) options |= kUpb_EncodeOption_CheckRequired;
  if (deterministic) options |= kUpb_EncodeOption_Deterministic;
  char* pb;
  upb_EncodeStatus status =
      upb_Encode(self->ptr.msg, layout, options, arena, &pb, &size);
  PyObject* ret = NULL;

  if (status != kUpb_EncodeStatus_Ok) {
    PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
    PyObject* errors = PyUpb_Message_FindInitializationErrors(_self, NULL);
    // If the error list could not be built, its exception is already set.
    if (!errors) goto done;
    if (PyList_Size(errors) != 0) {
      // Consumes our reference to `errors`.
      PyUpb_Message_ReportInitializationErrors(msgdef, errors,
                                               state->encode_error_class);
    } else {
      // No missing fields to report, so release the empty list ourselves.
      Py_DECREF(errors);
      PyErr_Format(state->encode_error_class, "Failed to serialize proto");
    }
    goto done;
  }

  ret = PyBytes_FromStringAndSize(pb, size);

done:
  upb_Arena_Free(arena);
  return ret;
}
1557 
// Python-visible SerializeToString(): serializes with the required-field check
// enabled.  Arguments and return value are those of
// PyUpb_Message_SerializeInternal().
PyObject* PyUpb_Message_SerializeToString(PyObject* _self, PyObject* args,
                                          PyObject* kwargs) {
  const bool check_required = true;
  return PyUpb_Message_SerializeInternal(_self, args, kwargs, check_required);
}
1562 
// Python-visible SerializePartialToString(): serializes without asking the
// encoder to check required fields.  Arguments and return value are those of
// PyUpb_Message_SerializeInternal().
PyObject* PyUpb_Message_SerializePartialToString(PyObject* _self,
                                                 PyObject* args,
                                                 PyObject* kwargs) {
  const bool check_required = false;
  return PyUpb_Message_SerializeInternal(_self, args, kwargs, check_required);
}
1568 
// Python-visible WhichOneof(name).
//
// Looks `name` up as a oneof of this message (a failed lookup raises
// ValueError and returns NULL).  Returns the name of the field currently set
// in that oneof as a new str, or None when the message has not been reified
// or no member of the oneof is set.
static PyObject* PyUpb_Message_WhichOneof(PyObject* _self, PyObject* name) {
  const upb_OneofDef* oneof;
  if (!PyUpb_Message_LookupName((PyUpb_Message*)_self, name, NULL, &oneof,
                                PyExc_ValueError)) {
    return NULL;
  }

  upb_Message* msg = PyUpb_Message_GetIfReified(_self);
  const upb_FieldDef* active = msg ? upb_Message_WhichOneof(msg, oneof) : NULL;
  if (active == NULL) Py_RETURN_NONE;
  return PyUnicode_FromString(upb_FieldDef_Name(active));
}
1581 
// Forgets the message's cached extension-dict pointer.  The pointer is simply
// reset to NULL; no reference count is adjusted here.  The message must
// currently have an extension dict (asserted).
void PyUpb_Message_ClearExtensionDict(PyObject* _self) {
  PyUpb_Message* msg_obj = (PyUpb_Message*)_self;
  assert(msg_obj->ext_dict != NULL);
  msg_obj->ext_dict = NULL;
}
1587 
// Getter for the `Extensions` attribute.
//
// Returns the message's extension dict, creating and caching it on first use.
// Raises AttributeError (with no message) if the message type declares no
// extension ranges.  `closure` is unused.
static PyObject* PyUpb_Message_GetExtensionDict(PyObject* _self,
                                                void* closure) {
  PyUpb_Message* self = (void*)_self;

  if (!self->ext_dict) {
    const upb_MessageDef* msgdef = _PyUpb_Message_GetMsgdef(self);
    if (upb_MessageDef_ExtensionRangeCount(msgdef) == 0) {
      PyErr_SetNone(PyExc_AttributeError);
      return NULL;
    }
    // The freshly created object is returned as-is; the cached pointer does
    // not take an additional reference.
    self->ext_dict = PyUpb_ExtensionDict_New(_self);
    return self->ext_dict;
  }

  // Already cached: hand out an extra reference to the existing dict.
  Py_INCREF(self->ext_dict);
  return self->ext_dict;
}
1605 
1606 static PyGetSetDef PyUpb_Message_Getters[] = {
1607     {"Extensions", PyUpb_Message_GetExtensionDict, NULL, "Extension dict"},
1608     {NULL}};
1609 
1610 static PyMethodDef PyUpb_Message_Methods[] = {
1611     // TODO(https://github.com/protocolbuffers/upb/issues/459)
1612     //{ "__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
1613     //  "Makes a deep copy of the class." },
1614     //{ "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
1615     //  "Outputs a unicode representation of the message." },
1616     {"ByteSize", (PyCFunction)PyUpb_Message_ByteSize, METH_NOARGS,
1617      "Returns the size of the message in bytes."},
1618     {"Clear", (PyCFunction)PyUpb_Message_Clear, METH_NOARGS,
1619      "Clears the message."},
1620     {"ClearExtension", PyUpb_Message_ClearExtension, METH_O,
1621      "Clears a message field."},
1622     {"ClearField", PyUpb_Message_ClearField, METH_O, "Clears a message field."},
1623     // TODO(https://github.com/protocolbuffers/upb/issues/459)
1624     //{ "CopyFrom", (PyCFunction)CopyFrom, METH_O,
1625     //  "Copies a protocol message into the current message." },
1626     {"DiscardUnknownFields", (PyCFunction)PyUpb_Message_DiscardUnknownFields,
1627      METH_NOARGS, "Discards the unknown fields."},
1628     {"FindInitializationErrors", PyUpb_Message_FindInitializationErrors,
1629      METH_NOARGS, "Finds unset required fields."},
1630     {"FromString", PyUpb_Message_FromString, METH_O | METH_CLASS,
1631      "Creates new method instance from given serialized data."},
1632     {"HasExtension", PyUpb_Message_HasExtension, METH_O,
1633      "Checks if a message field is set."},
1634     {"HasField", PyUpb_Message_HasField, METH_O,
1635      "Checks if a message field is set."},
1636     {"IsInitialized", PyUpb_Message_IsInitialized, METH_VARARGS,
1637      "Checks if all required fields of a protocol message are set."},
1638     {"ListFields", PyUpb_Message_ListFields, METH_NOARGS,
1639      "Lists all set fields of a message."},
1640     {"MergeFrom", PyUpb_Message_MergeFrom, METH_O,
1641      "Merges a protocol message into the current message."},
1642     {"MergeFromString", PyUpb_Message_MergeFromString, METH_O,
1643      "Merges a serialized message into the current message."},
1644     {"ParseFromString", PyUpb_Message_ParseFromString, METH_O,
1645      "Parses a serialized message into the current message."},
1646     // TODO(https://github.com/protocolbuffers/upb/issues/459)
1647     //{ "RegisterExtension", (PyCFunction)RegisterExtension, METH_O |
1648     // METH_CLASS,
1649     //  "Registers an extension with the current message." },
1650     {"SerializePartialToString",
1651      (PyCFunction)PyUpb_Message_SerializePartialToString,
1652      METH_VARARGS | METH_KEYWORDS,
1653      "Serializes the message to a string, even if it isn't initialized."},
1654     {"SerializeToString", (PyCFunction)PyUpb_Message_SerializeToString,
1655      METH_VARARGS | METH_KEYWORDS,
1656      "Serializes the message to a string, only for initialized messages."},
1657     {"SetInParent", (PyCFunction)PyUpb_Message_SetInParent, METH_NOARGS,
1658      "Sets the has bit of the given field in its parent message."},
1659     {"UnknownFields", (PyCFunction)PyUpb_Message_UnknownFields, METH_NOARGS,
1660      "Parse unknown field set"},
1661     {"WhichOneof", PyUpb_Message_WhichOneof, METH_O,
1662      "Returns the name of the field set inside a oneof, "
1663      "or None if no field is set."},
1664     {"_ListFieldsItemKey", PyUpb_Message_ListFieldsItemKey,
1665      METH_O | METH_STATIC,
1666      "Compares ListFields() list entries by field number"},
1667     {"_CheckCalledFromGeneratedFile",
1668      PyUpb_Message_CheckCalledFromGeneratedFile, METH_NOARGS | METH_STATIC,
1669      "Raises TypeError if the caller is not in a _pb2.py file."},
1670     {NULL, NULL}};
1671 
1672 static PyType_Slot PyUpb_Message_Slots[] = {
1673     {Py_tp_dealloc, PyUpb_Message_Dealloc},
1674     {Py_tp_doc, "A ProtocolMessage"},
1675     {Py_tp_getattro, PyUpb_Message_GetAttr},
1676     {Py_tp_getset, PyUpb_Message_Getters},
1677     {Py_tp_hash, PyObject_HashNotImplemented},
1678     {Py_tp_methods, PyUpb_Message_Methods},
1679     {Py_tp_new, PyUpb_Message_New},
1680     {Py_tp_str, PyUpb_Message_ToString},
1681     {Py_tp_repr, PyUpb_Message_ToString},
1682     {Py_tp_richcompare, PyUpb_Message_RichCompare},
1683     {Py_tp_setattro, PyUpb_Message_SetAttr},
1684     {Py_tp_init, PyUpb_Message_Init},
1685     {0, NULL}};
1686 
1687 PyType_Spec PyUpb_Message_Spec = {
1688     PYUPB_MODULE_NAME ".Message",              // tp_name
1689     sizeof(PyUpb_Message),                     // tp_basicsize
1690     0,                                         // tp_itemsize
1691     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
1692     PyUpb_Message_Slots,
1693 };
1694 
1695 // -----------------------------------------------------------------------------
1696 // MessageMeta
1697 // -----------------------------------------------------------------------------
1698 
// MessageMeta is the metaclass for message objects.  The generated code uses it
// to construct message classes, i.e.
//
// FooMessage = _message.MessageMeta('FooMessage', (_message.Message,), {...})
1703 //
1704 // (This is not quite true: at the moment the Python library subclasses
1705 // MessageMeta, and uses that subclass as the metaclass.  There is a TODO below
1706 // to simplify this, so that the illustration above is indeed accurate).
1707 
1708 typedef struct {
1709   const upb_MiniTable* layout;
1710   PyObject* py_message_descriptor;
1711 } PyUpb_MessageMeta;
1712 
1713 // The PyUpb_MessageMeta struct is trailing data tacked onto the end of
1714 // MessageMeta instances.  This means that we get our instances of this struct
1715 // by adding the appropriate number of bytes.
PyUpb_GetMessageMeta(PyObject * cls)1716 static PyUpb_MessageMeta* PyUpb_GetMessageMeta(PyObject* cls) {
1717 #ifndef NDEBUG
1718   PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
1719   assert(!state || cls->ob_type == state->message_meta_type);
1720 #endif
1721   return (PyUpb_MessageMeta*)((char*)cls + cpython_bits.type_basicsize);
1722 }
1723 
PyUpb_MessageMeta_GetMsgdef(PyObject * cls)1724 static const upb_MessageDef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls) {
1725   PyUpb_MessageMeta* self = PyUpb_GetMessageMeta(cls);
1726   return PyUpb_Descriptor_GetDef(self->py_message_descriptor);
1727 }
1728 
// Creates the Python class for the message type described by `py_descriptor`.
//
//   py_descriptor: must be an instance of the message Descriptor type,
//                  otherwise TypeError is raised.
//   name:          name of the new class.
//   dict:          class namespace; an empty `__slots__` entry is added to it.
//
// The new class is built with the MessageMeta metaclass, records the
// descriptor (taking a reference to it) and the message's MiniTable in its
// trailing data, and is registered in the object cache under that MiniTable.
// Returns the new class, or NULL with a Python exception set.
PyObject* PyUpb_MessageMeta_DoCreateClass(PyObject* py_descriptor,
                                          const char* name, PyObject* dict) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
  PyTypeObject* descriptor_type = state->descriptor_types[kPyUpb_Descriptor];
  if (!PyObject_TypeCheck(py_descriptor, descriptor_type)) {
    return PyErr_Format(PyExc_TypeError, "Expected a message Descriptor");
  }

  const upb_MessageDef* msgdef = PyUpb_Descriptor_GetDef(py_descriptor);
  assert(msgdef);
  assert(!PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(msgdef)));

  PyObject* slots = PyTuple_New(0);
  if (!slots) return NULL;
  int status = PyDict_SetItemString(dict, "__slots__", slots);
  Py_DECREF(slots);
  if (status < 0) return NULL;

  // Bases are either:
  //    (Message, Message)            # for regular messages
  //    (Message, Message, WktBase)   # For well-known types
  PyObject* wkt_bases = PyUpb_GetWktBases(state);
  // NOTE(review): assumes PyUpb_GetWktBases() leaves a Python error set when
  // it returns NULL -- confirm.  Either way NULL must not reach
  // PyDict_GetItemString().
  if (!wkt_bases) return NULL;
  // Borrowed reference; NULL simply means "not a well-known type".
  PyObject* wkt_base =
      PyDict_GetItemString(wkt_bases, upb_MessageDef_FullName(msgdef));
  PyObject* args;
  if (wkt_base == NULL) {
    args = Py_BuildValue("s(OO)O", name, state->cmessage_type,
                         state->message_class, dict);
  } else {
    args = Py_BuildValue("s(OOO)O", name, state->cmessage_type,
                         state->message_class, wkt_base, dict);
  }
  // Py_BuildValue() reports failure by returning NULL with an error set.
  if (!args) return NULL;

  PyObject* ret = cpython_bits.type_new(state->message_meta_type, args, NULL);
  Py_DECREF(args);
  if (!ret) return NULL;

  PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(ret);
  meta->py_message_descriptor = py_descriptor;
  meta->layout = upb_MessageDef_MiniTable(msgdef);
  Py_INCREF(meta->py_message_descriptor);

  PyUpb_ObjCache_Add(meta->layout, ret);

  return ret;
}
1775 
// tp_new for MessageMeta: MessageMeta(name, bases, dict).
//
// `bases` must be a tuple that is either empty or contains only the Python
// Message class, and `dict` must be a dict holding a message Descriptor under
// the key "DESCRIPTOR"; anything else raises TypeError.  If a class for the
// descriptor's message type is already in the object cache that class is
// returned, otherwise a new one is created.  Returns NULL with an exception
// set on failure.
static PyObject* PyUpb_MessageMeta_New(PyTypeObject* type, PyObject* args,
                                       PyObject* kwargs) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
  static const char* kwlist[] = {"name", "bases", "dict", 0};
  PyObject *bases, *dict;
  const char* name;

  // Check arguments: (name, bases, dict)
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", (char**)kwlist,
                                   &name, &PyTuple_Type, &bases, &PyDict_Type,
                                   &dict)) {
    return NULL;
  }

  // Check bases: only (), or (message.Message,) are allowed
  Py_ssize_t size = PyTuple_Size(bases);
  if (!(size == 0 ||
        (size == 1 && PyTuple_GetItem(bases, 0) == state->message_class))) {
    PyErr_Format(PyExc_TypeError,
                 "A Message class can only inherit from Message, not %S",
                 bases);
    return NULL;
  }

  // Check dict['DESCRIPTOR'] (borrowed reference).
  PyObject* py_descriptor = PyDict_GetItemString(dict, "DESCRIPTOR");
  if (py_descriptor == NULL) {
    PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
    return NULL;
  }

  // The value comes from caller-supplied Python code, so make sure it really
  // is a message Descriptor before its definition is used for the cache
  // lookup below.  Same check and message as PyUpb_MessageMeta_DoCreateClass().
  PyTypeObject* descriptor_type = state->descriptor_types[kPyUpb_Descriptor];
  if (!PyObject_TypeCheck(py_descriptor, descriptor_type)) {
    return PyErr_Format(PyExc_TypeError, "Expected a message Descriptor");
  }

  const upb_MessageDef* m = PyUpb_Descriptor_GetDef(py_descriptor);
  assert(m);
  PyObject* ret = PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(m));
  if (ret) return ret;
  return PyUpb_MessageMeta_DoCreateClass(py_descriptor, name, dict);
}
1812 
// tp_dealloc for message classes: removes the class from the object cache,
// releases its descriptor reference, runs the base `type` deallocator, and
// finally drops a reference to the class's own type (fetched up front, since
// `self` is gone once the base deallocator has run).
static void PyUpb_MessageMeta_Dealloc(PyObject* self) {
  PyTypeObject* metatype = Py_TYPE(self);
  PyUpb_MessageMeta* trailer = PyUpb_GetMessageMeta(self);

  PyUpb_ObjCache_Delete(trailer->layout);
  Py_DECREF(trailer->py_message_descriptor);

  cpython_bits.type_dealloc(self);
  Py_DECREF(metatype);
}
1821 
// Sets the class attribute `<NAME>_FIELD_NUMBER` on `self` to the number of
// field `f`, where <NAME> is the field name; the whole attribute name is
// upper-cased with Python's str.upper().
//
// This function cannot report failure.  If any step fails the attribute is
// not set and the Python error raised by the failing call is left in place,
// as was already the case for a failing PyObject_SetAttr().
void PyUpb_MessageMeta_AddFieldNumber(PyObject* self, const upb_FieldDef* f) {
  PyObject* name =
      PyUnicode_FromFormat("%s_FIELD_NUMBER", upb_FieldDef_Name(f));
  // Each step only runs if the previous one produced an object.
  PyObject* upper = name ? PyObject_CallMethod(name, "upper", "") : NULL;
  PyObject* number = upper ? PyLong_FromLong(upb_FieldDef_Number(f)) : NULL;

  if (number) {
    // PyObject_SetAttr() does not steal `number`; it is released below.
    (void)PyObject_SetAttr(self, upper, number);
  }

  Py_XDECREF(number);
  Py_XDECREF(upper);
  Py_XDECREF(name);
}
1830 
// Computes a class attribute that is derived from the message definition
// rather than stored on the class.
//
// `<message full name>.<name>` is looked up in the message's DefPool, in this
// order:
//   * nested message   -> its Python class
//   * nested enum      -> an EnumTypeWrapper built from its descriptor
//   * nested enum value -> its number as an int
//   * nested extension -> its field descriptor
//
// In addition, if `name` ends in "_FIELD_NUMBER", the <NAME>_FIELD_NUMBER
// attributes for every field and nested extension are set on the class and
// `name` is looked up again with PyObject_GenericGetAttr(); a successful
// lookup takes precedence over a match from the list above.
//
// Returns a new reference, or NULL if nothing matched or an error occurred
// (a Python error may or may not be set; the caller replaces it anyway).
static PyObject* PyUpb_MessageMeta_GetDynamicAttr(PyObject* self,
                                                  PyObject* name) {
  const char* name_buf = PyUpb_GetStrData(name);
  if (!name_buf) return NULL;
  const upb_MessageDef* msgdef = PyUpb_MessageMeta_GetMsgdef(self);
  const upb_FileDef* filedef = upb_MessageDef_File(msgdef);
  const upb_DefPool* symtab = upb_FileDef_Pool(filedef);

  PyObject* py_key =
      PyBytes_FromFormat("%s.%s", upb_MessageDef_FullName(msgdef), name_buf);
  if (!py_key) return NULL;
  const char* key = PyUpb_GetStrData(py_key);
  if (!key) {
    Py_DECREF(py_key);
    return NULL;
  }

  PyObject* ret = NULL;
  const upb_MessageDef* nested = upb_DefPool_FindMessageByName(symtab, key);
  const upb_EnumDef* enumdef;
  const upb_EnumValueDef* enumval;
  const upb_FieldDef* ext;

  if (nested) {
    ret = PyUpb_Descriptor_GetClass(nested);
  } else if ((enumdef = upb_DefPool_FindEnumByName(symtab, key))) {
    PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
    PyObject* klass = state->enum_type_wrapper_class;
    // NOTE(review): assumes PyUpb_EnumDescriptor_Get() returns a new
    // reference, like the PyUpb_FieldDescriptor_Get() result that is handed
    // straight to the caller below -- confirm.  The wrapper call does not
    // consume it, so it is released here.  A NULL descriptor must not be
    // passed on, because NULL terminates the argument list.
    PyObject* py_enumdef = PyUpb_EnumDescriptor_Get(enumdef);
    if (py_enumdef) {
      ret = PyObject_CallFunctionObjArgs(klass, py_enumdef, NULL);
      Py_DECREF(py_enumdef);
    }
  } else if ((enumval = upb_DefPool_FindEnumByNameval(symtab, key))) {
    ret = PyLong_FromLong(upb_EnumValueDef_Number(enumval));
  } else if ((ext = upb_DefPool_FindExtensionByName(symtab, key))) {
    ret = PyUpb_FieldDescriptor_Get(ext);
  }

  // `key` points into `py_key`; it must not be used past this point.
  Py_DECREF(py_key);

  const char* suffix = "_FIELD_NUMBER";
  size_t name_len = strlen(name_buf);
  size_t suffix_len = strlen(suffix);
  if (name_len > suffix_len &&
      memcmp(suffix, name_buf + name_len - suffix_len, suffix_len) == 0) {
    // We can't look up field names dynamically, because the <NAME>_FIELD_NUMBER
    // naming scheme upper-cases the field name and is therefore non-reversible.
    // So we just add all field numbers.
    int field_count = upb_MessageDef_FieldCount(msgdef);
    for (int i = 0; i < field_count; i++) {
      PyUpb_MessageMeta_AddFieldNumber(self, upb_MessageDef_Field(msgdef, i));
    }
    int ext_count = upb_MessageDef_NestedExtensionCount(msgdef);
    for (int i = 0; i < ext_count; i++) {
      PyUpb_MessageMeta_AddFieldNumber(
          self, upb_MessageDef_NestedExtension(msgdef, i));
    }

    PyObject* field_number = PyObject_GenericGetAttr(self, name);
    if (field_number) {
      // The field-number attribute wins; drop any earlier match instead of
      // leaking it.
      Py_XDECREF(ret);
      ret = field_number;
    } else if (ret) {
      // No such field number, but a nested definition matched: keep that
      // result and discard the lookup error.
      PyErr_Clear();
    }
  }

  return ret;
}
1884 
// tp_getattro for message classes.
//
// Regular type attribute lookup is tried first, which also finds attributes
// cached by an earlier call.  If that fails, its error is discarded and the
// attribute is computed from the message definition; a computed value is
// stored on the class on a best-effort basis (a failure to store it is
// ignored) and returned.  If nothing can be computed, AttributeError is
// raised with `name` as its value.
static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name) {
  PyObject* attr = cpython_bits.type_getattro(self, name);

  if (attr == NULL) {
    // Not cached yet: drop the lookup error and try the dynamic path.
    PyErr_Clear();
    attr = PyUpb_MessageMeta_GetDynamicAttr(self, name);
    if (attr == NULL) {
      PyErr_SetObject(PyExc_AttributeError, name);
      return NULL;
    }

    // Cache the value for next time; the result is valid even if caching
    // fails, so any error from it is cleared.
    PyObject_SetAttr(self, name, attr);
    PyErr_Clear();
  }

  return attr;
}
1905 
1906 static PyType_Slot PyUpb_MessageMeta_Slots[] = {
1907     {Py_tp_new, PyUpb_MessageMeta_New},
1908     {Py_tp_dealloc, PyUpb_MessageMeta_Dealloc},
1909     {Py_tp_getattro, PyUpb_MessageMeta_GetAttr},
1910     {0, NULL}};
1911 
1912 static PyType_Spec PyUpb_MessageMeta_Spec = {
1913     PYUPB_MODULE_NAME ".MessageMeta",  // tp_name
1914     0,  // To be filled in by size of base     // tp_basicsize
1915     0,  // tp_itemsize
1916     // TODO(haberman): remove BASETYPE, Python should just use MessageMeta
1917     // directly instead of subclassing it.
1918     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
1919     PyUpb_MessageMeta_Slots,
1920 };
1921 
PyUpb_MessageMeta_CreateType(void)1922 static PyObject* PyUpb_MessageMeta_CreateType(void) {
1923   PyObject* bases = Py_BuildValue("(O)", &PyType_Type);
1924   if (!bases) return NULL;
1925   PyUpb_MessageMeta_Spec.basicsize =
1926       cpython_bits.type_basicsize + sizeof(PyUpb_MessageMeta);
1927   PyObject* type = PyType_FromSpecWithBases(&PyUpb_MessageMeta_Spec, bases);
1928   Py_DECREF(bases);
1929   return type;
1930 }
1931 
// Module-initialization step for message support.
//
// Creates the MessageMeta type and the C message base class, adds them to
// module `m`, and caches in the module state the objects this file needs at
// runtime: the `_ListFieldsItemKey` method, the EncodeError / DecodeError /
// Message classes from the public `message` module, and EnumTypeWrapper from
// the internal `enum_type_wrapper` module.
//
// Every fallible step is checked right away so that no further CPython call
// is made while an exception is pending.  Returns true on success, false on
// failure.
bool PyUpb_InitMessage(PyObject* m) {
  if (!PyUpb_CPythonBits_Init(&cpython_bits)) return false;

  PyObject* message_meta_type = PyUpb_MessageMeta_CreateType();
  if (!message_meta_type) return false;

  PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
  state->message_meta_type = (PyTypeObject*)message_meta_type;
  state->cmessage_type = PyUpb_AddClass(m, &PyUpb_Message_Spec);
  if (!state->cmessage_type) return false;

  if (PyModule_AddObject(m, "MessageMeta", message_meta_type)) return false;

  state->listfields_item_key = PyObject_GetAttrString(
      (PyObject*)state->cmessage_type, "_ListFieldsItemKey");
  if (!state->listfields_item_key) return false;

  PyObject* mod =
      PyImport_ImportModule(PYUPB_PROTOBUF_PUBLIC_PACKAGE ".message");
  if (mod == NULL) return false;

  // Chain the lookups: once one fails, the remaining ones are skipped and stay
  // NULL, so `message_class` is non-NULL only if all three succeeded.
  PyObject* encode_error = PyObject_GetAttrString(mod, "EncodeError");
  PyObject* decode_error =
      encode_error ? PyObject_GetAttrString(mod, "DecodeError") : NULL;
  PyObject* message_class =
      decode_error ? PyObject_GetAttrString(mod, "Message") : NULL;
  Py_DECREF(mod);

  state->encode_error_class = encode_error;
  state->decode_error_class = decode_error;
  state->message_class = message_class;
  if (!message_class) return false;

  PyObject* enum_type_wrapper = PyImport_ImportModule(
      PYUPB_PROTOBUF_INTERNAL_PACKAGE ".enum_type_wrapper");
  if (enum_type_wrapper == NULL) return false;

  state->enum_type_wrapper_class =
      PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
  Py_DECREF(enum_type_wrapper);

  return state->enum_type_wrapper_class != NULL;
}
1970