xref: /aosp_15_r20/external/protobuf/ruby/ext/google/protobuf_c/message.c (revision 1b3f573f81763fcece89efc2b6a5209149e44ab8)
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2014 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #include "message.h"
32 
33 #include "convert.h"
34 #include "defs.h"
35 #include "map.h"
36 #include "protobuf.h"
37 #include "repeated_field.h"
38 
39 static VALUE cParseError = Qnil;
40 static ID descriptor_instancevar_interned;
41 
// Instantiates `klass` by calling its Ruby-level `new` method with no
// arguments and returns the resulting object.
static VALUE initialize_rb_class_with_no_args(VALUE klass) {
  const ID new_method = rb_intern("new");
  return rb_funcall(klass, new_method, 0);
}
45 
// Returns whatever is stored on `klass` under the interned descriptor
// instance-variable ID.
VALUE MessageOrEnum_GetDescriptor(VALUE klass) {
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  return descriptor;
}
49 
50 // -----------------------------------------------------------------------------
51 // Class/module creation from msgdefs and enumdefs, respectively.
52 // -----------------------------------------------------------------------------
53 
54 typedef struct {
55   VALUE arena;
56   const upb_Message* msg;  // Can get as mutable when non-frozen.
57   const upb_MessageDef*
58       msgdef;  // kept alive by self.class.descriptor reference.
59 } Message;
60 
Message_mark(void * _self)61 static void Message_mark(void* _self) {
62   Message* self = (Message*)_self;
63   rb_gc_mark(self->arena);
64 }
65 
66 static rb_data_type_t Message_type = {
67     "Message",
68     {Message_mark, RUBY_DEFAULT_FREE, NULL},
69     .flags = RUBY_TYPED_FREE_IMMEDIATELY,
70 };
71 
ruby_to_Message(VALUE msg_rb)72 static Message* ruby_to_Message(VALUE msg_rb) {
73   Message* msg;
74   TypedData_Get_Struct(msg_rb, Message, &Message_type, msg);
75   return msg;
76 }
77 
// Allocator for message classes. Creates a wrapped Message whose msgdef comes
// from the descriptor stored on `klass`; the upb message pointer and arena are
// left empty (NULL / nil). The descriptor is also stored on the new instance
// under the same instance-variable ID.
static VALUE Message_alloc(VALUE klass) {
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  Message* wrapped = ALLOC(Message);

  wrapped->msgdef = Descriptor_GetMsgDef(descriptor);
  wrapped->arena = Qnil;
  wrapped->msg = NULL;

  VALUE instance = TypedData_Wrap_Struct(klass, &Message_type, wrapped);
  rb_ivar_set(instance, descriptor_instancevar_interned, descriptor);
  return instance;
}
92 
Message_Get(VALUE msg_rb,const upb_MessageDef ** m)93 const upb_Message* Message_Get(VALUE msg_rb, const upb_MessageDef** m) {
94   Message* msg = ruby_to_Message(msg_rb);
95   if (m) *m = msg->msgdef;
96   return msg->msg;
97 }
98 
Message_GetMutable(VALUE msg_rb,const upb_MessageDef ** m)99 upb_Message* Message_GetMutable(VALUE msg_rb, const upb_MessageDef** m) {
100   rb_check_frozen(msg_rb);
101   return (upb_Message*)Message_Get(msg_rb, m);
102 }
103 
// Attaches the upb message `msg` and its Ruby `arena` to the wrapper `self_`,
// then registers the msg -> wrapper association in the object cache.
void Message_InitPtr(VALUE self_, upb_Message* msg, VALUE arena) {
  Message* wrapped = ruby_to_Message(self_);
  wrapped->arena = arena;
  wrapped->msg = msg;
  ObjectCache_Add(msg, self_);
}
110 
// Returns the Ruby arena object stored in the wrapper `msg_rb`.
VALUE Message_GetArena(VALUE msg_rb) {
  return ruby_to_Message(msg_rb)->arena;
}
115 
// Raises ArgumentError unless `klass` uses Message_alloc as its allocator.
void Message_CheckClass(VALUE klass) {
  if (rb_get_alloc_func(klass) == &Message_alloc) {
    return;
  }
  rb_raise(rb_eArgError,
           "Message class was not returned by the DescriptorPool.");
}
122 
// Returns the Ruby object wrapping `msg`, or nil when `msg` is NULL. A wrapper
// already present in the object cache is reused; otherwise a new instance of
// the class for `m` is allocated and bound to `msg` and `arena`.
VALUE Message_GetRubyWrapper(upb_Message* msg, const upb_MessageDef* m,
                             VALUE arena) {
  if (!msg) return Qnil;

  VALUE cached = ObjectCache_Get(msg);
  if (cached != Qnil) return cached;

  VALUE wrapper = Message_alloc(Descriptor_DefToClass(m));
  Message_InitPtr(wrapper, msg, arena);
  return wrapper;
}
137 
// Appends a textual rendering of `msg` (of type `m`) to `b`, shaped like
// "<ClassName: field: value, other: value>". Fields that have presence but are
// not set on the message are left out.
void Message_PrintMessage(StringBuilder* b, const upb_Message* msg,
                          const upb_MessageDef* m) {
  const int field_count = upb_MessageDef_FieldCount(m);
  VALUE klass = Descriptor_DefToClass(m);
  bool printed_any = false;

  StringBuilder_Printf(b, "<%s: ", rb_class2name(klass));

  for (int idx = 0; idx < field_count; idx++) {
    const upb_FieldDef* field = upb_MessageDef_Field(m, idx);
    const bool unset =
        upb_FieldDef_HasPresence(field) && !upb_Message_Has(msg, field);
    if (unset) continue;

    // Separator goes between entries, never before the first one.
    if (printed_any) {
      StringBuilder_Printf(b, ", ");
    }
    printed_any = true;

    upb_MessageValue msgval = upb_Message_Get(msg, field);
    StringBuilder_Printf(b, "%s: ", upb_FieldDef_Name(field));

    if (upb_FieldDef_IsMap(field)) {
      // Map entries are messages with the key at field number 1 and the value
      // at field number 2.
      const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(field);
      const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry_m, 1);
      const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
      TypeInfo val_info = TypeInfo_get(val_f);
      Map_Inspect(b, msgval.map_val, upb_FieldDef_CType(key_f), val_info);
      continue;
    }

    if (upb_FieldDef_IsRepeated(field)) {
      RepeatedField_Inspect(b, msgval.array_val, TypeInfo_get(field));
      continue;
    }

    StringBuilder_PrintMsgval(b, msgval, TypeInfo_get(field));
  }

  StringBuilder_Printf(b, ">");
}
177 
178 // Helper functions for #method_missing ////////////////////////////////////////
179 
// Accessor kinds recognized by extract_method_call, keyed by how the method
// name relates to a field or oneof name.
enum {
  METHOD_UNKNOWN = 0,         // no accessor pattern matched
  METHOD_GETTER = 1,          // "name"
  METHOD_SETTER = 2,          // "name="
  METHOD_CLEAR = 3,           // "clear_name"
  METHOD_PRESENCE = 4,        // "has_name?"
  METHOD_ENUM_GETTER = 5,     // "name_const"
  METHOD_WRAPPER_GETTER = 6,  // "name_as_value"
  METHOD_WRAPPER_SETTER = 7   // "name_as_value="
};
190 
191 // Check if the field is a well known wrapper type
IsWrapper(const upb_MessageDef * m)192 static bool IsWrapper(const upb_MessageDef* m) {
193   if (!m) return false;
194   switch (upb_MessageDef_WellKnownType(m)) {
195     case kUpb_WellKnown_DoubleValue:
196     case kUpb_WellKnown_FloatValue:
197     case kUpb_WellKnown_Int64Value:
198     case kUpb_WellKnown_UInt64Value:
199     case kUpb_WellKnown_Int32Value:
200     case kUpb_WellKnown_UInt32Value:
201     case kUpb_WellKnown_StringValue:
202     case kUpb_WellKnown_BytesValue:
203     case kUpb_WellKnown_BoolValue:
204       return true;
205     default:
206       return false;
207   }
208 }
209 
IsFieldWrapper(const upb_FieldDef * f)210 static bool IsFieldWrapper(const upb_FieldDef* f) {
211   return IsWrapper(upb_FieldDef_MessageSubDef(f));
212 }
213 
// Tests whether `name` has the shape <prefix><stem><suffix> with a non-empty
// stem, and if so looks the stem up in `m` by name. The lookup result is
// returned, with the matching field and/or oneof reported through `f` and `o`.
// Returns false without performing a lookup when the shape does not match.
static bool Match(const upb_MessageDef* m, const char* name,
                  const upb_FieldDef** f, const upb_OneofDef** o,
                  const char* prefix, const char* suffix) {
  const size_t prefix_len = strlen(prefix);
  const size_t suffix_len = strlen(suffix);
  const size_t name_len = strlen(name);

  // The stem between prefix and suffix must contain at least one character.
  if (name_len <= prefix_len + suffix_len) return false;

  const char* stem = name + prefix_len;
  const size_t stem_len = name_len - prefix_len - suffix_len;

  if (memcmp(name, prefix, prefix_len) != 0) return false;
  if (memcmp(stem + stem_len, suffix, suffix_len) != 0) return false;

  return upb_MessageDef_FindByNameWithSize(m, stem, stem_len, f, o);
}
230 
// Classifies the method-name symbol `method_name` against the fields and
// oneofs of `self`'s message type. Patterns are tried in a fixed order and the
// first applicable one determines the returned METHOD_* constant; the matched
// field/oneof is reported through `f` / `o`. Returns METHOD_UNKNOWN when no
// pattern applies. `method_name` must be a Symbol (enforced by Check_Type).
static int extract_method_call(VALUE method_name, Message* self,
                               const upb_FieldDef** f, const upb_OneofDef** o) {
  const upb_MessageDef* m = self->msgdef;

  Check_Type(method_name, T_SYMBOL);
  const char* name = rb_id2name(SYM2ID(method_name));

  if (Match(m, name, f, o, "", "")) return METHOD_GETTER;
  if (Match(m, name, f, o, "", "=")) return METHOD_SETTER;
  if (Match(m, name, f, o, "clear_", "")) return METHOD_CLEAR;

  if (Match(m, name, f, o, "has_", "?")) {
    const bool tracks_presence = *o || (*f && upb_FieldDef_HasPresence(*f));
    if (tracks_presence) {
      // Disallow oneof hazzers for proto3.
      // TODO(haberman): remove this test when we are enabling oneof hazzers
      // for proto3.
      const bool non_proto2_scalar_oneof_member =
          *f && !upb_FieldDef_IsSubMessage(*f) &&
          upb_FieldDef_RealContainingOneof(*f) &&
          upb_MessageDef_Syntax(upb_FieldDef_ContainingType(*f)) !=
              kUpb_Syntax_Proto2;
      return non_proto2_scalar_oneof_member ? METHOD_UNKNOWN : METHOD_PRESENCE;
    }
    // Otherwise keep trying the remaining patterns.
  }

  if (Match(m, name, f, o, "", "_as_value")) {
    if (*f && !upb_FieldDef_IsRepeated(*f) && IsFieldWrapper(*f)) {
      return METHOD_WRAPPER_GETTER;
    }
  }

  if (Match(m, name, f, o, "", "_as_value=")) {
    if (*f && !upb_FieldDef_IsRepeated(*f) && IsFieldWrapper(*f)) {
      return METHOD_WRAPPER_SETTER;
    }
  }

  if (Match(m, name, f, o, "", "_const")) {
    if (*f && upb_FieldDef_CType(*f) == kUpb_CType_Enum) {
      return METHOD_ENUM_GETTER;
    }
  }

  return METHOD_UNKNOWN;
}
270 
// Handles an accessor call that resolved to the oneof `o`:
//   - presence: true/false depending on whether any member is set
//   - clear:    clears the currently set member, if any; returns nil
//   - getter:   the set member's name as a Symbol, or nil when none is set
//   - setter:   always raises, since oneof accessors are read-only
// Any other accessor type raises RuntimeError.
static VALUE Message_oneof_accessor(VALUE _self, const upb_OneofDef* o,
                                    int accessor_type) {
  Message* self = ruby_to_Message(_self);
  const upb_FieldDef* active = upb_Message_WhichOneof(self->msg, o);

  if (accessor_type == METHOD_PRESENCE) {
    return active != NULL ? Qtrue : Qfalse;
  }

  if (accessor_type == METHOD_CLEAR) {
    if (active != NULL) {
      upb_Message_ClearField(Message_GetMutable(_self, NULL), active);
    }
    return Qnil;
  }

  if (accessor_type == METHOD_GETTER) {
    if (active == NULL) return Qnil;
    return ID2SYM(rb_intern(upb_FieldDef_Name(active)));
  }

  if (accessor_type == METHOD_SETTER) {
    rb_raise(rb_eRuntimeError, "Oneof accessors are read-only.");
  }

  rb_raise(rb_eRuntimeError, "Invalid access of oneof field.");
}
293 
// Stores the Ruby value `val` into field `f` of `msg`. Map and repeated fields
// take their upb container from the Ruby wrapper; other fields are converted
// with Convert_RubyToUpb. Assigning nil to a sub-message field or to a member
// of a real oneof clears the field instead of setting it.
static void Message_setfield(upb_Message* msg, const upb_FieldDef* f, VALUE val,
                             upb_Arena* arena) {
  upb_MessageValue converted;

  if (upb_FieldDef_IsMap(f)) {
    converted.map_val = Map_GetUpbMap(val, f, arena);
  } else if (upb_FieldDef_IsRepeated(f)) {
    converted.array_val = RepeatedField_GetUpbArray(val, f, arena);
  } else if (val == Qnil && (upb_FieldDef_IsSubMessage(f) ||
                             upb_FieldDef_RealContainingOneof(f))) {
    upb_Message_ClearField(msg, f);
    return;
  } else {
    converted =
        Convert_RubyToUpb(val, upb_FieldDef_Name(f), TypeInfo_get(f), arena);
  }

  upb_Message_Set(msg, f, converted, arena);
}
312 
// Returns the Ruby value of field `f` on the message wrapped by `_self`:
// a Map or RepeatedField wrapper for container fields, a message wrapper (or
// nil when unset) for sub-message fields, and a converted scalar otherwise.
VALUE Message_getfield(VALUE _self, const upb_FieldDef* f) {
  Message* self = ruby_to_Message(_self);
  // Special case: upb_Message_Mutable() on a map or array field is logically
  // const (the serialized form does not change) yet physically non-const,
  // because it may allocate the container. That logical constness is why the
  // const is cast away here even when the message is frozen.
  upb_Message* mutable_msg = (upb_Message*)self->msg;
  upb_Arena* arena = Arena_get(self->arena);

  if (upb_FieldDef_IsMap(f)) {
    upb_Map* map = upb_Message_Mutable(mutable_msg, f, arena).map;
    upb_CType key_type = upb_FieldDef_CType(map_field_key(f));
    TypeInfo value_type_info = TypeInfo_get(map_field_value(f));
    return Map_GetRubyWrapper(map, key_type, value_type_info, self->arena);
  }

  if (upb_FieldDef_IsRepeated(f)) {
    upb_Array* arr = upb_Message_Mutable(mutable_msg, f, arena).array;
    return RepeatedField_GetRubyWrapper(arr, TypeInfo_get(f), self->arena);
  }

  if (upb_FieldDef_IsSubMessage(f)) {
    if (!upb_Message_Has(self->msg, f)) return Qnil;
    upb_Message* submsg = upb_Message_Mutable(mutable_msg, f, arena).msg;
    return Message_GetRubyWrapper(submsg, upb_FieldDef_MessageSubDef(f),
                                  self->arena);
  }

  return Convert_UpbToRuby(upb_Message_Get(self->msg, f), TypeInfo_get(f),
                           self->arena);
}
341 
// Performs the accessor identified by `accessor_type` on field `f` of the
// message wrapped by `_self`.
//
// `argv[0]` is the method-name symbol. The setter variants read the new value
// from `argv[1]`, so the caller must already have validated the argument count
// (Message_method_missing does so before dispatching here); `argc` itself is
// not consulted.
//
// Returns nil for setters and clear, true/false for presence checks, and the
// field's value (or nil) for the getter variants. Mutating accessors go
// through Message_GetMutable, which applies Ruby's frozen check. Raises
// RuntimeError for a presence check on a field without presence and for an
// unrecognized accessor type.
static VALUE Message_field_accessor(VALUE _self, const upb_FieldDef* f,
                                    int accessor_type, int argc, VALUE* argv) {
  upb_Arena* arena = Arena_get(Message_GetArena(_self));
  (void)argc;

  switch (accessor_type) {
    case METHOD_SETTER:
      Message_setfield(Message_GetMutable(_self, NULL), f, argv[1], arena);
      return Qnil;
    case METHOD_CLEAR:
      upb_Message_ClearField(Message_GetMutable(_self, NULL), f);
      return Qnil;
    case METHOD_PRESENCE:
      if (!upb_FieldDef_HasPresence(f)) {
        rb_raise(rb_eRuntimeError, "Field does not have presence.");
      }
      // upb_Message_Has yields a C truth value; it must be mapped explicitly
      // to Ruby's true/false. Returning it directly would hand Ruby the raw
      // machine word (1 is not Qtrue).
      return upb_Message_Has(Message_Get(_self, NULL), f) ? Qtrue : Qfalse;
    case METHOD_WRAPPER_GETTER: {
      Message* self = ruby_to_Message(_self);
      if (upb_Message_Has(self->msg, f)) {
        PBRUBY_ASSERT(upb_FieldDef_IsSubMessage(f) &&
                      !upb_FieldDef_IsRepeated(f));
        // Unwrap: read the wrapper message, then its field number 1, which
        // holds the wrapped value.
        upb_MessageValue wrapper = upb_Message_Get(self->msg, f);
        const upb_MessageDef* wrapper_m = upb_FieldDef_MessageSubDef(f);
        const upb_FieldDef* value_f =
            upb_MessageDef_FindFieldByNumber(wrapper_m, 1);
        upb_MessageValue value = upb_Message_Get(wrapper.msg_val, value_f);
        return Convert_UpbToRuby(value, TypeInfo_get(value_f), self->arena);
      } else {
        return Qnil;
      }
    }
    case METHOD_WRAPPER_SETTER: {
      upb_Message* msg = Message_GetMutable(_self, NULL);
      if (argv[1] == Qnil) {
        // Assigning nil removes the wrapper message altogether.
        upb_Message_ClearField(msg, f);
      } else {
        const upb_FieldDef* val_f =
            upb_MessageDef_FindFieldByNumber(upb_FieldDef_MessageSubDef(f), 1);
        upb_MessageValue msgval = Convert_RubyToUpb(
            argv[1], upb_FieldDef_Name(f), TypeInfo_get(val_f), arena);
        upb_Message* wrapper = upb_Message_Mutable(msg, f, arena).msg;
        upb_Message_Set(wrapper, val_f, msgval, arena);
      }
      return Qnil;
    }
    case METHOD_ENUM_GETTER: {
      upb_MessageValue msgval = upb_Message_Get(Message_Get(_self, NULL), f);

      if (upb_FieldDef_Label(f) == kUpb_Label_Repeated) {
        // Map repeated fields to a new Ruby array of integers.
        VALUE arr = rb_ary_new();
        size_t i, n = upb_Array_Size(msgval.array_val);
        for (i = 0; i < n; i++) {
          upb_MessageValue elem = upb_Array_Get(msgval.array_val, i);
          rb_ary_push(arr, INT2NUM(elem.int32_val));
        }
        return arr;
      } else {
        return INT2NUM(msgval.int32_val);
      }
    }
    case METHOD_GETTER:
      return Message_getfield(_self, f);
    default:
      rb_raise(rb_eRuntimeError, "Internal error, no such accessor: %d",
               accessor_type);
  }
}
410 
411 /*
412  * call-seq:
413  *     Message.method_missing(*args)
414  *
415  * Provides accessors and setters and methods to clear and check for presence of
416  * message fields according to their field names.
417  *
418  * For any field whose name does not conflict with a built-in method, an
419  * accessor is provided with the same name as the field, and a setter is
420  * provided with the name of the field plus the '=' suffix. Thus, given a
421  * message instance 'msg' with field 'foo', the following code is valid:
422  *
423  *     msg.foo = 42
424  *     puts msg.foo
425  *
426  * This method also provides read-only accessors for oneofs. If a oneof exists
427  * with name 'my_oneof', then msg.my_oneof will return a Ruby symbol equal to
428  * the name of the field in that oneof that is currently set, or nil if none.
429  *
430  * It also provides methods of the form 'clear_fieldname' to clear the value
431  * of the field 'fieldname'. For basic data types, this will set the default
432  * value of the field.
433  *
434  * Additionally, it provides methods of the form 'has_fieldname?', which returns
435  * true if the field 'fieldname' is set in the message object, else false. For
436  * 'proto3' syntax, calling this for a basic type field will result in an error.
437  */
Message_method_missing(int argc,VALUE * argv,VALUE _self)438 static VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) {
439   Message* self = ruby_to_Message(_self);
440   const upb_OneofDef* o;
441   const upb_FieldDef* f;
442   int accessor_type;
443 
444   if (argc < 1) {
445     rb_raise(rb_eArgError, "Expected method name as first argument.");
446   }
447 
448   accessor_type = extract_method_call(argv[0], self, &f, &o);
449 
450   if (accessor_type == METHOD_UNKNOWN) return rb_call_super(argc, argv);
451 
452   // Validate argument count.
453   switch (accessor_type) {
454     case METHOD_SETTER:
455     case METHOD_WRAPPER_SETTER:
456       if (argc != 2) {
457         rb_raise(rb_eArgError, "Expected 2 arguments, received %d", argc);
458       }
459       rb_check_frozen(_self);
460       break;
461     default:
462       if (argc != 1) {
463         rb_raise(rb_eArgError, "Expected 1 argument, received %d", argc);
464       }
465       break;
466   }
467 
468   // Dispatch accessor.
469   if (o != NULL) {
470     return Message_oneof_accessor(_self, o, accessor_type);
471   } else {
472     return Message_field_accessor(_self, f, accessor_type, argc, argv);
473   }
474 }
475 
// Companion to Message_method_missing: reports whether the method named by
// argv[0] would be handled dynamically. Unrecognized names defer to the
// superclass; oneof setters are reported as unsupported; everything else that
// resolves to a field or oneof is supported.
static VALUE Message_respond_to_missing(int argc, VALUE* argv, VALUE _self) {
  Message* self = ruby_to_Message(_self);
  const upb_OneofDef* o;
  const upb_FieldDef* f;

  if (argc < 1) {
    rb_raise(rb_eArgError, "Expected method name as first argument.");
  }

  const int accessor_type = extract_method_call(argv[0], self, &f, &o);

  if (accessor_type == METHOD_UNKNOWN) {
    return rb_call_super(argc, argv);
  }
  if (o == NULL) {
    return Qtrue;
  }
  return accessor_type == METHOD_SETTER ? Qfalse : Qtrue;
}
496 
497 void Message_InitFromValue(upb_Message* msg, const upb_MessageDef* m, VALUE val,
498                            upb_Arena* arena);
499 
500 typedef struct {
501   upb_Map* map;
502   TypeInfo key_type;
503   TypeInfo val_type;
504   upb_Arena* arena;
505 } MapInit;
506 
// rb_hash_foreach callback: converts one Ruby Hash entry and inserts it into
// the map described by the MapInit passed (cast to VALUE) in `_self`. When the
// map's value type is a message and the Ruby value is a Hash, a new message is
// created and initialized from that Hash; otherwise the value is converted
// directly. Always continues the iteration.
static int Map_initialize_kwarg(VALUE key, VALUE val, VALUE _self) {
  MapInit* init = (MapInit*)_self;
  upb_MessageValue map_key = Convert_RubyToUpb(key, "", init->key_type, NULL);
  upb_MessageValue map_val;

  const bool build_from_hash =
      init->val_type.type == kUpb_CType_Message && TYPE(val) == T_HASH;

  if (!build_from_hash) {
    map_val = Convert_RubyToUpb(val, "", init->val_type, init->arena);
  } else {
    const upb_MessageDef* value_def = init->val_type.def.msgdef;
    upb_Message* submsg = upb_Message_New(value_def, init->arena);
    Message_InitFromValue(submsg, value_def, val, init->arena);
    map_val.msg_val = submsg;
  }

  upb_Map_Set(init->map, map_key, map_val, init->arena);
  return ST_CONTINUE;
}
524 
// Fills `map` (the storage for map field `f`) from the Ruby Hash `val`.
// Raises ArgumentError when `val` is not a Hash.
static void Map_InitFromValue(upb_Map* map, const upb_FieldDef* f, VALUE val,
                              upb_Arena* arena) {
  // The map entry message carries the key at field number 1 and the value at
  // field number 2.
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry_m, 1);
  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);

  if (TYPE(val) != T_HASH) {
    rb_raise(rb_eArgError,
             "Expected Hash object as initializer value for map field '%s' "
             "(given %s).",
             upb_FieldDef_Name(f), rb_class2name(CLASS_OF(val)));
  }

  MapInit init;
  init.map = map;
  init.key_type = TypeInfo_get(key_f);
  init.val_type = TypeInfo_get(val_f);
  init.arena = arena;
  rb_hash_foreach(val, Map_initialize_kwarg, (VALUE)&init);
}
539 
MessageValue_FromValue(VALUE val,TypeInfo info,upb_Arena * arena)540 static upb_MessageValue MessageValue_FromValue(VALUE val, TypeInfo info,
541                                                upb_Arena* arena) {
542   if (info.type == kUpb_CType_Message) {
543     upb_MessageValue msgval;
544     upb_Message* msg = upb_Message_New(info.def.msgdef, arena);
545     Message_InitFromValue(msg, info.def.msgdef, val, arena);
546     msgval.msg_val = msg;
547     return msgval;
548   } else {
549     return Convert_RubyToUpb(val, "", info, arena);
550   }
551 }
552 
// Appends every element of the Ruby Array `val` to `arr`, the storage for
// repeated field `f`. Hash elements of a sub-message field are turned into
// new messages; all other elements are converted with Convert_RubyToUpb.
// Raises ArgumentError when `val` is not an Array.
static void RepeatedField_InitFromValue(upb_Array* arr, const upb_FieldDef* f,
                                        VALUE val, upb_Arena* arena) {
  TypeInfo type_info = TypeInfo_get(f);

  if (TYPE(val) != T_ARRAY) {
    rb_raise(rb_eArgError,
             "Expected array as initializer value for repeated field '%s' "
             "(given %s).",
             upb_FieldDef_Name(f), rb_class2name(CLASS_OF(val)));
  }

  // Ruby array lengths and indices are `long`; an `int` index could overflow
  // on very large arrays. The length is re-read on every iteration, as
  // before, so the loop stays within bounds if the array changes size.
  for (long i = 0; i < RARRAY_LEN(val); i++) {
    VALUE entry = rb_ary_entry(val, i);
    upb_MessageValue msgval;
    if (upb_FieldDef_IsSubMessage(f) && TYPE(entry) == T_HASH) {
      msgval = MessageValue_FromValue(entry, type_info, arena);
    } else {
      msgval = Convert_RubyToUpb(entry, upb_FieldDef_Name(f), type_info, arena);
    }
    upb_Array_Append(arr, msgval, arena);
  }
}
575 
// Initializes field `f` of `msg` from the Ruby value `val`. A nil value leaves
// the field untouched. Map and repeated fields are filled from a Hash / Array;
// a sub-message field given a Hash is initialized recursively, and any other
// sub-message value is assigned through Message_setfield. Remaining fields are
// converted and set directly.
static void Message_InitFieldFromValue(upb_Message* msg, const upb_FieldDef* f,
                                       VALUE val, upb_Arena* arena) {
  if (TYPE(val) == T_NIL) {
    return;
  }

  if (upb_FieldDef_IsMap(f)) {
    Map_InitFromValue(upb_Message_Mutable(msg, f, arena).map, f, val, arena);
    return;
  }

  if (upb_FieldDef_Label(f) == kUpb_Label_Repeated) {
    RepeatedField_InitFromValue(upb_Message_Mutable(msg, f, arena).array, f,
                                val, arena);
    return;
  }

  if (!upb_FieldDef_IsSubMessage(f)) {
    upb_MessageValue scalar =
        Convert_RubyToUpb(val, upb_FieldDef_Name(f), TypeInfo_get(f), arena);
    upb_Message_Set(msg, f, scalar, arena);
    return;
  }

  if (TYPE(val) != T_HASH) {
    Message_setfield(msg, f, val, arena);
    return;
  }

  upb_Message* submsg = upb_Message_Mutable(msg, f, arena).msg;
  Message_InitFromValue(submsg, upb_FieldDef_MessageSubDef(f), val, arena);
}
599 
600 typedef struct {
601   upb_Message* msg;
602   const upb_MessageDef* msgdef;
603   upb_Arena* arena;
604 } MsgInit;
605 
// rb_hash_foreach callback: initializes the field named by `key` (a String or
// Symbol) with `val` on the message described by the MsgInit passed (cast to
// VALUE) in `_self`. Raises ArgumentError for any other key type or for a name
// that is not a field of the message. Always continues the iteration.
static int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) {
  MsgInit* init = (MsgInit*)_self;
  const char* name;

  switch (TYPE(key)) {
    case T_STRING:
      name = RSTRING_PTR(key);
      break;
    case T_SYMBOL:
      name = RSTRING_PTR(rb_id2str(SYM2ID(key)));
      break;
    default:
      rb_raise(rb_eArgError,
               "Expected string or symbols as hash keys when initializing proto "
               "from hash.");
  }

  const upb_FieldDef* field = upb_MessageDef_FindFieldByName(init->msgdef, name);
  if (!field) {
    rb_raise(rb_eArgError,
             "Unknown field name '%s' in initialization map entry.", name);
  }

  Message_InitFieldFromValue(init->msg, field, val, init->arena);
  return ST_CONTINUE;
}
631 
// Populates `msg` (of type `m`) from the Ruby Hash `val`, one field per entry.
// Raises ArgumentError when `val` is not a Hash.
void Message_InitFromValue(upb_Message* msg, const upb_MessageDef* m, VALUE val,
                           upb_Arena* arena) {
  if (TYPE(val) != T_HASH) {
    rb_raise(rb_eArgError, "Expected hash arguments or message, not %s",
             rb_class2name(CLASS_OF(val)));
  }

  MsgInit init = {msg, m, arena};
  rb_hash_foreach(val, Message_initialize_kwarg, (VALUE)&init);
}
642 
643 /*
644  * call-seq:
645  *     Message.new(kwargs) => new_message
646  *
647  * Creates a new instance of the given message class. Keyword arguments may be
648  * provided with keywords corresponding to field names.
649  *
650  * Note that no literal Message class exists. Only concrete classes per message
651  * type exist, as provided by the #msgclass method on Descriptors after they
652  * have been added to a pool. The method definitions described here on the
653  * Message class are provided on each concrete message class.
654  */
Message_initialize(int argc,VALUE * argv,VALUE _self)655 static VALUE Message_initialize(int argc, VALUE* argv, VALUE _self) {
656   Message* self = ruby_to_Message(_self);
657   VALUE arena_rb = Arena_new();
658   upb_Arena* arena = Arena_get(arena_rb);
659   upb_Message* msg = upb_Message_New(self->msgdef, arena);
660 
661   Message_InitPtr(_self, msg, arena_rb);
662 
663   if (argc == 0) {
664     return Qnil;
665   }
666   if (argc != 1) {
667     rb_raise(rb_eArgError, "Expected 0 or 1 arguments.");
668   }
669   Message_InitFromValue((upb_Message*)self->msg, self->msgdef, argv[0], arena);
670   return Qnil;
671 }
672 
673 /*
674  * call-seq:
675  *     Message.dup => new_message
676  *
677  * Performs a shallow copy of this message and returns the new copy.
678  */
Message_dup(VALUE _self)679 static VALUE Message_dup(VALUE _self) {
680   Message* self = ruby_to_Message(_self);
681   VALUE new_msg = rb_class_new_instance(0, NULL, CLASS_OF(_self));
682   Message* new_msg_self = ruby_to_Message(new_msg);
683   size_t size = upb_MessageDef_MiniTable(self->msgdef)->size;
684 
685   // TODO(copy unknown fields?)
686   // TODO(use official upb msg copy function)
687   memcpy((upb_Message*)new_msg_self->msg, self->msg, size);
688   Arena_fuse(self->arena, Arena_get(new_msg_self->arena));
689   return new_msg;
690 }
691 
692 // Support function for Message_eq, and also used by other #eq functions.
Message_Equal(const upb_Message * m1,const upb_Message * m2,const upb_MessageDef * m)693 bool Message_Equal(const upb_Message* m1, const upb_Message* m2,
694                    const upb_MessageDef* m) {
695   if (m1 == m2) return true;
696 
697   size_t size1, size2;
698   int encode_opts = kUpb_Encode_SkipUnknown | kUpb_Encode_Deterministic;
699   upb_Arena* arena_tmp = upb_Arena_New();
700   const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
701 
702   // Compare deterministically serialized payloads with no unknown fields.
703   char* data1 = upb_Encode(m1, layout, encode_opts, arena_tmp, &size1);
704   char* data2 = upb_Encode(m2, layout, encode_opts, arena_tmp, &size2);
705 
706   if (data1 && data2) {
707     bool ret = (size1 == size2) && (memcmp(data1, data2, size1) == 0);
708     upb_Arena_Free(arena_tmp);
709     return ret;
710   } else {
711     upb_Arena_Free(arena_tmp);
712     rb_raise(cParseError, "Error comparing messages");
713   }
714 }
715 
716 /*
717  * call-seq:
718  *     Message.==(other) => boolean
719  *
720  * Performs a deep comparison of this message with another. Messages are equal
721  * if they have the same type and if each field is equal according to the :==
722  * method's semantics (a more efficient comparison may actually be done if the
723  * field is of a primitive type).
724  */
Message_eq(VALUE _self,VALUE _other)725 static VALUE Message_eq(VALUE _self, VALUE _other) {
726   if (CLASS_OF(_self) != CLASS_OF(_other)) return Qfalse;
727 
728   Message* self = ruby_to_Message(_self);
729   Message* other = ruby_to_Message(_other);
730   assert(self->msgdef == other->msgdef);
731 
732   return Message_Equal(self->msg, other->msg, self->msgdef) ? Qtrue : Qfalse;
733 }
734 
Message_Hash(const upb_Message * msg,const upb_MessageDef * m,uint64_t seed)735 uint64_t Message_Hash(const upb_Message* msg, const upb_MessageDef* m,
736                       uint64_t seed) {
737   upb_Arena* arena = upb_Arena_New();
738   const char* data;
739   size_t size;
740 
741   // Hash a deterministically serialized payloads with no unknown fields.
742   data = upb_Encode(msg, upb_MessageDef_MiniTable(m),
743                     kUpb_Encode_SkipUnknown | kUpb_Encode_Deterministic, arena,
744                     &size);
745 
746   if (data) {
747     uint64_t ret = _upb_Hash(data, size, seed);
748     upb_Arena_Free(arena);
749     return ret;
750   } else {
751     upb_Arena_Free(arena);
752     rb_raise(cParseError, "Error calculating hash");
753   }
754 }
755 
756 /*
757  * call-seq:
758  *     Message.hash => hash_value
759  *
760  * Returns a hash value that represents this message's field values.
761  */
Message_hash(VALUE _self)762 static VALUE Message_hash(VALUE _self) {
763   Message* self = ruby_to_Message(_self);
764   uint64_t hash_value = Message_Hash(self->msg, self->msgdef, 0);
765   // RUBY_FIXNUM_MAX should be one less than a power of 2.
766   assert((RUBY_FIXNUM_MAX & (RUBY_FIXNUM_MAX + 1)) == 0);
767   return INT2FIX(hash_value & RUBY_FIXNUM_MAX);
768 }
769 
770 /*
771  * call-seq:
772  *     Message.inspect => string
773  *
774  * Returns a human-readable string representing this message. It will be
775  * formatted as "<MessageType: field1: value1, field2: value2, ...>". Each
776  * field's value is represented according to its own #inspect method.
777  */
// Ruby-visible #inspect / #to_s: prints the wrapped message into a temporary
// StringBuilder and returns the result as a Ruby string.
static VALUE Message_inspect(VALUE _self) {
  Message* self = ruby_to_Message(_self);
  StringBuilder* sb = StringBuilder_New();
  VALUE rendered;

  Message_PrintMessage(sb, self->msg, self->msgdef);

  // Convert to a Ruby string before the builder is freed.
  rendered = StringBuilder_ToRubyString(sb);
  StringBuilder_Free(sb);
  return rendered;
}
787 
788 // Support functions for Message_to_h //////////////////////////////////////////
789 
RepeatedField_CreateArray(const upb_Array * arr,TypeInfo type_info)790 static VALUE RepeatedField_CreateArray(const upb_Array* arr,
791                                        TypeInfo type_info) {
792   int size = arr ? upb_Array_Size(arr) : 0;
793   VALUE ary = rb_ary_new2(size);
794 
795   for (int i = 0; i < size; i++) {
796     upb_MessageValue msgval = upb_Array_Get(arr, i);
797     VALUE val = Scalar_CreateHash(msgval, type_info);
798     rb_ary_push(ary, val);
799   }
800 
801   return ary;
802 }
803 
Message_CreateHash(const upb_Message * msg,const upb_MessageDef * m)804 static VALUE Message_CreateHash(const upb_Message* msg,
805                                 const upb_MessageDef* m) {
806   if (!msg) return Qnil;
807 
808   VALUE hash = rb_hash_new();
809   int n = upb_MessageDef_FieldCount(m);
810   bool is_proto2;
811 
812   // We currently have a few behaviors that are specific to proto2.
813   // This is unfortunate, we should key behaviors off field attributes (like
814   // whether a field has presence), not proto2 vs. proto3. We should see if we
815   // can change this without breaking users.
816   is_proto2 = upb_MessageDef_Syntax(m) == kUpb_Syntax_Proto2;
817 
818   for (int i = 0; i < n; i++) {
819     const upb_FieldDef* field = upb_MessageDef_Field(m, i);
820     TypeInfo type_info = TypeInfo_get(field);
821     upb_MessageValue msgval;
822     VALUE msg_value;
823     VALUE msg_key;
824 
825     if (!is_proto2 && upb_FieldDef_IsSubMessage(field) &&
826         !upb_FieldDef_IsRepeated(field) && !upb_Message_Has(msg, field)) {
827       // TODO: Legacy behavior, remove when we fix the is_proto2 differences.
828       msg_key = ID2SYM(rb_intern(upb_FieldDef_Name(field)));
829       rb_hash_aset(hash, msg_key, Qnil);
830       continue;
831     }
832 
833     // Do not include fields that are not present (oneof or optional fields).
834     if (is_proto2 && upb_FieldDef_HasPresence(field) &&
835         !upb_Message_Has(msg, field)) {
836       continue;
837     }
838 
839     msg_key = ID2SYM(rb_intern(upb_FieldDef_Name(field)));
840     msgval = upb_Message_Get(msg, field);
841 
842     // Proto2 omits empty map/repeated filds also.
843 
844     if (upb_FieldDef_IsMap(field)) {
845       const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(field);
846       const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry_m, 1);
847       const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
848       upb_CType key_type = upb_FieldDef_CType(key_f);
849       msg_value = Map_CreateHash(msgval.map_val, key_type, TypeInfo_get(val_f));
850     } else if (upb_FieldDef_IsRepeated(field)) {
851       if (is_proto2 &&
852           (!msgval.array_val || upb_Array_Size(msgval.array_val) == 0)) {
853         continue;
854       }
855       msg_value = RepeatedField_CreateArray(msgval.array_val, type_info);
856     } else {
857       msg_value = Scalar_CreateHash(msgval, type_info);
858     }
859 
860     rb_hash_aset(hash, msg_key, msg_value);
861   }
862 
863   return hash;
864 }
865 
Scalar_CreateHash(upb_MessageValue msgval,TypeInfo type_info)866 VALUE Scalar_CreateHash(upb_MessageValue msgval, TypeInfo type_info) {
867   if (type_info.type == kUpb_CType_Message) {
868     return Message_CreateHash(msgval.msg_val, type_info.def.msgdef);
869   } else {
870     return Convert_UpbToRuby(msgval, type_info, Qnil);
871   }
872 }
873 
874 /*
875  * call-seq:
876  *     Message.to_h => {}
877  *
878  * Returns the message as a Ruby Hash object, with keys as symbols.
879  */
// Ruby-visible #to_h: unwraps the receiver and converts its upb message into
// a Ruby Hash.
static VALUE Message_to_h(VALUE _self) {
  const Message* wrapper = ruby_to_Message(_self);
  VALUE as_hash = Message_CreateHash(wrapper->msg, wrapper->msgdef);
  return as_hash;
}
884 
885 /*
886  * call-seq:
887  *     Message.freeze => self
888  *
889  * Freezes the message object. We have to intercept this so we can pin the
890  * Ruby object into memory so we don't forget it's frozen.
891  */
// Ruby-visible #freeze. An already-frozen receiver is returned untouched;
// otherwise the object is pinned on its arena and then marked frozen.
static VALUE Message_freeze(VALUE _self) {
  Message* self = ruby_to_Message(_self);

  if (RB_OBJ_FROZEN(_self)) return _self;

  Arena_Pin(self->arena, _self);
  RB_OBJ_FREEZE(_self);
  return _self;
}
900 
901 /*
902  * call-seq:
903  *     Message.[](index) => value
904  *
905  * Accesses a field's value by field name. The provided field name should be a
906  * string.
907  */
// Ruby-visible #[]: looks the field up by name on the receiver's message
// definition. Returns nil for an unknown name; raises TypeError (from
// Check_Type) when `field_name` is not a String.
static VALUE Message_index(VALUE _self, VALUE field_name) {
  Message* self = ruby_to_Message(_self);

  Check_Type(field_name, T_STRING);
  const upb_FieldDef* found =
      upb_MessageDef_FindFieldByName(self->msgdef, RSTRING_PTR(field_name));

  return found ? Message_getfield(_self, found) : Qnil;
}
921 
922 /*
923  * call-seq:
924  *     Message.[]=(index, value)
925  *
926  * Sets a field's value by field name. The provided field name should be a
927  * string.
928  */
// Ruby-visible #[]=: converts `value` to the field's upb representation and
// stores it on the mutable message. Raises ArgumentError for an unknown field
// name and TypeError (from Check_Type) when `field_name` is not a String.
// Always returns nil.
static VALUE Message_index_set(VALUE _self, VALUE field_name, VALUE value) {
  Message* self = ruby_to_Message(_self);
  upb_Arena* arena = Arena_get(self->arena);

  Check_Type(field_name, T_STRING);
  const upb_FieldDef* target =
      upb_MessageDef_FindFieldByName(self->msgdef, RSTRING_PTR(field_name));
  if (!target) {
    rb_raise(rb_eArgError, "Unknown field: %s", RSTRING_PTR(field_name));
  }

  // Convert first so a bad value raises before the message is touched.
  upb_MessageValue converted = Convert_RubyToUpb(
      value, upb_FieldDef_Name(target), TypeInfo_get(target), arena);
  upb_Message_Set(Message_GetMutable(_self, NULL), target, converted, arena);

  return Qnil;
}
947 
948 /*
949  * call-seq:
950  *     MessageClass.decode(data, options) => message
951  *
952  * Decodes the given data (as a string containing bytes in protocol buffers wire
 * format) under the interpretation given by this message class's definition
954  * and returns a message object with the corresponding field values.
955  * @param options [Hash] options for the decoder
956  *  recursion_limit: set to maximum decoding depth for message (default is 64)
957  */
// Implements MessageClass.decode(data, options = {}).
//
//   argv[0]  String holding the serialized message bytes.
//   argv[1]  optional Hash; a Fixnum under :recursion_limit sets the decoder's
//            maximum depth. Any other value for that key is ignored.
//
// Returns a new instance of `klass` populated from `data`.
// Raises ArgumentError on a bad argument count, a non-Hash options argument,
// or non-String data; raises ParseError when the decoder reports a failure.
static VALUE Message_decode(int argc, VALUE* argv, VALUE klass) {
  int options = 0;

  // Validate the argument count before reading argv[0]: with argc == 0 there
  // is no argv[0] to read.
  if (argc < 1 || argc > 2) {
    rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
  }

  VALUE data = argv[0];

  if (argc == 2) {
    VALUE hash_args = argv[1];
    if (TYPE(hash_args) != T_HASH) {
      rb_raise(rb_eArgError, "Expected hash arguments.");
    }

    VALUE depth =
        rb_hash_lookup(hash_args, ID2SYM(rb_intern("recursion_limit")));

    if (depth != Qnil && TYPE(depth) == T_FIXNUM) {
      options |= UPB_DECODE_MAXDEPTH(FIX2INT(depth));
    }
  }

  if (TYPE(data) != T_STRING) {
    rb_raise(rb_eArgError, "Expected string for binary protobuf data.");
  }

  VALUE msg_rb = initialize_rb_class_with_no_args(klass);
  Message* msg = ruby_to_Message(msg_rb);

  // Decode directly into the freshly created message, allocating from the
  // message's own arena.
  upb_DecodeStatus status =
      upb_Decode(RSTRING_PTR(data), RSTRING_LEN(data), (upb_Message*)msg->msg,
                 upb_MessageDef_MiniTable(msg->msgdef), NULL, options,
                 Arena_get(msg->arena));

  if (status != kUpb_DecodeStatus_Ok) {
    rb_raise(cParseError, "Error occurred during parsing");
  }

  return msg_rb;
}
996 
997 /*
998  * call-seq:
999  *     MessageClass.decode_json(data, options = {}) => message
1000  *
 * Decodes the given data (as a string containing the JSON representation of a
 * message) under the interpretation given by this message class's definition
 * and returns a message object with the corresponding field values.
1004  *
1005  *  @param options [Hash] options for the decoder
1006  *   ignore_unknown_fields: set true to ignore unknown fields (default is to
1007  *   raise an error)
1008  */
// Implements MessageClass.decode_json(data, options = {}).
//
//   argv[0]  String holding the JSON text.
//   argv[1]  optional Hash; a truthy :ignore_unknown_fields makes the decoder
//            ignore unknown fields.
//
// Returns a new instance of `klass` populated from `data`.
// Raises ArgumentError on a bad argument count, a non-Hash options argument,
// or non-String data; RuntimeError when `klass` is a wrapper type; and
// ParseError (including the decoder's message) when JSON decoding fails.
static VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) {
  int options = 0;
  upb_Status status;

  // TODO(haberman): use this message's pool instead.
  const upb_DefPool* symtab = DescriptorPool_GetSymtab(generated_pool);

  // Validate the argument count before reading argv[0]: with argc == 0 there
  // is no argv[0] to read.
  if (argc < 1 || argc > 2) {
    rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
  }

  VALUE data = argv[0];

  if (argc == 2) {
    VALUE hash_args = argv[1];
    if (TYPE(hash_args) != T_HASH) {
      rb_raise(rb_eArgError, "Expected hash arguments.");
    }

    if (RTEST(rb_hash_lookup2(
            hash_args, ID2SYM(rb_intern("ignore_unknown_fields")), Qfalse))) {
      options |= upb_JsonDecode_IgnoreUnknown;
    }
  }

  if (TYPE(data) != T_STRING) {
    rb_raise(rb_eArgError, "Expected string for JSON data.");
  }

  // TODO(cfallin): Check and respect string encoding. If not UTF-8, we need to
  // convert, because string handlers pass data directly to message string
  // fields.

  VALUE msg_rb = initialize_rb_class_with_no_args(klass);
  Message* msg = ruby_to_Message(msg_rb);

  // We don't allow users to decode a wrapper type directly.
  if (IsWrapper(msg->msgdef)) {
    rb_raise(rb_eRuntimeError, "Cannot parse a wrapper directly.");
  }

  upb_Status_Clear(&status);
  if (!upb_JsonDecode(RSTRING_PTR(data), RSTRING_LEN(data),
                      (upb_Message*)msg->msg, msg->msgdef, symtab, options,
                      Arena_get(msg->arena), &status)) {
    rb_raise(cParseError, "Error occurred during parsing: %s",
             upb_Status_ErrorMessage(&status));
  }

  return msg_rb;
}
1059 
1060 /*
1061  * call-seq:
1062  *     MessageClass.encode(msg, options) => bytes
1063  *
1064  * Encodes the given message object to its serialized form in protocol buffers
1065  * wire format.
1066  * @param options [Hash] options for the encoder
1067  *  recursion_limit: set to maximum encoding depth for message (default is 64)
1068  */
// Implements MessageClass.encode(msg, options = {}).
//
//   argv[0]  message instance; its class must be exactly `klass`.
//   argv[1]  optional Hash; a Fixnum under :recursion_limit sets the maximum
//            depth. Any other value for that key is ignored.
//
// Returns the serialized bytes as a binary (ASCII-8BIT) Ruby String.
// Raises ArgumentError on a bad argument count, a message of the wrong class,
// or a non-Hash options argument; raises RuntimeError when encoding fails.
static VALUE Message_encode(int argc, VALUE* argv, VALUE klass) {
  int options = 0;
  const char* data;
  size_t size;

  // Validate the argument count before reading argv[0]: with argc == 0 there
  // is no argv[0], and unwrapping whatever happens to be there is unsafe.
  if (argc < 1 || argc > 2) {
    rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
  }

  Message* msg = ruby_to_Message(argv[0]);

  if (CLASS_OF(argv[0]) != klass) {
    rb_raise(rb_eArgError, "Message of wrong type.");
  }

  if (argc == 2) {
    VALUE hash_args = argv[1];
    if (TYPE(hash_args) != T_HASH) {
      rb_raise(rb_eArgError, "Expected hash arguments.");
    }
    VALUE depth =
        rb_hash_lookup(hash_args, ID2SYM(rb_intern("recursion_limit")));

    if (depth != Qnil && TYPE(depth) == T_FIXNUM) {
      // NOTE(review): this uses the decoder's max-depth macro to build encoder
      // options; confirm the encoder uses the same bit layout.
      options |= UPB_DECODE_MAXDEPTH(FIX2INT(depth));
    }
  }

  // The encoded bytes live in a temporary arena; they are copied into a Ruby
  // string before the arena is freed.
  upb_Arena* arena = upb_Arena_New();

  data = upb_Encode(msg->msg, upb_MessageDef_MiniTable(msg->msgdef), options,
                    arena, &size);

  if (data) {
    VALUE ret = rb_str_new(data, size);
    rb_enc_associate(ret, rb_ascii8bit_encoding());
    upb_Arena_Free(arena);
    return ret;
  } else {
    upb_Arena_Free(arena);
    rb_raise(rb_eRuntimeError, "Exceeded maximum depth (possibly cycle)");
  }
}
1110 
1111 /*
1112  * call-seq:
1113  *     MessageClass.encode_json(msg, options = {}) => json_string
1114  *
1115  * Encodes the given message object into its serialized JSON representation.
 * @param options [Hash] options for the encoder
 *  preserve_proto_fieldnames: set true to use original fieldnames (default is
 *  to camelCase)
 *  emit_defaults: set true to emit 0/false values (default is to omit them)
1120  */
// Implements MessageClass.encode_json(msg, options = {}).
//
//   argv[0]  message instance to encode.
//   argv[1]  optional Hash (or any object responding to to_h). A truthy
//            :preserve_proto_fieldnames selects proto field names; a truthy
//            :emit_defaults emits default values.
//
// Returns the JSON text as a UTF-8 Ruby String.
// Raises ArgumentError on a bad argument count or unusable options,
// ParseError when the encoder reports an error, and NoMemoryError when the
// overflow buffer cannot be allocated.
static VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
  int options = 0;
  char buf[1024];
  size_t size;
  upb_Status status;

  // Validate the argument count before reading argv[0]: with argc == 0 there
  // is no argv[0], and unwrapping whatever happens to be there is unsafe.
  if (argc < 1 || argc > 2) {
    rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
  }

  Message* msg = ruby_to_Message(argv[0]);

  // TODO(haberman): use this message's pool instead.
  const upb_DefPool* symtab = DescriptorPool_GetSymtab(generated_pool);

  if (argc == 2) {
    VALUE hash_args = argv[1];
    if (TYPE(hash_args) != T_HASH) {
      if (RTEST(rb_funcall(hash_args, rb_intern("respond_to?"), 1,
                           rb_str_new2("to_h")))) {
        hash_args = rb_funcall(hash_args, rb_intern("to_h"), 0);
      } else {
        rb_raise(rb_eArgError, "Expected hash arguments.");
      }
    }

    if (RTEST(rb_hash_lookup2(hash_args,
                              ID2SYM(rb_intern("preserve_proto_fieldnames")),
                              Qfalse))) {
      options |= upb_JsonEncode_UseProtoNames;
    }

    if (RTEST(rb_hash_lookup2(hash_args, ID2SYM(rb_intern("emit_defaults")),
                              Qfalse))) {
      options |= upb_JsonEncode_EmitDefaults;
    }
  }

  // First pass: encode into the fixed-size stack buffer.
  upb_Status_Clear(&status);
  size = upb_JsonEncode(msg->msg, msg->msgdef, symtab, options, buf,
                        sizeof(buf), &status);

  if (!upb_Status_IsOk(&status)) {
    rb_raise(cParseError, "Error occurred during encoding: %s",
             upb_Status_ErrorMessage(&status));
  }

  VALUE ret;
  if (size >= sizeof(buf)) {
    // The output did not fit in the stack buffer: encode again into a heap
    // buffer with room for `size` bytes plus one more.
    char* buf2 = malloc(size + 1);
    if (buf2 == NULL) {
      rb_raise(rb_eNoMemError, "Out of memory encoding JSON");
    }
    upb_JsonEncode(msg->msg, msg->msgdef, symtab, options, buf2, size + 1,
                   &status);
    ret = rb_str_new(buf2, size);
    free(buf2);
  } else {
    ret = rb_str_new(buf, size);
  }

  rb_enc_associate(ret, rb_utf8_encoding());
  return ret;
}
1180 
1181 /*
1182  * call-seq:
1183  *     Message.descriptor => descriptor
1184  *
1185  * Class method that returns the Descriptor instance corresponding to this
1186  * message class's type.
1187  */
// Class-level accessor: returns whatever is stored in the class's descriptor
// instance variable.
static VALUE Message_descriptor(VALUE klass) {
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  return descriptor;
}
1191 
// Creates the Ruby class for the message type described by `descriptor`:
// defines the class, stores the descriptor on it, installs the allocator,
// mixes in the MessageExts modules, and registers all instance and singleton
// methods. Raises RuntimeError if the message definition has no full name.
VALUE build_class_from_descriptor(VALUE descriptor) {
  const char* name = upb_MessageDef_FullName(Descriptor_GetMsgDef(descriptor));
  if (!name) {
    rb_raise(rb_eRuntimeError, "Descriptor does not have assigned name.");
  }

  // Docs say the name parameter is ignored. User will assign return value to
  // their own toplevel constant class name.
  VALUE klass = rb_define_class_id(rb_intern("Message"), rb_cObject);
  rb_ivar_set(klass, descriptor_instancevar_interned, descriptor);
  rb_define_alloc_func(klass, Message_alloc);

  // Mix in the pure-Ruby extensions (instance side, then class side).
  rb_require("google/protobuf/message_exts");
  VALUE instance_exts = rb_eval_string("::Google::Protobuf::MessageExts");
  rb_include_module(klass, instance_exts);
  VALUE class_exts =
      rb_eval_string("::Google::Protobuf::MessageExts::ClassMethods");
  rb_extend_object(klass, class_exts);

  // Instance methods.
  rb_define_method(klass, "method_missing", Message_method_missing, -1);
  rb_define_method(klass, "respond_to_missing?", Message_respond_to_missing,
                   -1);
  rb_define_method(klass, "initialize", Message_initialize, -1);
  rb_define_method(klass, "dup", Message_dup, 0);
  // Also define #clone so that we don't inherit Object#clone.
  rb_define_method(klass, "clone", Message_dup, 0);
  rb_define_method(klass, "==", Message_eq, 1);
  rb_define_method(klass, "eql?", Message_eq, 1);
  rb_define_method(klass, "freeze", Message_freeze, 0);
  rb_define_method(klass, "hash", Message_hash, 0);
  rb_define_method(klass, "to_h", Message_to_h, 0);
  rb_define_method(klass, "inspect", Message_inspect, 0);
  rb_define_method(klass, "to_s", Message_inspect, 0);
  rb_define_method(klass, "[]", Message_index, 1);
  rb_define_method(klass, "[]=", Message_index_set, 2);

  // Class (singleton) methods.
  rb_define_singleton_method(klass, "decode", Message_decode, -1);
  rb_define_singleton_method(klass, "encode", Message_encode, -1);
  rb_define_singleton_method(klass, "decode_json", Message_decode_json, -1);
  rb_define_singleton_method(klass, "encode_json", Message_encode_json, -1);
  rb_define_singleton_method(klass, "descriptor", Message_descriptor, 0);

  return klass;
}
1236 
1237 /*
1238  * call-seq:
1239  *     Enum.lookup(number) => name
1240  *
1241  * This module method, provided on each generated enum module, looks up an enum
1242  * value by number and returns its name as a Ruby symbol, or nil if not found.
1243  */
// Enum.lookup(number): returns the name of the enum value with the given
// number as a Symbol, or nil when no value has that number.
static VALUE enum_lookup(VALUE self, VALUE number) {
  int32_t wanted = NUM2INT(number);
  VALUE desc = rb_ivar_get(self, descriptor_instancevar_interned);
  const upb_EnumDef* enumdef = EnumDescriptor_GetEnumDef(desc);
  const upb_EnumValueDef* match =
      upb_EnumDef_FindValueByNumber(enumdef, wanted);

  if (!match) return Qnil;
  return ID2SYM(rb_intern(upb_EnumValueDef_Name(match)));
}
1255 
1256 /*
1257  * call-seq:
1258  *     Enum.resolve(name) => number
1259  *
1260  * This module method, provided on each generated enum module, looks up an enum
1261  * value by name (as a Ruby symbol) and returns its name, or nil if not found.
1262  */
// Enum.resolve(name): returns the number of the enum value whose name matches
// the given Symbol, or nil when no value has that name.
static VALUE enum_resolve(VALUE self, VALUE sym) {
  const char* wanted = rb_id2name(SYM2ID(sym));
  VALUE desc = rb_ivar_get(self, descriptor_instancevar_interned);
  const upb_EnumDef* enumdef = EnumDescriptor_GetEnumDef(desc);
  const upb_EnumValueDef* match = upb_EnumDef_FindValueByName(enumdef, wanted);

  if (!match) return Qnil;
  return INT2NUM(upb_EnumValueDef_Number(match));
}
1274 
1275 /*
1276  * call-seq:
1277  *     Enum.descriptor
1278  *
1279  * This module method, provided on each generated enum module, returns the
1280  * EnumDescriptor corresponding to this enum type.
1281  */
// Module-level accessor: returns whatever is stored in the module's descriptor
// instance variable.
static VALUE enum_descriptor(VALUE self) {
  VALUE descriptor = rb_ivar_get(self, descriptor_instancevar_interned);
  return descriptor;
}
1285 
build_module_from_enumdesc(VALUE _enumdesc)1286 VALUE build_module_from_enumdesc(VALUE _enumdesc) {
1287   const upb_EnumDef* e = EnumDescriptor_GetEnumDef(_enumdesc);
1288   VALUE mod = rb_define_module_id(rb_intern(upb_EnumDef_FullName(e)));
1289 
1290   int n = upb_EnumDef_ValueCount(e);
1291   for (int i = 0; i < n; i++) {
1292     const upb_EnumValueDef* ev = upb_EnumDef_Value(e, i);
1293     upb_Arena* arena = upb_Arena_New();
1294     const char* src_name = upb_EnumValueDef_Name(ev);
1295     char* name = upb_strdup2(src_name, strlen(src_name), arena);
1296     int32_t value = upb_EnumValueDef_Number(ev);
1297     if (name[0] < 'A' || name[0] > 'Z') {
1298       if (name[0] >= 'a' && name[0] <= 'z') {
1299         name[0] -= 32; // auto capitalize
1300       } else {
1301         rb_warn(
1302           "Enum value '%s' does not start with an uppercase letter "
1303           "as is required for Ruby constants.",
1304           name);
1305       }
1306     }
1307     rb_define_const(mod, name, INT2NUM(value));
1308     upb_Arena_Free(arena);
1309   }
1310 
1311   rb_define_singleton_method(mod, "lookup", enum_lookup, 1);
1312   rb_define_singleton_method(mod, "resolve", enum_resolve, 1);
1313   rb_define_singleton_method(mod, "descriptor", enum_descriptor, 0);
1314   rb_ivar_set(mod, descriptor_instancevar_interned, _enumdesc);
1315 
1316   return mod;
1317 }
1318 
1319 // Internal only; used by Google::Protobuf.deep_copy.
// Deep-copies `msg` (of type `m`) by serializing it into a temporary arena and
// decoding the bytes into a new message allocated on `arena`. The temporary
// arena is freed on both the success and the failure path.
//
// Returns the new message; raises ParseError if either step fails.
upb_Message* Message_deep_copy(const upb_Message* msg, const upb_MessageDef* m,
                               upb_Arena* arena) {
  upb_Arena* scratch = upb_Arena_New();
  const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
  size_t encoded_len;

  char* encoded = upb_Encode(msg, layout, 0, scratch, &encoded_len);
  upb_Message* copy = upb_Message_New(m, arena);

  // Decoding is only attempted when encoding produced a buffer.
  bool copied = encoded != NULL &&
                upb_Decode(encoded, encoded_len, copy, layout, NULL, 0,
                           arena) == kUpb_DecodeStatus_Ok;

  upb_Arena_Free(scratch);

  if (!copied) {
    rb_raise(cParseError, "Error occurred copying proto");
  }
  return copy;
}
1339 
Message_GetUpbMessage(VALUE value,const upb_MessageDef * m,const char * name,upb_Arena * arena)1340 const upb_Message* Message_GetUpbMessage(VALUE value, const upb_MessageDef* m,
1341                                          const char* name, upb_Arena* arena) {
1342   if (value == Qnil) {
1343     rb_raise(cTypeError, "nil message not allowed here.");
1344   }
1345 
1346   VALUE klass = CLASS_OF(value);
1347   VALUE desc_rb = rb_ivar_get(klass, descriptor_instancevar_interned);
1348   const upb_MessageDef* val_m =
1349       desc_rb == Qnil ? NULL : Descriptor_GetMsgDef(desc_rb);
1350 
1351   if (val_m != m) {
1352     // Check for possible implicit conversions
1353     // TODO: hash conversion?
1354 
1355     switch (upb_MessageDef_WellKnownType(m)) {
1356       case kUpb_WellKnown_Timestamp: {
1357         // Time -> Google::Protobuf::Timestamp
1358         upb_Message* msg = upb_Message_New(m, arena);
1359         upb_MessageValue sec, nsec;
1360         struct timespec time;
1361         const upb_FieldDef* sec_f = upb_MessageDef_FindFieldByNumber(m, 1);
1362         const upb_FieldDef* nsec_f = upb_MessageDef_FindFieldByNumber(m, 2);
1363 
1364         if (!rb_obj_is_kind_of(value, rb_cTime)) goto badtype;
1365 
1366         time = rb_time_timespec(value);
1367         sec.int64_val = time.tv_sec;
1368         nsec.int32_val = time.tv_nsec;
1369         upb_Message_Set(msg, sec_f, sec, arena);
1370         upb_Message_Set(msg, nsec_f, nsec, arena);
1371         return msg;
1372       }
1373       case kUpb_WellKnown_Duration: {
1374         // Numeric -> Google::Protobuf::Duration
1375         upb_Message* msg = upb_Message_New(m, arena);
1376         upb_MessageValue sec, nsec;
1377         const upb_FieldDef* sec_f = upb_MessageDef_FindFieldByNumber(m, 1);
1378         const upb_FieldDef* nsec_f = upb_MessageDef_FindFieldByNumber(m, 2);
1379 
1380         if (!rb_obj_is_kind_of(value, rb_cNumeric)) goto badtype;
1381 
1382         sec.int64_val = NUM2LL(value);
1383         nsec.int32_val = round((NUM2DBL(value) - NUM2LL(value)) * 1000000000);
1384         upb_Message_Set(msg, sec_f, sec, arena);
1385         upb_Message_Set(msg, nsec_f, nsec, arena);
1386         return msg;
1387       }
1388       default:
1389       badtype:
1390         rb_raise(cTypeError,
1391                  "Invalid type %s to assign to submessage field '%s'.",
1392                  rb_class2name(CLASS_OF(value)), name);
1393     }
1394   }
1395 
1396   Message* self = ruby_to_Message(value);
1397   Arena_fuse(self->arena, arena);
1398 
1399   return self->msg;
1400 }
1401 
// One-time setup for this file's globals: resolves the ParseError constant on
// the `protobuf` module and interns the "descriptor" identifier.
void Message_register(VALUE protobuf) {
  ID parse_error_id = rb_intern("ParseError");
  cParseError = rb_const_get(protobuf, parse_error_id);

  // Ruby-interned string: "descriptor". We use this identifier to store an
  // instance variable on message classes we create in order to link them back
  // to their descriptors.
  ID descriptor_id = rb_intern("descriptor");
  descriptor_instancevar_interned = descriptor_id;
}
1410