xref: /aosp_15_r20/external/protobuf/ruby/ext/google/protobuf_c/protobuf.c (revision 1b3f573f81763fcece89efc2b6a5209149e44ab8)
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2014 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #include "protobuf.h"
32 
33 #include <ruby/version.h>
34 
35 #include "defs.h"
36 #include "map.h"
37 #include "message.h"
38 #include "repeated_field.h"
39 
40 VALUE cParseError;
41 VALUE cTypeError;
42 
map_field_key(const upb_FieldDef * field)43 const upb_FieldDef *map_field_key(const upb_FieldDef *field) {
44   const upb_MessageDef *entry = upb_FieldDef_MessageSubDef(field);
45   return upb_MessageDef_FindFieldByNumber(entry, 1);
46 }
47 
map_field_value(const upb_FieldDef * field)48 const upb_FieldDef *map_field_value(const upb_FieldDef *field) {
49   const upb_MessageDef *entry = upb_FieldDef_MessageSubDef(field);
50   return upb_MessageDef_FindFieldByNumber(entry, 2);
51 }
52 
53 // -----------------------------------------------------------------------------
54 // StringBuilder, for inspect
55 // -----------------------------------------------------------------------------
56 
// Growable byte buffer used to build the human-readable output of #inspect.
struct StringBuilder {
  size_t size;  // Number of bytes currently written into |data|.
  size_t cap;   // Allocated capacity of |data|, in bytes.
  char *data;   // Heap buffer (malloc/realloc'd); owned by the builder.
};

typedef struct StringBuilder StringBuilder;
64 
// Byte size used when growing |data| in StringBuilder_Printf().
// NOTE(review): this includes sizeof(StringBuilder) even though |data| is
// allocated separately from the struct (see StringBuilder_New), so growth
// over-allocates by sizeof(StringBuilder) bytes. Harmless, but it makes the
// capacity bookkeeping slightly conservative.
static size_t StringBuilder_SizeOf(size_t cap) {
  return sizeof(StringBuilder) + cap;
}
68 
StringBuilder_New()69 StringBuilder *StringBuilder_New() {
70   const size_t cap = 128;
71   StringBuilder *builder = malloc(sizeof(*builder));
72   builder->size = 0;
73   builder->cap = cap;
74   builder->data = malloc(builder->cap);
75   return builder;
76 }
77 
// Releases a StringBuilder and its buffer. Safe to call with NULL
// (previously a NULL argument would have dereferenced b->data).
void StringBuilder_Free(StringBuilder *b) {
  if (b == NULL) return;
  free(b->data);
  free(b);
}
82 
// Appends printf-formatted text to the builder, growing the buffer as
// needed (capacity doubles until the formatted text plus NUL fits).
//
// Fixes over the previous version:
//  - vsnprintf's int return was stored in a size_t, so a negative error
//    return became a huge unsigned value, causing a runaway capacity-
//    doubling loop and a corrupted size. Errors now leave the builder
//    unchanged.
//  - realloc's result was assigned directly to b->data without a check;
//    on OOM that leaked the old buffer and left a NULL pointer to be
//    dereferenced. On OOM we now keep the old buffer and drop the append.
void StringBuilder_Printf(StringBuilder *b, const char *fmt, ...) {
  size_t have = b->cap - b->size;
  int n;
  va_list args;

  va_start(args, fmt);
  n = vsnprintf(&b->data[b->size], have, fmt, args);
  va_end(args);

  if (n < 0) return;  // Encoding/format error: nothing appended.

  if ((size_t)n >= have) {
    // Need n+1 bytes (including NUL); double capacity until it fits.
    while ((size_t)n >= b->cap - b->size) {
      b->cap *= 2;
    }
    have = b->cap - b->size;
    char *grown = realloc(b->data, StringBuilder_SizeOf(b->cap));
    if (grown == NULL) return;  // OOM: keep old buffer, drop this append.
    b->data = grown;
    va_start(args, fmt);
    n = vsnprintf(&b->data[b->size], have, fmt, args);
    va_end(args);
    if (n < 0) return;
    PBRUBY_ASSERT((size_t)n < have);
  }

  b->size += n;
}
106 
// Copies the accumulated bytes into a new Ruby String tagged as UTF-8.
// Does not free or reset the builder; the caller still owns |b|.
VALUE StringBuilder_ToRubyString(StringBuilder *b) {
  VALUE ret = rb_str_new(b->data, b->size);
  rb_enc_associate(ret, rb_utf8_encoding());
  return ret;
}
112 
// Appends an enum value as ":NAME" when |val| maps to a known enum value,
// otherwise as the raw integer.
static void StringBuilder_PrintEnum(StringBuilder *b, int32_t val,
                                    const upb_EnumDef *e) {
  const upb_EnumValueDef *value_def = upb_EnumDef_FindValueByNumber(e, val);
  if (value_def == NULL) {
    StringBuilder_Printf(b, "%" PRId32, val);
    return;
  }
  StringBuilder_Printf(b, ":%s", upb_EnumValueDef_Name(value_def));
}
122 
// Appends a human-readable rendering of |val|, interpreted according to
// |info|, to the builder. Used to implement #inspect for messages, maps,
// and repeated fields.
void StringBuilder_PrintMsgval(StringBuilder *b, upb_MessageValue val,
                               TypeInfo info) {
  switch (info.type) {
    case kUpb_CType_Bool:
      StringBuilder_Printf(b, "%s", val.bool_val ? "true" : "false");
      break;
    case kUpb_CType_Float: {
      // Format through Ruby's Float#inspect so the text matches what Ruby
      // itself would print for the same number.
      VALUE str = rb_inspect(DBL2NUM(val.float_val));
      StringBuilder_Printf(b, "%s", RSTRING_PTR(str));
      break;
    }
    case kUpb_CType_Double: {
      VALUE str = rb_inspect(DBL2NUM(val.double_val));
      StringBuilder_Printf(b, "%s", RSTRING_PTR(str));
      break;
    }
    case kUpb_CType_Int32:
      StringBuilder_Printf(b, "%" PRId32, val.int32_val);
      break;
    case kUpb_CType_UInt32:
      StringBuilder_Printf(b, "%" PRIu32, val.uint32_val);
      break;
    case kUpb_CType_Int64:
      StringBuilder_Printf(b, "%" PRId64, val.int64_val);
      break;
    case kUpb_CType_UInt64:
      StringBuilder_Printf(b, "%" PRIu64, val.uint64_val);
      break;
    case kUpb_CType_String:
      // NOTE(review): contents are printed verbatim between quotes; embedded
      // quotes and backslashes are not escaped, and "%.*s" stops at an
      // embedded NUL byte.
      StringBuilder_Printf(b, "\"%.*s\"", (int)val.str_val.size,
                           val.str_val.data);
      break;
    case kUpb_CType_Bytes:
      StringBuilder_Printf(b, "\"%.*s\"", (int)val.str_val.size,
                           val.str_val.data);
      break;
    case kUpb_CType_Enum:
      StringBuilder_PrintEnum(b, val.int32_val, info.def.enumdef);
      break;
    case kUpb_CType_Message:
      Message_PrintMessage(b, val.msg_val, info.def.msgdef);
      break;
  }
}
167 
168 // -----------------------------------------------------------------------------
169 // Arena
170 // -----------------------------------------------------------------------------
171 
// Ruby wrapper state for a upb_Arena. |pinned_objs| is either Qnil or a
// lazily-created Ruby Array of objects that must stay alive at least as
// long as this arena (marked in Arena_mark).
typedef struct {
  upb_Arena *arena;
  VALUE pinned_objs;
} Arena;
176 
Arena_mark(void * data)177 static void Arena_mark(void *data) {
178   Arena *arena = data;
179   rb_gc_mark(arena->pinned_objs);
180 }
181 
Arena_free(void * data)182 static void Arena_free(void *data) {
183   Arena *arena = data;
184   upb_Arena_Free(arena->arena);
185   xfree(arena);
186 }
187 
// Ruby class object for Google::Protobuf::Internal::Arena; assigned (and
// GC-registered) in Arena_register().
static VALUE cArena;

const rb_data_type_t Arena_type = {
    "Google::Protobuf::Internal::Arena",
    {Arena_mark, Arena_free, NULL},
    .flags = RUBY_TYPED_FREE_IMMEDIATELY,
};
195 
ruby_upb_allocfunc(upb_alloc * alloc,void * ptr,size_t oldsize,size_t size)196 static void* ruby_upb_allocfunc(upb_alloc* alloc, void* ptr, size_t oldsize, size_t size) {
197   if (size == 0) {
198     xfree(ptr);
199     return NULL;
200   } else {
201     return xrealloc(ptr, size);
202   }
203 }
204 
205 upb_alloc ruby_upb_alloc = {&ruby_upb_allocfunc};
206 
// Allocation function for the Arena Ruby class: wraps a fresh upb_Arena
// that allocates through Ruby's allocator (see ruby_upb_alloc).
static VALUE Arena_alloc(VALUE klass) {
  Arena *self = ALLOC(Arena);
  self->arena = upb_Arena_Init(NULL, 0, &ruby_upb_alloc);
  self->pinned_objs = Qnil;
  return TypedData_Wrap_Struct(klass, &Arena_type, self);
}
213 
Arena_get(VALUE _arena)214 upb_Arena *Arena_get(VALUE _arena) {
215   Arena *arena;
216   TypedData_Get_Struct(_arena, Arena, &Arena_type, arena);
217   return arena->arena;
218 }
219 
// Fuses this arena with |other| so their lifetimes are joined.
// Fusion can only fail for arenas with initial blocks, which Ruby never
// uses, so failure here is raised as an internal error.
void Arena_fuse(VALUE _arena, upb_Arena *other) {
  Arena *self;
  TypedData_Get_Struct(_arena, Arena, &Arena_type, self);
  if (upb_Arena_Fuse(self->arena, other)) return;
  rb_raise(rb_eRuntimeError,
           "Unable to fuse arenas. This should never happen since Ruby does "
           "not use initial blocks");
}
229 
Arena_new()230 VALUE Arena_new() { return Arena_alloc(cArena); }
231 
// Pins |obj| to the arena's lifetime: the object is appended to the
// lazily-created pinned_objs array, which Arena_mark keeps alive.
void Arena_Pin(VALUE _arena, VALUE obj) {
  Arena *self;
  TypedData_Get_Struct(_arena, Arena, &Arena_type, self);
  if (self->pinned_objs == Qnil) {
    self->pinned_objs = rb_ary_new();
  }
  rb_ary_push(self->pinned_objs, obj);
}
240 
// Defines Google::Protobuf::Internal::Arena and records the class object in
// cArena. The address is GC-registered before assignment so the class can
// never be collected.
void Arena_register(VALUE module) {
  VALUE internal_mod = rb_define_module_under(module, "Internal");
  VALUE arena_class = rb_define_class_under(internal_mod, "Arena", rb_cObject);
  rb_define_alloc_func(arena_class, Arena_alloc);
  rb_gc_register_address(&cArena);
  cArena = arena_class;
}
248 
249 // -----------------------------------------------------------------------------
250 // Object Cache
251 // -----------------------------------------------------------------------------
252 
253 // A pointer -> Ruby Object cache that keeps references to Ruby wrapper
254 // objects.  This allows us to look up any Ruby wrapper object by the address
255 // of the object it is wrapping. That way we can avoid ever creating two
256 // different wrapper objects for the same C object, which saves memory and
257 // preserves object identity.
258 //
259 // We use WeakMap for the cache. For Ruby <2.7 we also need a secondary Hash
260 // to store WeakMap keys because Ruby <2.7 WeakMap doesn't allow non-finalizable
261 // keys.
262 //
263 // We also need the secondary Hash if sizeof(long) < sizeof(VALUE), because this
264 // means it may not be possible to fit a pointer into a Fixnum. Keys are
265 // pointers, and if they fit into a Fixnum, Ruby doesn't collect them, but if
266 // they overflow and require allocating a Bignum, they could get collected
267 // prematurely, thus removing the cache entry. This happens on 64-bit Windows,
268 // on which pointers are 64 bits but longs are 32 bits. In this case, we enable
269 // the secondary Hash to hold the keys and prevent them from being collected.
270 
271 #if RUBY_API_VERSION_CODE >= 20700 && SIZEOF_LONG >= SIZEOF_VALUE
272 #define USE_SECONDARY_MAP 0
273 #else
274 #define USE_SECONDARY_MAP 1
275 #endif
276 
277 #if USE_SECONDARY_MAP
278 
279 // Maps Numeric -> Object. The object is then used as a key into the WeakMap.
280 // This is needed for Ruby <2.7 where a number cannot be a key to WeakMap.
281 // The object is used only for its identity; it does not contain any data.
282 VALUE secondary_map = Qnil;
283 
284 // Mutations to the map are under a mutex, because SeconaryMap_MaybeGC()
285 // iterates over the map which cannot happen in parallel with insertions, or
286 // Ruby will throw:
287 //   can't add a new key into hash during iteration (RuntimeError)
288 VALUE secondary_map_mutex = Qnil;
289 
290 // Lambda that will GC entries from the secondary map that are no longer present
291 // in the primary map.
292 VALUE gc_secondary_map_lambda = Qnil;
293 ID length;
294 
295 extern VALUE weak_obj_cache;
296 
// One-time setup of the secondary-map machinery: registers the three
// globals as GC roots (before assignment, so they can never be collected),
// then creates the Hash, the cleanup lambda, and the mutex that guards
// mutation of the map.
static void SecondaryMap_Init() {
  rb_gc_register_address(&secondary_map);
  rb_gc_register_address(&gc_secondary_map_lambda);
  rb_gc_register_address(&secondary_map_mutex);
  secondary_map = rb_hash_new();
  // Deletes secondary-map entries whose value object is no longer a key of
  // the primary WeakMap (i.e. the wrapper it stood for was collected).
  gc_secondary_map_lambda = rb_eval_string(
      "->(secondary, weak) {\n"
      "  secondary.delete_if { |k, v| !weak.key?(v) }\n"
      "}\n");
  secondary_map_mutex = rb_mutex_new();
  length = rb_intern("length");
}
309 
310 // The secondary map is a regular Hash, and will never shrink on its own.
311 // The main object cache is a WeakMap that will automatically remove entries
312 // when the target object is no longer reachable, but unless we manually
313 // remove the corresponding entries from the secondary map, it will grow
314 // without bound.
315 //
316 // To avoid this unbounded growth we periodically remove entries from the
317 // secondary map that are no longer present in the WeakMap. The logic of
318 // how often to perform this GC is an artbirary tuning parameter that
319 // represents a straightforward CPU/memory tradeoff.
320 //
321 // Requires: secondary_map_mutex is held.
// Opportunistically removes secondary-map entries that no longer have a
// corresponding entry in the primary WeakMap. Requires: secondary_map_mutex
// is held by the caller (asserted below).
static void SecondaryMap_MaybeGC() {
  PBRUBY_ASSERT(rb_mutex_locked_p(secondary_map_mutex) == Qtrue);
  size_t weak_len = NUM2ULL(rb_funcall(weak_obj_cache, length, 0));
  size_t secondary_len = RHASH_SIZE(secondary_map);
  if (secondary_len < weak_len) {
    // Logically this case should not be possible: a valid entry cannot exist in
    // the weak table unless there is a corresponding entry in the secondary
    // table. It should *always* be the case that secondary_len >= weak_len.
    //
    // However ObjectSpace::WeakMap#length (and therefore weak_len) is
    // unreliable: it overreports its true length by including non-live objects.
    // However these non-live objects are not yielded in iteration, so we may
    // have previously deleted them from the secondary map in a previous
    // invocation of SecondaryMap_MaybeGC().
    //
    // In this case, we can't measure any waste, so we just return.
    return;
  }
  // |waste| = entries in the secondary map with no live WeakMap counterpart.
  size_t waste = secondary_len - weak_len;
  // GC if we could remove at least 2000 entries or 20% of the table size
  // (whichever is greater).  Since the cost of the GC pass is O(N), we
  // want to make sure that we condition this on overall table size, to
  // avoid O(N^2) CPU costs.
  size_t threshold = PBRUBY_MAX(secondary_len * 0.2, 2000);
  if (waste > threshold) {
    rb_funcall(gc_secondary_map_lambda, rb_intern("call"), 2, secondary_map,
               weak_obj_cache);
  }
}
351 
352 // Requires: secondary_map_mutex is held by this thread iff create == true.
// Looks up |key| in the secondary map, optionally creating (and caching) a
// fresh placeholder object when absent. Returns Qnil when absent and
// create == false. Requires: secondary_map_mutex is held by this thread
// iff create == true (asserted).
static VALUE SecondaryMap_Get(VALUE key, bool create) {
  PBRUBY_ASSERT(!create || rb_mutex_locked_p(secondary_map_mutex) == Qtrue);
  VALUE cached = rb_hash_lookup(secondary_map, key);
  if (cached != Qnil || !create) return cached;
  SecondaryMap_MaybeGC();
  // The object carries no data; only its identity is used as a WeakMap key.
  cached = rb_class_new_instance(0, NULL, rb_cObject);
  rb_hash_aset(secondary_map, key, cached);
  return cached;
}
363 
364 #endif
365 
366 // Requires: secondary_map_mutex is held by this thread iff create == true.
// Converts a C pointer into the VALUE used as the cache key. The pointer is
// assumed to be at least 4-byte aligned (asserted), so shifting out the two
// always-zero low bits makes the number more likely to fit in a Fixnum,
// which Ruby never collects.
// Requires: secondary_map_mutex is held by this thread iff create == true.
static VALUE ObjectCache_GetKey(const void *key, bool create) {
  VALUE key_val = (VALUE)key;
  PBRUBY_ASSERT((key_val & 3) == 0);
  VALUE ret = LL2NUM(key_val >> 2);
#if USE_SECONDARY_MAP
  // On builds whose WeakMap can't take numeric keys, translate the number
  // into a unique finalizable placeholder object.
  ret = SecondaryMap_Get(ret, create);
#endif
  return ret;
}
376 
377 // Public ObjectCache API.
378 
379 VALUE weak_obj_cache = Qnil;
380 ID item_get;
381 ID item_set;
382 
// Sets up the global wrapper cache (an ObjectSpace::WeakMap) and, on builds
// that need it, the secondary map. Must run once at extension load, before
// any ObjectCache_Add/Get call. The cache address is GC-registered before
// assignment so the WeakMap itself is never collected.
static void ObjectCache_Init() {
  rb_gc_register_address(&weak_obj_cache);
  VALUE klass = rb_eval_string("ObjectSpace::WeakMap");
  weak_obj_cache = rb_class_new_instance(0, NULL, klass);
  item_get = rb_intern("[]");
  item_set = rb_intern("[]=");
#if USE_SECONDARY_MAP
  SecondaryMap_Init();
#endif
}
393 
// Inserts wrapper |val| into the cache under pointer |key|. The key must not
// already be cached (asserted). On builds with the secondary map, the mutex
// is held across key creation and insertion so they cannot race with the
// iteration in SecondaryMap_MaybeGC().
void ObjectCache_Add(const void *key, VALUE val) {
  PBRUBY_ASSERT(ObjectCache_Get(key) == Qnil);
#if USE_SECONDARY_MAP
  rb_mutex_lock(secondary_map_mutex);
#endif
  VALUE key_rb = ObjectCache_GetKey(key, true);
  rb_funcall(weak_obj_cache, item_set, 2, key_rb, val);
#if USE_SECONDARY_MAP
  rb_mutex_unlock(secondary_map_mutex);
#endif
  PBRUBY_ASSERT(ObjectCache_Get(key) == val);
}
406 
407 // Returns the cached object for this key, if any. Otherwise returns Qnil.
ObjectCache_Get(const void * key)408 VALUE ObjectCache_Get(const void *key) {
409   VALUE key_rb = ObjectCache_GetKey(key, false);
410   return rb_funcall(weak_obj_cache, item_get, 1, key_rb);
411 }
412 
413 /*
414  * call-seq:
415  *     Google::Protobuf.discard_unknown(msg)
416  *
417  * Discard unknown fields in the given message object and recursively discard
418  * unknown fields in submessages.
419  */
// Implements Google::Protobuf.discard_unknown(msg): strips unknown fields
// from |msg_rb| and, recursively, from its submessages. Returns nil.
static VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
  const upb_MessageDef *m;
  upb_Message *msg = Message_GetMutable(msg_rb, &m);
  // 128 is the maximum recursion depth allowed for nested submessages;
  // deeper nesting makes upb report failure, which we surface as an error.
  if (!upb_Message_DiscardUnknown(msg, m, 128)) {
    rb_raise(rb_eRuntimeError, "Messages nested too deeply.");
  }

  return Qnil;
}
429 
430 /*
431  * call-seq:
432  *     Google::Protobuf.deep_copy(obj) => copy_of_obj
433  *
434  * Performs a deep copy of a RepeatedField instance, a Map instance, or a
435  * message object, recursively copying its members.
436  */
VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) {
  VALUE klass = CLASS_OF(obj);
  if (klass == cRepeatedField) return RepeatedField_deep_copy(obj);
  if (klass == cMap) return Map_deep_copy(obj);

  // Anything else is treated as a message: copy it into a brand-new arena
  // and return a wrapper tied to that arena.
  VALUE new_arena_rb = Arena_new();
  upb_Arena *new_arena = Arena_get(new_arena_rb);
  const upb_MessageDef *m;
  const upb_Message *msg = Message_Get(obj, &m);
  upb_Message *copied = Message_deep_copy(msg, m, new_arena);
  return Message_GetRubyWrapper(copied, m, new_arena_rb);
}
452 
453 // -----------------------------------------------------------------------------
454 // Initialization/entry point.
455 // -----------------------------------------------------------------------------
456 
457 // This must be named "Init_protobuf_c" because the Ruby module is named
458 // "protobuf_c" -- the VM looks for this symbol in our .so.
// Extension entry point, looked up by the Ruby VM in this .so by the name
// "Init_protobuf_c". Initializes the object cache first (other registration
// steps may create wrappers), then defines the module tree and registers
// each sub-component.
__attribute__((visibility("default"))) void Init_protobuf_c() {
  ObjectCache_Init();

  VALUE google = rb_define_module("Google");
  VALUE protobuf = rb_define_module_under(google, "Protobuf");

  Arena_register(protobuf);
  Defs_register(protobuf);
  RepeatedField_register(protobuf);
  Map_register(protobuf);
  Message_register(protobuf);

  // ParseError/TypeError are defined in Ruby code; grab the class objects
  // and pin them so GC can never collect them while the extension is loaded.
  cParseError = rb_const_get(protobuf, rb_intern("ParseError"));
  rb_gc_register_mark_object(cParseError);
  cTypeError = rb_const_get(protobuf, rb_intern("TypeError"));
  rb_gc_register_mark_object(cTypeError);

  rb_define_singleton_method(protobuf, "discard_unknown",
                             Google_Protobuf_discard_unknown, 1);
  rb_define_singleton_method(protobuf, "deep_copy", Google_Protobuf_deep_copy,
                             1);
}
481