xref: /aosp_15_r20/external/protobuf/ruby/ext/google/protobuf_c/convert.c (revision 1b3f573f81763fcece89efc2b6a5209149e44ab8)
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // -----------------------------------------------------------------------------
32 // Ruby <-> upb data conversion functions.
33 //
34 // This file also contains a few other assorted algorithms on upb_MessageValue.
35 //
36 // None of the algorithms in this file require any access to the internal
37 // representation of Ruby or upb objects.
38 // -----------------------------------------------------------------------------
39 
40 #include "convert.h"
41 
42 #include "message.h"
43 #include "protobuf.h"
44 
Convert_StringData(VALUE str,upb_Arena * arena)45 static upb_StringView Convert_StringData(VALUE str, upb_Arena* arena) {
46   upb_StringView ret;
47   if (arena) {
48     char* ptr = upb_Arena_Malloc(arena, RSTRING_LEN(str));
49     memcpy(ptr, RSTRING_PTR(str), RSTRING_LEN(str));
50     ret.data = ptr;
51   } else {
52     // Data is only needed temporarily (within map lookup).
53     ret.data = RSTRING_PTR(str);
54   }
55   ret.size = RSTRING_LEN(str);
56   return ret;
57 }
58 
// Returns true when `value` has Ruby type T_FLOAT, T_FIXNUM or T_BIGNUM.
static bool is_ruby_num(VALUE value) {
  switch (TYPE(value)) {
    case T_FLOAT:
    case T_FIXNUM:
    case T_BIGNUM:
      return true;
    default:
      return false;
  }
}
63 
// Validates that `val` may be stored in the integral field `name` of upb type
// `type`. Does not return a value; raises on failure:
//   - cTypeError     if `val` is not a Ruby number (see is_ruby_num()).
//   - rb_eRangeError if `val` is a Float with a fractional part.
//   - rb_eRangeError if `type` is UInt32/UInt64 and `val` is negative.
static void Convert_CheckInt(const char* name, upb_CType type, VALUE val) {
  const bool is_unsigned =
      (type == kUpb_CType_UInt32) || (type == kUpb_CType_UInt64);

  if (!is_ruby_num(val)) {
    rb_raise(cTypeError,
             "Expected number type for integral field '%s' (given %s).", name,
             rb_class2name(CLASS_OF(val)));
  }

  // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper
  // bound; we just need to do precision checks (i.e., disallow rounding) and
  // check for < 0 on unsigned types.
  if (TYPE(val) == T_FLOAT) {
    const double as_double = NUM2DBL(val);
    const double whole_part = floor(as_double);
    if (whole_part != as_double) {
      rb_raise(rb_eRangeError,
               "Non-integral floating point value assigned to integer field "
               "'%s' (given %s).",
               name, rb_class2name(CLASS_OF(val)));
    }
  }

  if (is_unsigned && NUM2DBL(val) < 0) {
    rb_raise(
        rb_eRangeError,
        "Assigning negative value to unsigned integer field '%s' (given %s).",
        name, rb_class2name(CLASS_OF(val)));
  }
}
92 
// Converts the Ruby object `value` into the int32 number of an enum value for
// the enum field `name` whose definition is `e`.
//
// Accepted inputs:
//   - Float/Fixnum/Bignum: validated with Convert_CheckInt() as an Int32 and
//     converted with NUM2INT().
//   - String: looked up by name (with explicit length) in `e`.
//   - Symbol: its name string is looked up the same way as a String.
//
// Raises rb_eRangeError when a String/Symbol name is not found in `e`, and
// cTypeError for any other Ruby type.
static int32_t Convert_ToEnum(VALUE value, const char* name,
                              const upb_EnumDef* e) {
  int32_t val;

  switch (TYPE(value)) {
    case T_FLOAT:
    case T_FIXNUM:
    case T_BIGNUM:
      Convert_CheckInt(name, kUpb_CType_Int32, value);
      val = NUM2INT(value);
      break;
    case T_STRING: {
      const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNameWithSize(
          e, RSTRING_PTR(value), RSTRING_LEN(value));
      if (!ev) goto unknownval;
      val = upb_EnumValueDef_Number(ev);
      break;
    }
    case T_SYMBOL: {
      // Use the symbol's name string rather than SYM2ID(): converting a
      // dynamic symbol to an ID makes it permanent, so arbitrary caller
      // symbols would never be collected. Looking up with an explicit length
      // also avoids truncating names at an embedded NUL.
      VALUE sym_name = rb_sym2str(value);
      const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNameWithSize(
          e, RSTRING_PTR(sym_name), RSTRING_LEN(sym_name));
      if (!ev) goto unknownval;
      val = upb_EnumValueDef_Number(ev);
      break;
    }
    default:
      rb_raise(cTypeError,
               "Expected number or symbol type for enum field '%s'.", name);
  }

  return val;

unknownval:
  rb_raise(rb_eRangeError, "Unknown symbol value for enum field '%s'.", name);
}
129 
Convert_RubyToUpb(VALUE value,const char * name,TypeInfo type_info,upb_Arena * arena)130 upb_MessageValue Convert_RubyToUpb(VALUE value, const char* name,
131                                    TypeInfo type_info, upb_Arena* arena) {
132   upb_MessageValue ret;
133 
134   switch (type_info.type) {
135     case kUpb_CType_Float:
136       if (!is_ruby_num(value)) {
137         rb_raise(cTypeError,
138                  "Expected number type for float field '%s' (given %s).", name,
139                  rb_class2name(CLASS_OF(value)));
140       }
141       ret.float_val = NUM2DBL(value);
142       break;
143     case kUpb_CType_Double:
144       if (!is_ruby_num(value)) {
145         rb_raise(cTypeError,
146                  "Expected number type for double field '%s' (given %s).", name,
147                  rb_class2name(CLASS_OF(value)));
148       }
149       ret.double_val = NUM2DBL(value);
150       break;
151     case kUpb_CType_Bool: {
152       if (value == Qtrue) {
153         ret.bool_val = 1;
154       } else if (value == Qfalse) {
155         ret.bool_val = 0;
156       } else {
157         rb_raise(cTypeError,
158                  "Invalid argument for boolean field '%s' (given %s).", name,
159                  rb_class2name(CLASS_OF(value)));
160       }
161       break;
162     }
163     case kUpb_CType_String: {
164       VALUE utf8 = rb_enc_from_encoding(rb_utf8_encoding());
165       if (rb_obj_class(value) == rb_cSymbol) {
166         value = rb_funcall(value, rb_intern("to_s"), 0);
167       } else if (rb_obj_class(value) != rb_cString) {
168         rb_raise(cTypeError,
169                  "Invalid argument for string field '%s' (given %s).", name,
170                  rb_class2name(CLASS_OF(value)));
171       }
172 
173       if (rb_obj_encoding(value) != utf8) {
174         // Note: this will not duplicate underlying string data unless
175         // necessary.
176         value = rb_str_encode(value, utf8, 0, Qnil);
177 
178         if (rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
179           rb_raise(rb_eEncodingError, "String is invalid UTF-8");
180         }
181       }
182 
183       ret.str_val = Convert_StringData(value, arena);
184       break;
185     }
186     case kUpb_CType_Bytes: {
187       VALUE bytes = rb_enc_from_encoding(rb_ascii8bit_encoding());
188       if (rb_obj_class(value) != rb_cString) {
189         rb_raise(cTypeError,
190                  "Invalid argument for bytes field '%s' (given %s).", name,
191                  rb_class2name(CLASS_OF(value)));
192       }
193 
194       if (rb_obj_encoding(value) != bytes) {
195         // Note: this will not duplicate underlying string data unless
196         // necessary.
197         // TODO(haberman): is this really necessary to get raw bytes?
198         value = rb_str_encode(value, bytes, 0, Qnil);
199       }
200 
201       ret.str_val = Convert_StringData(value, arena);
202       break;
203     }
204     case kUpb_CType_Message:
205       ret.msg_val =
206           Message_GetUpbMessage(value, type_info.def.msgdef, name, arena);
207       break;
208     case kUpb_CType_Enum:
209       ret.int32_val = Convert_ToEnum(value, name, type_info.def.enumdef);
210       break;
211     case kUpb_CType_Int32:
212     case kUpb_CType_Int64:
213     case kUpb_CType_UInt32:
214     case kUpb_CType_UInt64:
215       Convert_CheckInt(name, type_info.type, value);
216       switch (type_info.type) {
217         case kUpb_CType_Int32:
218           ret.int32_val = NUM2INT(value);
219           break;
220         case kUpb_CType_Int64:
221           ret.int64_val = NUM2LL(value);
222           break;
223         case kUpb_CType_UInt32:
224           ret.uint32_val = NUM2UINT(value);
225           break;
226         case kUpb_CType_UInt64:
227           ret.uint64_val = NUM2ULL(value);
228           break;
229         default:
230           break;
231       }
232       break;
233     default:
234       break;
235   }
236 
237   return ret;
238 }
239 
// Converts the upb value `upb_val`, whose type is described by `type_info`,
// into a Ruby object.
//
//   - Float/Double          -> Ruby Float (DBL2NUM).
//   - Bool                  -> Qtrue / Qfalse.
//   - Int32/Int64/UInt32/UInt64 -> Ruby Integer via the matching *2NUM macro.
//   - Enum                  -> Symbol of the value's name when the number is
//                              defined in the enum, otherwise the raw Integer.
//   - String                -> new frozen String tagged as UTF-8.
//   - Bytes                 -> new frozen String tagged as ASCII-8BIT.
//   - Message               -> result of Message_GetRubyWrapper(), which is
//                              given `arena`.
//
// Raises rb_eRuntimeError for any other type.
VALUE Convert_UpbToRuby(upb_MessageValue upb_val, TypeInfo type_info,
                        VALUE arena) {
  switch (type_info.type) {
    case kUpb_CType_Float:
      return DBL2NUM(upb_val.float_val);
    case kUpb_CType_Double:
      return DBL2NUM(upb_val.double_val);
    case kUpb_CType_Bool:
      return upb_val.bool_val ? Qtrue : Qfalse;
    case kUpb_CType_Int32:
      return INT2NUM(upb_val.int32_val);
    case kUpb_CType_Int64:
      return LL2NUM(upb_val.int64_val);
    case kUpb_CType_UInt32:
      return UINT2NUM(upb_val.uint32_val);
    case kUpb_CType_UInt64:
      // Read the unsigned member: this is the one Convert_RubyToUpb() writes
      // for UInt64 fields.
      return ULL2NUM(upb_val.uint64_val);
    case kUpb_CType_Enum: {
      const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNumber(
          type_info.def.enumdef, upb_val.int32_val);
      if (ev) {
        return ID2SYM(rb_intern(upb_EnumValueDef_Name(ev)));
      } else {
        // Number not present in the enum definition: expose it as an Integer.
        return INT2NUM(upb_val.int32_val);
      }
    }
    case kUpb_CType_String: {
      VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
      rb_enc_associate(str_rb, rb_utf8_encoding());
      rb_obj_freeze(str_rb);
      return str_rb;
    }
    case kUpb_CType_Bytes: {
      VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
      rb_enc_associate(str_rb, rb_ascii8bit_encoding());
      rb_obj_freeze(str_rb);
      return str_rb;
    }
    case kUpb_CType_Message:
      return Message_GetRubyWrapper((upb_Message*)upb_val.msg_val,
                                    type_info.def.msgdef, arena);
    default:
      rb_raise(rb_eRuntimeError, "Convert_UpbToRuby(): Unexpected type %d",
               (int)type_info.type);
  }
}
286 
// Returns a copy of `msgval` (of the type given by `type_info`) that does not
// share string or message storage with the original.
//
//   - String/Bytes: the bytes are duplicated into memory obtained from
//     upb_Arena_Malloc() on `arena`.
//   - Message: delegated to Message_deep_copy() with `arena`.
//   - Every other type: the value is copied byte-for-byte.
upb_MessageValue Msgval_DeepCopy(upb_MessageValue msgval, TypeInfo type_info,
                                 upb_Arena* arena) {
  upb_MessageValue copy;

  switch (type_info.type) {
    case kUpb_CType_Message:
      copy.msg_val =
          Message_deep_copy(msgval.msg_val, type_info.def.msgdef, arena);
      break;
    case kUpb_CType_String:
    case kUpb_CType_Bytes: {
      const size_t len = msgval.str_val.size;
      char* buf = upb_Arena_Malloc(arena, len);
      memcpy(buf, msgval.str_val.data, len);
      copy.str_val.size = len;
      copy.str_val.data = buf;
      break;
    }
    default:
      // Scalars carry no external storage; a raw copy is sufficient.
      memcpy(&copy, &msgval, sizeof(msgval));
      break;
  }

  return copy;
}
312 
// Reports whether `val1` and `val2`, both of the type given by `type_info`,
// are equal.
//
// Scalar types are compared by the raw leading bytes of the value (1 byte for
// Bool, 4 for Float/Int32/UInt32/Enum, 8 for Double/Int64/UInt64).
// String/Bytes compare size and then contents. Messages are compared with
// Message_Equal(). Raises rb_eRuntimeError for any other type.
bool Msgval_IsEqual(upb_MessageValue val1, upb_MessageValue val2,
                    TypeInfo type_info) {
  size_t scalar_width;

  switch (type_info.type) {
    case kUpb_CType_Message:
      return Message_Equal(val1.msg_val, val2.msg_val, type_info.def.msgdef);
    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      if (val1.str_val.size != val2.str_val.size) {
        return false;
      }
      return memcmp(val1.str_val.data, val2.str_val.data,
                    val1.str_val.size) == 0;
    case kUpb_CType_Bool:
      scalar_width = 1;
      break;
    case kUpb_CType_Float:
    case kUpb_CType_Int32:
    case kUpb_CType_UInt32:
    case kUpb_CType_Enum:
      scalar_width = 4;
      break;
    case kUpb_CType_Double:
    case kUpb_CType_Int64:
    case kUpb_CType_UInt64:
      scalar_width = 8;
      break;
    default:
      rb_raise(rb_eRuntimeError, "Internal error, unexpected type");
  }

  return memcmp(&val1, &val2, scalar_width) == 0;
}
338 
// Computes a hash of `val`, of the type given by `type_info`, starting from
// `seed`.
//
// Scalar types hash the raw leading bytes of the value (1 byte for Bool, 4
// for Float/Int32/UInt32/Enum, 8 for Double/Int64/UInt64). String/Bytes hash
// the referenced bytes. Messages are hashed with Message_Hash(). Raises
// rb_eRuntimeError for any other type.
uint64_t Msgval_GetHash(upb_MessageValue val, TypeInfo type_info,
                        uint64_t seed) {
  size_t scalar_width;

  switch (type_info.type) {
    case kUpb_CType_Message:
      return Message_Hash(val.msg_val, type_info.def.msgdef, seed);
    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      return _upb_Hash(val.str_val.data, val.str_val.size, seed);
    case kUpb_CType_Bool:
      scalar_width = 1;
      break;
    case kUpb_CType_Float:
    case kUpb_CType_Int32:
    case kUpb_CType_UInt32:
    case kUpb_CType_Enum:
      scalar_width = 4;
      break;
    case kUpb_CType_Double:
    case kUpb_CType_Int64:
    case kUpb_CType_UInt64:
      scalar_width = 8;
      break;
    default:
      rb_raise(rb_eRuntimeError, "Internal error, unexpected type");
  }

  return _upb_Hash(&val, scalar_width, seed);
}
362