1 /* Amalgamated source file */
2 #include "ruby-upb.h"
3 /*
4 * Copyright (c) 2009-2021, Google LLC
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Google LLC nor the
15 * names of its contributors may be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 /*
31 * This is where we define macros used across upb.
32 *
33 * All of these macros are undef'd in port_undef.inc to avoid leaking them to
34 * users.
35 *
36 * The correct usage is:
37 *
38 * #include "upb/foobar.h"
39 * #include "upb/baz.h"
40 *
41 * // MUST be last included header.
42 * #include "upb/port_def.inc"
43 *
44 * // Code for this file.
45 * // <...>
46 *
47 * // Can be omitted for .c files, required for .h.
48 * #include "upb/port_undef.inc"
49 *
50 * This file is private and must not be included by users!
51 */
52
53 #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
54 (defined(__cplusplus) && __cplusplus >= 201103L) || \
55 (defined(_MSC_VER) && _MSC_VER >= 1900))
56 #error upb requires C99 or C++11 or MSVC >= 2015.
57 #endif
58
59 #include <stdint.h>
60 #include <stddef.h>
61
/* UPB_SIZE(size32, size64): expands to the first argument when uintptr_t is
 * 32 bits wide (UINTPTR_MAX == 0xffffffff) and to the second argument
 * otherwise. */
62 #if UINTPTR_MAX == 0xffffffff
63 #define UPB_SIZE(size32, size64) size32
64 #else
65 #define UPB_SIZE(size32, size64) size64
66 #endif
67
/* UPB_PTR_AT(msg, ofs, type): yields a `type*` that points `ofs` bytes past
 * `msg`.
 *
 * If we always read/write as a consistent type to each address, this shouldn't
 * violate aliasing.
 */
#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))

/* UPB_READ_ONEOF(): yields the `fieldtype` value stored at `offset` when the
 * int stored at `case_offset` equals `case_val`, and `default` otherwise.
 *
 * The expansion is fully parenthesized so that the macro can be used as an
 * operand of a larger expression (e.g. `!UPB_READ_ONEOF(...)` or
 * `1 + UPB_READ_ONEOF(...)`) without the surrounding operators binding to the
 * case comparison. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  (*UPB_PTR_AT(msg, case_offset, int) == (case_val)                            \
       ? *UPB_PTR_AT(msg, offset, fieldtype)                                   \
       : (default))

/* UPB_WRITE_ONEOF(): stores `case_val` into the int at `case_offset`, then
 * stores `value` into the `fieldtype` slot at `offset` (in that order).
 *
 * Wrapped in do { } while (0) so that the two stores behave as one statement;
 * invoke it with a trailing semicolon. This keeps both stores under the same
 * branch when the macro is the body of an unbraced `if`/`else`. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  do {                                                                        \
    *UPB_PTR_AT(msg, case_offset, int) = (case_val);                          \
    *UPB_PTR_AT(msg, offset, fieldtype) = (value);                            \
  } while (0)
81
/* NOTE(review): presumably the key/value size code that marks a string-typed
 * map entry (the map size table in upb/collections.c also stores 0 for
 * String/Bytes) -- confirm against the map implementation. */
82 #define UPB_MAPTYPE_STRING 0
83
84 /* UPB_INLINE: inline if possible, emit standalone code if required. */
/* C++ builds use plain `inline`; GCC/Clang C builds use `static __inline__`;
 * any other C compiler falls back to plain `static`. */
85 #ifdef __cplusplus
86 #define UPB_INLINE inline
87 #elif defined (__GNUC__) || defined(__clang__)
88 #define UPB_INLINE static __inline__
89 #else
90 #define UPB_INLINE static
91 #endif
92
/* Alignment helpers.
 *
 * UPB_ALIGN_UP / UPB_ALIGN_DOWN round `size` up / down to a multiple of
 * `align`. They use integer division and multiplication rather than bit
 * masks, and both evaluate `align` more than once.
 * UPB_ALIGN_MALLOC rounds `size` up to a multiple of UPB_MALLOC_ALIGN (8).
 * UPB_ALIGN_OF yields the offset of a `type` member that follows a single
 * char inside an anonymous struct. */
93 #define UPB_MALLOC_ALIGN 8
94 #define UPB_ALIGN_UP(size, align) (((size) + (align) - 1) / (align) * (align))
95 #define UPB_ALIGN_DOWN(size, align) ((size) / (align) * (align))
96 #define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, UPB_MALLOC_ALIGN)
97 #define UPB_ALIGN_OF(type) offsetof (struct { char c; type member; }, member)
98
99 // Hints to the compiler about likely/unlikely branches.
// With GCC/Clang the condition is cast to bool and handed to __builtin_expect
// with the expected outcome (1 or 0); other compilers see the bare expression.
// The cast means `bool` must already be in scope where these are expanded.
100 #if defined (__GNUC__) || defined(__clang__)
101 #define UPB_LIKELY(x) __builtin_expect((bool)(x), 1)
102 #define UPB_UNLIKELY(x) __builtin_expect((bool)(x), 0)
103 #else
104 #define UPB_LIKELY(x) (x)
105 #define UPB_UNLIKELY(x) (x)
106 #endif
107
108 // Macros for function attributes on compilers that support them.
// GCC-compatible compilers get real attributes for all four macros.
109 #ifdef __GNUC__
110 #define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
111 #define UPB_NOINLINE __attribute__((noinline))
112 #define UPB_NORETURN __attribute__((__noreturn__))
113 #define UPB_PRINTF(str, first_vararg) __attribute__((format (printf, str, first_vararg)))
// MSVC: only UPB_NORETURN maps to something; the other three expand to nothing.
114 #elif defined(_MSC_VER)
115 #define UPB_NOINLINE
116 #define UPB_FORCEINLINE
117 #define UPB_NORETURN __declspec(noreturn)
118 #define UPB_PRINTF(str, first_vararg)
// Any other compiler: all four macros expand to nothing.
119 #else /* !defined(__GNUC__) */
120 #define UPB_FORCEINLINE
121 #define UPB_NOINLINE
122 #define UPB_NORETURN
123 #define UPB_PRINTF(str, first_vararg)
124 #endif
125
/* UPB_MAX / UPB_MIN: yield the larger / smaller of the two arguments.
 * The selected argument is evaluated twice (once in the comparison, once as
 * the result), so avoid arguments with side effects. */
126 #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
127 #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
128
/* UPB_UNUSED(var): evaluates `var` and discards the result, which silences
 * "unused variable/parameter" warnings. The argument and the whole expansion
 * are parenthesized so that the cast applies to the complete argument even
 * when it is an expression such as `a + b`. */
#define UPB_UNUSED(var) ((void)(var))
130
// UPB_ASSUME(): in release mode, we tell the compiler to assume this is true.
//
// Debug builds (NDEBUG not defined) forward to assert(). In release builds the
// GCC and MSVC variants mark the `!(expr)` path as unreachable; other
// compilers get an empty loop that names `expr` without evaluating it.
//
// The release variants are wrapped in do { } while (0) so that the expansion
// is a single statement: a bare `if` here would capture the `else` of an
// enclosing unbraced `if (...) UPB_ASSUME(...); else ...`.
#ifdef NDEBUG
#ifdef __GNUC__
#define UPB_ASSUME(expr) do { if (!(expr)) __builtin_unreachable(); } while (0)
#elif defined _MSC_VER
#define UPB_ASSUME(expr) do { if (!(expr)) __assume(0); } while (0)
#else
#define UPB_ASSUME(expr) do {} while (false && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)
#endif
143
144 /* UPB_ASSERT(): in release mode, we use the expression without letting it be
145 * evaluated. This prevents "unused variable" warnings. */
/* Debug builds (NDEBUG not defined) forward to assert(). Release builds expand
 * to an empty loop whose condition `false && (expr)` names the expression but
 * short-circuits before evaluating it. */
146 #ifdef NDEBUG
147 #define UPB_ASSERT(expr) do {} while (false && (expr))
148 #else
149 #define UPB_ASSERT(expr) assert(expr)
150 #endif
151
/* UPB_UNREACHABLE(): marks a code path that must never execute. Every variant
 * does assert(0); GCC/Clang builds additionally call __builtin_unreachable()
 * after it. */
152 #if defined(__GNUC__) || defined(__clang__)
153 #define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
154 #else
155 #define UPB_UNREACHABLE() do { assert(0); } while(0)
156 #endif
157
158 /* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask. */
/* On Apple platforms these map to _setjmp()/_longjmp(); everywhere else they
 * map to plain setjmp()/longjmp(). */
159 #ifdef __APPLE__
160 #define UPB_SETJMP(buf) _setjmp(buf)
161 #define UPB_LONGJMP(buf, val) _longjmp(buf, val)
162 #else
163 #define UPB_SETJMP(buf) setjmp(buf)
164 #define UPB_LONGJMP(buf, val) longjmp(buf, val)
165 #endif
166
167 /* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */
/* When `ofs` is zero the addition is skipped and `ptr` is returned as-is.
 * `ofs` is evaluated twice when it is non-zero; `ptr` is evaluated once. */
168 #define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr))
169
170 /* Configure whether fasttable is switched on or not. *************************/
171
/* UPB_HAS_ATTRIBUTE(x) wraps __has_attribute when the compiler provides it and
 * is 0 otherwise. It is only a local helper and is #undef'd below. */
172 #ifdef __has_attribute
173 #define UPB_HAS_ATTRIBUTE(x) __has_attribute(x)
174 #else
175 #define UPB_HAS_ATTRIBUTE(x) 0
176 #endif
177
/* UPB_MUSTTAIL expands to __attribute__((musttail)) when the compiler reports
 * support for that attribute, and to nothing otherwise. */
178 #if UPB_HAS_ATTRIBUTE(musttail)
179 #define UPB_MUSTTAIL __attribute__((musttail))
180 #else
181 #define UPB_MUSTTAIL
182 #endif
183
184 #undef UPB_HAS_ATTRIBUTE
185
186 /* This check is not fully robust: it does not require that we have "musttail"
187 * support available. We need tail calls to avoid consuming arbitrary amounts
188 * of stack space.
189 *
190 * GCC/Clang can mostly be trusted to generate tail calls as long as
191 * optimization is enabled, but, debug builds will not generate tail calls
192 * unless "musttail" is available.
193 *
194 * We should probably either:
195 * 1. require that the compiler supports musttail.
196 * 2. add some fallback code for when musttail isn't available (ie. return
197 * instead of tail calling). This is safe and portable, but this comes at
198 * a CPU cost.
199 */
/* UPB_FASTTABLE_SUPPORTED is 1 only when __GNUC__ is defined and the target is
 * x86-64 or AArch64. It is a local helper and is #undef'd at the end of this
 * section. */
200 #if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__)
201 #define UPB_FASTTABLE_SUPPORTED 1
202 #else
203 #define UPB_FASTTABLE_SUPPORTED 0
204 #endif
205
206 /* define UPB_ENABLE_FASTTABLE to force fast table support.
207 * This is useful when we want to ensure we are really getting fasttable,
208 * for example for testing or benchmarking. */
209 #if defined(UPB_ENABLE_FASTTABLE)
210 #if !UPB_FASTTABLE_SUPPORTED
211 #error fasttable is x86-64/ARM64 only and requires GCC or Clang.
212 #endif
213 #define UPB_FASTTABLE 1
214 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible.
215 * This is useful for releasing code that might be used on multiple platforms,
216 * for example the PHP or Ruby C extensions. */
217 #elif defined(UPB_TRY_ENABLE_FASTTABLE)
218 #define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED
/* Neither switch defined: fasttable stays off. */
219 #else
220 #define UPB_FASTTABLE 0
221 #endif
222
223 /* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully
224 * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE. */
/* It drops its arguments only when UPB_TRY_ENABLE_FASTTABLE was requested but
 * fasttable ended up disabled; in every other configuration the arguments are
 * passed through unchanged. */
225 #if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE)
226 #define UPB_FASTTABLE_INIT(...)
227 #else
228 #define UPB_FASTTABLE_INIT(...) __VA_ARGS__
229 #endif
230
231 #undef UPB_FASTTABLE_SUPPORTED
232
233 /* ASAN poisoning (for arena) *************************************************/
234
/* When __SANITIZE_ADDRESS__ is defined, UPB_ASAN is 1 and the poison/unpoison
 * macros call the __asan_* functions, which are declared here directly (with C
 * linkage under C++). Otherwise UPB_ASAN is 0 and the macros only evaluate and
 * discard their two arguments. */
235 #if defined(__SANITIZE_ADDRESS__)
236 #define UPB_ASAN 1
237 #ifdef __cplusplus
238 extern "C" {
239 #endif
240 void __asan_poison_memory_region(void const volatile *addr, size_t size);
241 void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
242 #ifdef __cplusplus
243 } /* extern "C" */
244 #endif
245 #define UPB_POISON_MEMORY_REGION(addr, size) \
246 __asan_poison_memory_region((addr), (size))
247 #define UPB_UNPOISON_MEMORY_REGION(addr, size) \
248 __asan_unpoison_memory_region((addr), (size))
249 #else
250 #define UPB_ASAN 0
251 #define UPB_POISON_MEMORY_REGION(addr, size) \
252 ((void)(addr), (void)(size))
253 #define UPB_UNPOISON_MEMORY_REGION(addr, size) \
254 ((void)(addr), (void)(size))
255 #endif
256
257 /* Disable proto2 enum checking (TEMPORARY) ***********************************/
258
/* Defining UPB_DISABLE_PROTO2_ENUM_CHECKING sets
 * UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 to 1; otherwise it is 0. */
259 #ifdef UPB_DISABLE_PROTO2_ENUM_CHECKING
260 #define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1
261 #else
262 #define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0
263 #endif
264
265 /** upb/collections.c ************************************************************/
266
267 #include <string.h>
268
269
/* Map key/value size for each upb_CType, indexed by the enumerator named in
 * each entry's comment (index 0 has no named type). upb_Map_New() passes these
 * values to _upb_Map_New().
 *
 * Strings/bytes are special-cased in maps and are stored as 0 here.
 *
 * The table is only ever read, so it is const like _upb_CTypeo_sizelg2. */
static const char _upb_CTypeo_mapsize[12] = {
    0,
    1,             /* kUpb_CType_Bool */
    4,             /* kUpb_CType_Float */
    4,             /* kUpb_CType_Int32 */
    4,             /* kUpb_CType_UInt32 */
    4,             /* kUpb_CType_Enum */
    sizeof(void*), /* kUpb_CType_Message */
    8,             /* kUpb_CType_Double */
    8,             /* kUpb_CType_Int64 */
    8,             /* kUpb_CType_UInt64 */
    0,             /* kUpb_CType_String */
    0,             /* kUpb_CType_Bytes */
};
285
/* log2 of the array element size for each upb_CType, indexed by the enumerator
 * named in each entry's comment (index 0 has no named type). The Message,
 * String and Bytes entries depend on the pointer width via UPB_SIZE().
 * upb_Array_New() passes the selected entry to _upb_Array_New(). */
286 static const char _upb_CTypeo_sizelg2[12] = {
287 0,
288 0, /* kUpb_CType_Bool */
289 2, /* kUpb_CType_Float */
290 2, /* kUpb_CType_Int32 */
291 2, /* kUpb_CType_UInt32 */
292 2, /* kUpb_CType_Enum */
293 UPB_SIZE(2, 3), /* kUpb_CType_Message */
294 3, /* kUpb_CType_Double */
295 3, /* kUpb_CType_Int64 */
296 3, /* kUpb_CType_UInt64 */
297 UPB_SIZE(3, 4), /* kUpb_CType_String */
298 UPB_SIZE(3, 4), /* kUpb_CType_Bytes */
299 };
300
301 /** upb_Array *****************************************************************/
302
upb_Array_New(upb_Arena * a,upb_CType type)303 upb_Array* upb_Array_New(upb_Arena* a, upb_CType type) {
304 return _upb_Array_New(a, 4, _upb_CTypeo_sizelg2[type]);
305 }
306
upb_Array_Size(const upb_Array * arr)307 size_t upb_Array_Size(const upb_Array* arr) { return arr->len; }
308
upb_Array_Get(const upb_Array * arr,size_t i)309 upb_MessageValue upb_Array_Get(const upb_Array* arr, size_t i) {
310 upb_MessageValue ret;
311 const char* data = _upb_array_constptr(arr);
312 int lg2 = arr->data & 7;
313 UPB_ASSERT(i < arr->len);
314 memcpy(&ret, data + (i << lg2), 1 << lg2);
315 return ret;
316 }
317
/* Overwrites element `i` of `arr` with the first (1 << lg2) bytes of `val`.
 * `i` must be less than the array length (asserted in debug builds). */
void upb_Array_Set(upb_Array* arr, size_t i, upb_MessageValue val) {
  char* base = _upb_array_ptr(arr);
  /* The low three bits of arr->data are used as log2 of the element size. */
  const int elem_size_lg2 = arr->data & 7;
  UPB_ASSERT(i < arr->len);
  memcpy(base + (i << elem_size_lg2), &val, 1 << elem_size_lg2);
}
324
/* Appends `val` to the end of `arr`.
 *
 * The array is first resized to len + 1 using `arena`; when that resize
 * reports failure nothing is stored and false is returned. On success the new
 * last slot receives `val` and true is returned. */
bool upb_Array_Append(upb_Array* arr, upb_MessageValue val, upb_Arena* arena) {
  const bool grown = upb_Array_Resize(arr, arr->len + 1, arena);
  if (grown) {
    upb_Array_Set(arr, arr->len - 1, val);
  }
  return grown;
}
332
/* Copies `count` elements inside `arr` from index `src_idx` to index
 * `dst_idx`. memmove() is used, so the two ranges may overlap. No bounds are
 * checked here. */
void upb_Array_Move(upb_Array* arr, size_t dst_idx, size_t src_idx,
                    size_t count) {
  char* base = _upb_array_ptr(arr);
  /* The low three bits of arr->data are used as log2 of the element size. */
  const int elem_size_lg2 = arr->data & 7;
  char* dst = base + (dst_idx << elem_size_lg2);
  const char* src = base + (src_idx << elem_size_lg2);
  memmove(dst, src, count << elem_size_lg2);
}
339
/* Opens a gap of `count` elements at index `i`.
 *
 * The array is resized to len + count using `arena`, then the elements that
 * used to start at `i` are shifted up by `count`. This function does not
 * itself write the elements in the gap. Returns false (without moving
 * anything) when the resize reports failure, true otherwise.
 *
 * Debug builds assert that `i` is within the array and that len + count does
 * not wrap around. */
bool upb_Array_Insert(upb_Array* arr, size_t i, size_t count,
                      upb_Arena* arena) {
  UPB_ASSERT(i <= arr->len);
  UPB_ASSERT(count + arr->len >= count);
  const size_t old_len = arr->len;
  const size_t tail_len = old_len - i;
  if (upb_Array_Resize(arr, old_len + count, arena)) {
    upb_Array_Move(arr, i + count, i, tail_len);
    return true;
  }
  return false;
}
351
352 /*
353 * i end arr->len
354 * |------------|XXXXXXXX|--------|
355 */
/* Removes `count` elements starting at index `i`: the elements after the
 * removed range are shifted down to `i` and the length shrinks by `count`.
 * Debug builds assert that i + count does not wrap and stays within the
 * array. */
void upb_Array_Delete(upb_Array* arr, size_t i, size_t count) {
  const size_t end = i + count;
  UPB_ASSERT(i <= end);
  UPB_ASSERT(end <= arr->len);
  const size_t tail_len = arr->len - end;
  upb_Array_Move(arr, i, end, tail_len);
  arr->len = arr->len - count;
}
363
/* Public entry point for resizing: forwards `arr`, `size` and `arena` to
 * _upb_Array_Resize() and returns its result unchanged. */
bool upb_Array_Resize(upb_Array* arr, size_t size, upb_Arena* arena) {
  const bool resized = _upb_Array_Resize(arr, size, arena);
  return resized;
}
367
368 /** upb_Map *******************************************************************/
369
upb_Map_New(upb_Arena * a,upb_CType key_type,upb_CType value_type)370 upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type) {
371 return _upb_Map_New(a, _upb_CTypeo_mapsize[key_type],
372 _upb_CTypeo_mapsize[value_type]);
373 }
374
/* Returns whatever _upb_Map_Size() reports for `map`. */
size_t upb_Map_Size(const upb_Map* map) {
  const size_t entries = _upb_Map_Size(map);
  return entries;
}
376
/* Looks up `key` in `map` by forwarding to _upb_Map_Get() with the map's own
 * key and value sizes; `val` is passed through as the output location.
 * Returns that helper's result. */
bool upb_Map_Get(const upb_Map* map, upb_MessageValue key,
                 upb_MessageValue* val) {
  const bool found =
      _upb_Map_Get(map, &key, map->key_size, val, map->val_size);
  return found;
}
381
/* Public wrapper: forwards `map` to _upb_Map_Clear(). */
void upb_Map_Clear(upb_Map* map) {
  _upb_Map_Clear(map);
}
383
upb_Map_Insert(upb_Map * map,upb_MessageValue key,upb_MessageValue val,upb_Arena * arena)384 upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key,
385 upb_MessageValue val, upb_Arena* arena) {
386 return (upb_MapInsertStatus)_upb_Map_Insert(map, &key, map->key_size, &val,
387 map->val_size, arena);
388 }
389
/* Deletes `key` from `map` by forwarding to _upb_Map_Delete() with the map's
 * own key size, and returns that helper's result. */
bool upb_Map_Delete(upb_Map* map, upb_MessageValue key) {
  const bool removed = _upb_Map_Delete(map, &key, map->key_size);
  return removed;
}
393
/* Steps the iterator by delegating to _upb_map_next(map, iter) and reports
 * whether that call produced a non-zero result. (Presumably *iter is advanced
 * by the helper -- confirm against its definition.) */
bool upb_MapIterator_Next(const upb_Map* map, size_t* iter) {
  if (_upb_map_next(map, iter)) {
    return true;
  }
  return false;
}
397
upb_MapIterator_Done(const upb_Map * map,size_t iter)398 bool upb_MapIterator_Done(const upb_Map* map, size_t iter) {
399 upb_strtable_iter i;
400 UPB_ASSERT(iter != kUpb_Map_Begin);
401 i.t = &map->table;
402 i.index = iter;
403 return upb_strtable_done(&i);
404 }
405
406 /* Returns the key and value for this entry of the map. */
upb_MapIterator_Key(const upb_Map * map,size_t iter)407 upb_MessageValue upb_MapIterator_Key(const upb_Map* map, size_t iter) {
408 upb_strtable_iter i;
409 upb_MessageValue ret;
410 i.t = &map->table;
411 i.index = iter;
412 _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size);
413 return ret;
414 }
415
upb_MapIterator_Value(const upb_Map * map,size_t iter)416 upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter) {
417 upb_strtable_iter i;
418 upb_MessageValue ret;
419 i.t = &map->table;
420 i.index = iter;
421 _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size);
422 return ret;
423 }
424
425 /* void upb_MapIterator_SetValue(upb_Map *map, size_t iter, upb_MessageValue
426 * value); */
427
428 /** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upb.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input
429 * file:
430 *
431 * google/protobuf/descriptor.proto
432 *
433 * Do not edit -- your changes will be discarded when the file is
434 * regenerated. */
435
436 #include <stddef.h>
437
438
/* Generated layout data for google_protobuf_FileDescriptorSet (upbc output; do
 * not hand-edit): one sub-table reference, one field entry, and the
 * upb_MiniTable that points at both arrays. UPB_SIZE(a, b) selects the 32-bit
 * or 64-bit variant of a value. */
439 static const upb_MiniTable_Sub google_protobuf_FileDescriptorSet_submsgs[1] = {
440 {.submsg = &google_protobuf_FileDescriptorProto_msginit},
441 };
442
443 static const upb_MiniTable_Field google_protobuf_FileDescriptorSet__fields[1] = {
444 {1, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
445 };
446
447 const upb_MiniTable google_protobuf_FileDescriptorSet_msginit = {
448 &google_protobuf_FileDescriptorSet_submsgs[0],
449 &google_protobuf_FileDescriptorSet__fields[0],
450 UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
451 };
452
/* Generated layout data for google_protobuf_FileDescriptorProto (upbc output;
 * do not hand-edit): six sub-table references, twelve field entries, and the
 * upb_MiniTable that points at both arrays.
 * NOTE(review): the fourth initializer of each field entry appears to be an
 * index into the _submsgs array (or kUpb_NoSub) -- confirm against the
 * upb_MiniTable_Field definition. */
453 static const upb_MiniTable_Sub google_protobuf_FileDescriptorProto_submsgs[6] = {
454 {.submsg = &google_protobuf_DescriptorProto_msginit},
455 {.submsg = &google_protobuf_EnumDescriptorProto_msginit},
456 {.submsg = &google_protobuf_ServiceDescriptorProto_msginit},
457 {.submsg = &google_protobuf_FieldDescriptorProto_msginit},
458 {.submsg = &google_protobuf_FileOptions_msginit},
459 {.submsg = &google_protobuf_SourceCodeInfo_msginit},
460 };
461
462 static const upb_MiniTable_Field google_protobuf_FileDescriptorProto__fields[12] = {
463 {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
464 {2, UPB_SIZE(12, 24), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
465 {3, UPB_SIZE(20, 40), UPB_SIZE(0, 0), kUpb_NoSub, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
466 {4, UPB_SIZE(24, 48), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
467 {5, UPB_SIZE(28, 56), UPB_SIZE(0, 0), 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
468 {6, UPB_SIZE(32, 64), UPB_SIZE(0, 0), 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
469 {7, UPB_SIZE(36, 72), UPB_SIZE(0, 0), 3, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
470 {8, UPB_SIZE(40, 80), UPB_SIZE(3, 3), 4, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
471 {9, UPB_SIZE(44, 88), UPB_SIZE(4, 4), 5, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
472 {10, UPB_SIZE(48, 96), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
473 {11, UPB_SIZE(52, 104), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
474 {12, UPB_SIZE(56, 112), UPB_SIZE(5, 5), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
475 };
476
477 const upb_MiniTable google_protobuf_FileDescriptorProto_msginit = {
478 &google_protobuf_FileDescriptorProto_submsgs[0],
479 &google_protobuf_FileDescriptorProto__fields[0],
480 UPB_SIZE(64, 128), 12, kUpb_ExtMode_NonExtendable, 12, 255, 0,
481 };
482
/* Generated layout data for google_protobuf_DescriptorProto (upbc output; do
 * not hand-edit): eight sub-table references (the message refers to itself and
 * lists FieldDescriptorProto twice), ten field entries, and the upb_MiniTable
 * that points at both arrays. */
483 static const upb_MiniTable_Sub google_protobuf_DescriptorProto_submsgs[8] = {
484 {.submsg = &google_protobuf_FieldDescriptorProto_msginit},
485 {.submsg = &google_protobuf_DescriptorProto_msginit},
486 {.submsg = &google_protobuf_EnumDescriptorProto_msginit},
487 {.submsg = &google_protobuf_DescriptorProto_ExtensionRange_msginit},
488 {.submsg = &google_protobuf_FieldDescriptorProto_msginit},
489 {.submsg = &google_protobuf_MessageOptions_msginit},
490 {.submsg = &google_protobuf_OneofDescriptorProto_msginit},
491 {.submsg = &google_protobuf_DescriptorProto_ReservedRange_msginit},
492 };
493
494 static const upb_MiniTable_Field google_protobuf_DescriptorProto__fields[10] = {
495 {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
496 {2, UPB_SIZE(12, 24), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
497 {3, UPB_SIZE(16, 32), UPB_SIZE(0, 0), 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
498 {4, UPB_SIZE(20, 40), UPB_SIZE(0, 0), 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
499 {5, UPB_SIZE(24, 48), UPB_SIZE(0, 0), 3, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
500 {6, UPB_SIZE(28, 56), UPB_SIZE(0, 0), 4, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
501 {7, UPB_SIZE(32, 64), UPB_SIZE(2, 2), 5, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
502 {8, UPB_SIZE(36, 72), UPB_SIZE(0, 0), 6, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
503 {9, UPB_SIZE(40, 80), UPB_SIZE(0, 0), 7, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
504 {10, UPB_SIZE(44, 88), UPB_SIZE(0, 0), kUpb_NoSub, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
505 };
506
507 const upb_MiniTable google_protobuf_DescriptorProto_msginit = {
508 &google_protobuf_DescriptorProto_submsgs[0],
509 &google_protobuf_DescriptorProto__fields[0],
510 UPB_SIZE(48, 96), 10, kUpb_ExtMode_NonExtendable, 10, 255, 0,
511 };
512
/* Generated layout data for google_protobuf_DescriptorProto_ExtensionRange
 * (upbc output; do not hand-edit): one sub-table reference, three field
 * entries, and the upb_MiniTable that points at both arrays. */
513 static const upb_MiniTable_Sub google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
514 {.submsg = &google_protobuf_ExtensionRangeOptions_msginit},
515 };
516
517 static const upb_MiniTable_Field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
518 {1, UPB_SIZE(4, 4), UPB_SIZE(1, 1), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
519 {2, UPB_SIZE(8, 8), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
520 {3, UPB_SIZE(12, 16), UPB_SIZE(3, 3), 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
521 };
522
523 const upb_MiniTable google_protobuf_DescriptorProto_ExtensionRange_msginit = {
524 &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
525 &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
526 UPB_SIZE(16, 24), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
527 };
528
/* Generated layout data for google_protobuf_DescriptorProto_ReservedRange
 * (upbc output; do not hand-edit): two field entries and the upb_MiniTable
 * that points at them. There is no sub-table array, so the first initializer
 * of the MiniTable is NULL. */
529 static const upb_MiniTable_Field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
530 {1, UPB_SIZE(4, 4), UPB_SIZE(1, 1), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
531 {2, UPB_SIZE(8, 8), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
532 };
533
534 const upb_MiniTable google_protobuf_DescriptorProto_ReservedRange_msginit = {
535 NULL,
536 &google_protobuf_DescriptorProto_ReservedRange__fields[0],
537 UPB_SIZE(16, 16), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
538 };
539
/* Generated layout data for google_protobuf_ExtensionRangeOptions (upbc
 * output; do not hand-edit): one sub-table reference, one field entry (first
 * initializer 999), and the upb_MiniTable that points at both arrays. Unlike
 * the tables above, this MiniTable is marked kUpb_ExtMode_Extendable. */
540 static const upb_MiniTable_Sub google_protobuf_ExtensionRangeOptions_submsgs[1] = {
541 {.submsg = &google_protobuf_UninterpretedOption_msginit},
542 };
543
544 static const upb_MiniTable_Field google_protobuf_ExtensionRangeOptions__fields[1] = {
545 {999, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
546 };
547
548 const upb_MiniTable google_protobuf_ExtensionRangeOptions_msginit = {
549 &google_protobuf_ExtensionRangeOptions_submsgs[0],
550 &google_protobuf_ExtensionRangeOptions__fields[0],
551 UPB_SIZE(8, 8), 1, kUpb_ExtMode_Extendable, 0, 255, 0,
552 };
553
/* Generated layout data for google_protobuf_FieldDescriptorProto (upbc output;
 * do not hand-edit): three sub-table references (two enum tables via .subenum
 * and one message table via .submsg), eleven field entries, and the
 * upb_MiniTable that points at both arrays. */
554 static const upb_MiniTable_Sub google_protobuf_FieldDescriptorProto_submsgs[3] = {
555 {.subenum = &google_protobuf_FieldDescriptorProto_Label_enuminit},
556 {.subenum = &google_protobuf_FieldDescriptorProto_Type_enuminit},
557 {.submsg = &google_protobuf_FieldOptions_msginit},
558 };
559
560 static const upb_MiniTable_Field google_protobuf_FieldDescriptorProto__fields[11] = {
561 {1, UPB_SIZE(24, 24), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
562 {2, UPB_SIZE(32, 40), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
563 {3, UPB_SIZE(4, 4), UPB_SIZE(3, 3), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
564 {4, UPB_SIZE(8, 8), UPB_SIZE(4, 4), 0, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
565 {5, UPB_SIZE(12, 12), UPB_SIZE(5, 5), 1, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
566 {6, UPB_SIZE(40, 56), UPB_SIZE(6, 6), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
567 {7, UPB_SIZE(48, 72), UPB_SIZE(7, 7), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
568 {8, UPB_SIZE(56, 88), UPB_SIZE(8, 8), 2, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
569 {9, UPB_SIZE(16, 16), UPB_SIZE(9, 9), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
570 {10, UPB_SIZE(60, 96), UPB_SIZE(10, 10), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
571 {17, UPB_SIZE(20, 20), UPB_SIZE(11, 11), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
572 };
573
574 const upb_MiniTable google_protobuf_FieldDescriptorProto_msginit = {
575 &google_protobuf_FieldDescriptorProto_submsgs[0],
576 &google_protobuf_FieldDescriptorProto__fields[0],
577 UPB_SIZE(72, 112), 11, kUpb_ExtMode_NonExtendable, 10, 255, 0,
578 };
579
/* Generated layout data for google_protobuf_OneofDescriptorProto (upbc output;
 * do not hand-edit): one sub-table reference, two field entries, and the
 * upb_MiniTable that points at both arrays. */
580 static const upb_MiniTable_Sub google_protobuf_OneofDescriptorProto_submsgs[1] = {
581 {.submsg = &google_protobuf_OneofOptions_msginit},
582 };
583
584 static const upb_MiniTable_Field google_protobuf_OneofDescriptorProto__fields[2] = {
585 {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
586 {2, UPB_SIZE(12, 24), UPB_SIZE(2, 2), 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
587 };
588
589 const upb_MiniTable google_protobuf_OneofDescriptorProto_msginit = {
590 &google_protobuf_OneofDescriptorProto_submsgs[0],
591 &google_protobuf_OneofDescriptorProto__fields[0],
592 UPB_SIZE(16, 32), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
593 };
594
/* Generated layout data for google_protobuf_EnumDescriptorProto (upbc output;
 * do not hand-edit): three sub-table references, five field entries, and the
 * upb_MiniTable that points at both arrays. */
595 static const upb_MiniTable_Sub google_protobuf_EnumDescriptorProto_submsgs[3] = {
596 {.submsg = &google_protobuf_EnumValueDescriptorProto_msginit},
597 {.submsg = &google_protobuf_EnumOptions_msginit},
598 {.submsg = &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit},
599 };
600
601 static const upb_MiniTable_Field google_protobuf_EnumDescriptorProto__fields[5] = {
602 {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
603 {2, UPB_SIZE(12, 24), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
604 {3, UPB_SIZE(16, 32), UPB_SIZE(2, 2), 1, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
605 {4, UPB_SIZE(20, 40), UPB_SIZE(0, 0), 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
606 {5, UPB_SIZE(24, 48), UPB_SIZE(0, 0), kUpb_NoSub, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
607 };
608
609 const upb_MiniTable google_protobuf_EnumDescriptorProto_msginit = {
610 &google_protobuf_EnumDescriptorProto_submsgs[0],
611 &google_protobuf_EnumDescriptorProto__fields[0],
612 UPB_SIZE(32, 56), 5, kUpb_ExtMode_NonExtendable, 5, 255, 0,
613 };
614
/* Generated layout data for
 * google_protobuf_EnumDescriptorProto_EnumReservedRange (upbc output; do not
 * hand-edit): two field entries and the upb_MiniTable that points at them.
 * There is no sub-table array, so the first initializer of the MiniTable is
 * NULL. */
615 static const upb_MiniTable_Field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
616 {1, UPB_SIZE(4, 4), UPB_SIZE(1, 1), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
617 {2, UPB_SIZE(8, 8), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
618 };
619
620 const upb_MiniTable google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
621 NULL,
622 &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
623 UPB_SIZE(16, 16), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
624 };
625
/* Generated layout data for google_protobuf_EnumValueDescriptorProto (upbc
 * output; do not hand-edit): one sub-table reference, three field entries, and
 * the upb_MiniTable that points at both arrays. */
626 static const upb_MiniTable_Sub google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
627 {.submsg = &google_protobuf_EnumValueOptions_msginit},
628 };
629
630 static const upb_MiniTable_Field google_protobuf_EnumValueDescriptorProto__fields[3] = {
631 {1, UPB_SIZE(8, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
632 {2, UPB_SIZE(4, 4), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
633 {3, UPB_SIZE(16, 24), UPB_SIZE(3, 3), 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
634 };
635
636 const upb_MiniTable google_protobuf_EnumValueDescriptorProto_msginit = {
637 &google_protobuf_EnumValueDescriptorProto_submsgs[0],
638 &google_protobuf_EnumValueDescriptorProto__fields[0],
639 UPB_SIZE(24, 32), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
640 };
641
/* Generated layout data for google_protobuf_ServiceDescriptorProto (upbc
 * output; do not hand-edit): two sub-table references, three field entries,
 * and the upb_MiniTable that points at both arrays. */
642 static const upb_MiniTable_Sub google_protobuf_ServiceDescriptorProto_submsgs[2] = {
643 {.submsg = &google_protobuf_MethodDescriptorProto_msginit},
644 {.submsg = &google_protobuf_ServiceOptions_msginit},
645 };
646
647 static const upb_MiniTable_Field google_protobuf_ServiceDescriptorProto__fields[3] = {
648 {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
649 {2, UPB_SIZE(12, 24), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
650 {3, UPB_SIZE(16, 32), UPB_SIZE(2, 2), 1, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
651 };
652
653 const upb_MiniTable google_protobuf_ServiceDescriptorProto_msginit = {
654 &google_protobuf_ServiceDescriptorProto_submsgs[0],
655 &google_protobuf_ServiceDescriptorProto__fields[0],
656 UPB_SIZE(24, 40), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
657 };
658
/* Generated layout data for google_protobuf_MethodDescriptorProto (upbc
 * output; do not hand-edit): one sub-table reference, six field entries, and
 * the upb_MiniTable that points at both arrays. */
659 static const upb_MiniTable_Sub google_protobuf_MethodDescriptorProto_submsgs[1] = {
660 {.submsg = &google_protobuf_MethodOptions_msginit},
661 };
662
663 static const upb_MiniTable_Field google_protobuf_MethodDescriptorProto__fields[6] = {
664 {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
665 {2, UPB_SIZE(12, 24), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
666 {3, UPB_SIZE(20, 40), UPB_SIZE(3, 3), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
667 {4, UPB_SIZE(28, 56), UPB_SIZE(4, 4), 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
668 {5, UPB_SIZE(1, 1), UPB_SIZE(5, 5), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
669 {6, UPB_SIZE(2, 2), UPB_SIZE(6, 6), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
670 };
671
672 const upb_MiniTable google_protobuf_MethodDescriptorProto_msginit = {
673 &google_protobuf_MethodDescriptorProto_submsgs[0],
674 &google_protobuf_MethodDescriptorProto__fields[0],
675 UPB_SIZE(32, 64), 6, kUpb_ExtMode_NonExtendable, 6, 255, 0,
676 };
677
/* Generated sub-table references for google_protobuf_FileOptions (upbc output;
 * do not hand-edit): one enum table (.subenum) followed by one message table
 * (.submsg). */
678 static const upb_MiniTable_Sub google_protobuf_FileOptions_submsgs[2] = {
679 {.subenum = &google_protobuf_FileOptions_OptimizeMode_enuminit},
680 {.submsg = &google_protobuf_UninterpretedOption_msginit},
681 };
682
683 static const upb_MiniTable_Field google_protobuf_FileOptions__fields[21] = {
684 {1, UPB_SIZE(20, 24), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
685 {8, UPB_SIZE(28, 40), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
686 {9, UPB_SIZE(4, 4), UPB_SIZE(3, 3), 0, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
687 {10, UPB_SIZE(8, 8), UPB_SIZE(4, 4), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
688 {11, UPB_SIZE(36, 56), UPB_SIZE(5, 5), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
689 {16, UPB_SIZE(9, 9), UPB_SIZE(6, 6), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
690 {17, UPB_SIZE(10, 10), UPB_SIZE(7, 7), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
691 {18, UPB_SIZE(11, 11), UPB_SIZE(8, 8), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
692 {20, UPB_SIZE(12, 12), UPB_SIZE(9, 9), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
693 {23, UPB_SIZE(13, 13), UPB_SIZE(10, 10), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
694 {27, UPB_SIZE(14, 14), UPB_SIZE(11, 11), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
695 {31, UPB_SIZE(15, 15), UPB_SIZE(12, 12), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
696 {36, UPB_SIZE(44, 72), UPB_SIZE(13, 13), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
697 {37, UPB_SIZE(52, 88), UPB_SIZE(14, 14), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
698 {39, UPB_SIZE(60, 104), UPB_SIZE(15, 15), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
699 {40, UPB_SIZE(68, 120), UPB_SIZE(16, 16), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
700 {41, UPB_SIZE(76, 136), UPB_SIZE(17, 17), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
701 {42, UPB_SIZE(16, 16), UPB_SIZE(18, 18), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
702 {44, UPB_SIZE(84, 152), UPB_SIZE(19, 19), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
703 {45, UPB_SIZE(92, 168), UPB_SIZE(20, 20), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
704 {999, UPB_SIZE(100, 184), UPB_SIZE(0, 0), 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
705 };
706
707 const upb_MiniTable google_protobuf_FileOptions_msginit = {
708 &google_protobuf_FileOptions_submsgs[0],
709 &google_protobuf_FileOptions__fields[0],
710 UPB_SIZE(104, 192), 21, kUpb_ExtMode_Extendable, 1, 255, 0,
711 };
712
713 static const upb_MiniTable_Sub google_protobuf_MessageOptions_submsgs[1] = {
714 {.submsg = &google_protobuf_UninterpretedOption_msginit},
715 };
716
717 static const upb_MiniTable_Field google_protobuf_MessageOptions__fields[5] = {
718 {1, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
719 {2, UPB_SIZE(2, 2), UPB_SIZE(2, 2), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
720 {3, UPB_SIZE(3, 3), UPB_SIZE(3, 3), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
721 {7, UPB_SIZE(4, 4), UPB_SIZE(4, 4), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
722 {999, UPB_SIZE(8, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
723 };
724
725 const upb_MiniTable google_protobuf_MessageOptions_msginit = {
726 &google_protobuf_MessageOptions_submsgs[0],
727 &google_protobuf_MessageOptions__fields[0],
728 UPB_SIZE(16, 16), 5, kUpb_ExtMode_Extendable, 3, 255, 0,
729 };
730
731 static const upb_MiniTable_Sub google_protobuf_FieldOptions_submsgs[3] = {
732 {.subenum = &google_protobuf_FieldOptions_CType_enuminit},
733 {.subenum = &google_protobuf_FieldOptions_JSType_enuminit},
734 {.submsg = &google_protobuf_UninterpretedOption_msginit},
735 };
736
737 static const upb_MiniTable_Field google_protobuf_FieldOptions__fields[8] = {
738 {1, UPB_SIZE(4, 4), UPB_SIZE(1, 1), 0, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
739 {2, UPB_SIZE(8, 8), UPB_SIZE(2, 2), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
740 {3, UPB_SIZE(9, 9), UPB_SIZE(3, 3), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
741 {5, UPB_SIZE(10, 10), UPB_SIZE(4, 4), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
742 {6, UPB_SIZE(12, 12), UPB_SIZE(5, 5), 1, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
743 {10, UPB_SIZE(16, 16), UPB_SIZE(6, 6), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
744 {15, UPB_SIZE(17, 17), UPB_SIZE(7, 7), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
745 {999, UPB_SIZE(20, 24), UPB_SIZE(0, 0), 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
746 };
747
748 const upb_MiniTable google_protobuf_FieldOptions_msginit = {
749 &google_protobuf_FieldOptions_submsgs[0],
750 &google_protobuf_FieldOptions__fields[0],
751 UPB_SIZE(24, 32), 8, kUpb_ExtMode_Extendable, 3, 255, 0,
752 };
753
754 static const upb_MiniTable_Sub google_protobuf_OneofOptions_submsgs[1] = {
755 {.submsg = &google_protobuf_UninterpretedOption_msginit},
756 };
757
758 static const upb_MiniTable_Field google_protobuf_OneofOptions__fields[1] = {
759 {999, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
760 };
761
762 const upb_MiniTable google_protobuf_OneofOptions_msginit = {
763 &google_protobuf_OneofOptions_submsgs[0],
764 &google_protobuf_OneofOptions__fields[0],
765 UPB_SIZE(8, 8), 1, kUpb_ExtMode_Extendable, 0, 255, 0,
766 };
767
768 static const upb_MiniTable_Sub google_protobuf_EnumOptions_submsgs[1] = {
769 {.submsg = &google_protobuf_UninterpretedOption_msginit},
770 };
771
772 static const upb_MiniTable_Field google_protobuf_EnumOptions__fields[3] = {
773 {2, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
774 {3, UPB_SIZE(2, 2), UPB_SIZE(2, 2), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
775 {999, UPB_SIZE(4, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
776 };
777
778 const upb_MiniTable google_protobuf_EnumOptions_msginit = {
779 &google_protobuf_EnumOptions_submsgs[0],
780 &google_protobuf_EnumOptions__fields[0],
781 UPB_SIZE(8, 16), 3, kUpb_ExtMode_Extendable, 0, 255, 0,
782 };
783
784 static const upb_MiniTable_Sub google_protobuf_EnumValueOptions_submsgs[1] = {
785 {.submsg = &google_protobuf_UninterpretedOption_msginit},
786 };
787
788 static const upb_MiniTable_Field google_protobuf_EnumValueOptions__fields[2] = {
789 {1, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
790 {999, UPB_SIZE(4, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
791 };
792
793 const upb_MiniTable google_protobuf_EnumValueOptions_msginit = {
794 &google_protobuf_EnumValueOptions_submsgs[0],
795 &google_protobuf_EnumValueOptions__fields[0],
796 UPB_SIZE(8, 16), 2, kUpb_ExtMode_Extendable, 1, 255, 0,
797 };
798
799 static const upb_MiniTable_Sub google_protobuf_ServiceOptions_submsgs[1] = {
800 {.submsg = &google_protobuf_UninterpretedOption_msginit},
801 };
802
803 static const upb_MiniTable_Field google_protobuf_ServiceOptions__fields[2] = {
804 {33, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
805 {999, UPB_SIZE(4, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
806 };
807
808 const upb_MiniTable google_protobuf_ServiceOptions_msginit = {
809 &google_protobuf_ServiceOptions_submsgs[0],
810 &google_protobuf_ServiceOptions__fields[0],
811 UPB_SIZE(8, 16), 2, kUpb_ExtMode_Extendable, 0, 255, 0,
812 };
813
814 static const upb_MiniTable_Sub google_protobuf_MethodOptions_submsgs[2] = {
815 {.subenum = &google_protobuf_MethodOptions_IdempotencyLevel_enuminit},
816 {.submsg = &google_protobuf_UninterpretedOption_msginit},
817 };
818
819 static const upb_MiniTable_Field google_protobuf_MethodOptions__fields[3] = {
820 {33, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
821 {34, UPB_SIZE(4, 4), UPB_SIZE(2, 2), 0, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
822 {999, UPB_SIZE(8, 8), UPB_SIZE(0, 0), 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
823 };
824
825 const upb_MiniTable google_protobuf_MethodOptions_msginit = {
826 &google_protobuf_MethodOptions_submsgs[0],
827 &google_protobuf_MethodOptions__fields[0],
828 UPB_SIZE(16, 16), 3, kUpb_ExtMode_Extendable, 0, 255, 0,
829 };
830
831 static const upb_MiniTable_Sub google_protobuf_UninterpretedOption_submsgs[1] = {
832 {.submsg = &google_protobuf_UninterpretedOption_NamePart_msginit},
833 };
834
835 static const upb_MiniTable_Field google_protobuf_UninterpretedOption__fields[7] = {
836 {2, UPB_SIZE(4, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
837 {3, UPB_SIZE(8, 16), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
838 {4, UPB_SIZE(32, 64), UPB_SIZE(2, 2), kUpb_NoSub, 4, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
839 {5, UPB_SIZE(40, 72), UPB_SIZE(3, 3), kUpb_NoSub, 3, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
840 {6, UPB_SIZE(48, 80), UPB_SIZE(4, 4), kUpb_NoSub, 1, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
841 {7, UPB_SIZE(16, 32), UPB_SIZE(5, 5), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
842 {8, UPB_SIZE(24, 48), UPB_SIZE(6, 6), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
843 };
844
845 const upb_MiniTable google_protobuf_UninterpretedOption_msginit = {
846 &google_protobuf_UninterpretedOption_submsgs[0],
847 &google_protobuf_UninterpretedOption__fields[0],
848 UPB_SIZE(56, 88), 7, kUpb_ExtMode_NonExtendable, 0, 255, 0,
849 };
850
851 static const upb_MiniTable_Field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
852 {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
853 {2, UPB_SIZE(1, 1), UPB_SIZE(2, 2), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
854 };
855
856 const upb_MiniTable google_protobuf_UninterpretedOption_NamePart_msginit = {
857 NULL,
858 &google_protobuf_UninterpretedOption_NamePart__fields[0],
859 UPB_SIZE(16, 24), 2, kUpb_ExtMode_NonExtendable, 2, 255, 2,
860 };
861
862 static const upb_MiniTable_Sub google_protobuf_SourceCodeInfo_submsgs[1] = {
863 {.submsg = &google_protobuf_SourceCodeInfo_Location_msginit},
864 };
865
866 static const upb_MiniTable_Field google_protobuf_SourceCodeInfo__fields[1] = {
867 {1, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
868 };
869
870 const upb_MiniTable google_protobuf_SourceCodeInfo_msginit = {
871 &google_protobuf_SourceCodeInfo_submsgs[0],
872 &google_protobuf_SourceCodeInfo__fields[0],
873 UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
874 };
875
876 static const upb_MiniTable_Field google_protobuf_SourceCodeInfo_Location__fields[5] = {
877 {1, UPB_SIZE(4, 8), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
878 {2, UPB_SIZE(8, 16), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
879 {3, UPB_SIZE(12, 24), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
880 {4, UPB_SIZE(20, 40), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
881 {6, UPB_SIZE(28, 56), UPB_SIZE(0, 0), kUpb_NoSub, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
882 };
883
884 const upb_MiniTable google_protobuf_SourceCodeInfo_Location_msginit = {
885 NULL,
886 &google_protobuf_SourceCodeInfo_Location__fields[0],
887 UPB_SIZE(32, 64), 5, kUpb_ExtMode_NonExtendable, 4, 255, 0,
888 };
889
890 static const upb_MiniTable_Sub google_protobuf_GeneratedCodeInfo_submsgs[1] = {
891 {.submsg = &google_protobuf_GeneratedCodeInfo_Annotation_msginit},
892 };
893
894 static const upb_MiniTable_Field google_protobuf_GeneratedCodeInfo__fields[1] = {
895 {1, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
896 };
897
898 const upb_MiniTable google_protobuf_GeneratedCodeInfo_msginit = {
899 &google_protobuf_GeneratedCodeInfo_submsgs[0],
900 &google_protobuf_GeneratedCodeInfo__fields[0],
901 UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
902 };
903
904 static const upb_MiniTable_Field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
905 {1, UPB_SIZE(12, 16), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
906 {2, UPB_SIZE(16, 24), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
907 {3, UPB_SIZE(4, 4), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
908 {4, UPB_SIZE(8, 8), UPB_SIZE(3, 3), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
909 };
910
911 const upb_MiniTable google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
912 NULL,
913 &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
914 UPB_SIZE(24, 40), 4, kUpb_ExtMode_NonExtendable, 4, 255, 0,
915 };
916
917 static const upb_MiniTable *messages_layout[27] = {
918 &google_protobuf_FileDescriptorSet_msginit,
919 &google_protobuf_FileDescriptorProto_msginit,
920 &google_protobuf_DescriptorProto_msginit,
921 &google_protobuf_DescriptorProto_ExtensionRange_msginit,
922 &google_protobuf_DescriptorProto_ReservedRange_msginit,
923 &google_protobuf_ExtensionRangeOptions_msginit,
924 &google_protobuf_FieldDescriptorProto_msginit,
925 &google_protobuf_OneofDescriptorProto_msginit,
926 &google_protobuf_EnumDescriptorProto_msginit,
927 &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
928 &google_protobuf_EnumValueDescriptorProto_msginit,
929 &google_protobuf_ServiceDescriptorProto_msginit,
930 &google_protobuf_MethodDescriptorProto_msginit,
931 &google_protobuf_FileOptions_msginit,
932 &google_protobuf_MessageOptions_msginit,
933 &google_protobuf_FieldOptions_msginit,
934 &google_protobuf_OneofOptions_msginit,
935 &google_protobuf_EnumOptions_msginit,
936 &google_protobuf_EnumValueOptions_msginit,
937 &google_protobuf_ServiceOptions_msginit,
938 &google_protobuf_MethodOptions_msginit,
939 &google_protobuf_UninterpretedOption_msginit,
940 &google_protobuf_UninterpretedOption_NamePart_msginit,
941 &google_protobuf_SourceCodeInfo_msginit,
942 &google_protobuf_SourceCodeInfo_Location_msginit,
943 &google_protobuf_GeneratedCodeInfo_msginit,
944 &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
945 };
946
947 const upb_MiniTable_Enum google_protobuf_FieldDescriptorProto_Type_enuminit = {
948 NULL,
949 0x7fffeULL,
950 0,
951 };
952
953 const upb_MiniTable_Enum google_protobuf_FieldDescriptorProto_Label_enuminit = {
954 NULL,
955 0xeULL,
956 0,
957 };
958
959 const upb_MiniTable_Enum google_protobuf_FileOptions_OptimizeMode_enuminit = {
960 NULL,
961 0xeULL,
962 0,
963 };
964
965 const upb_MiniTable_Enum google_protobuf_FieldOptions_CType_enuminit = {
966 NULL,
967 0x7ULL,
968 0,
969 };
970
971 const upb_MiniTable_Enum google_protobuf_FieldOptions_JSType_enuminit = {
972 NULL,
973 0x7ULL,
974 0,
975 };
976
977 const upb_MiniTable_Enum google_protobuf_MethodOptions_IdempotencyLevel_enuminit = {
978 NULL,
979 0x7ULL,
980 0,
981 };
982
983 static const upb_MiniTable_Enum *enums_layout[6] = {
984 &google_protobuf_FieldDescriptorProto_Type_enuminit,
985 &google_protobuf_FieldDescriptorProto_Label_enuminit,
986 &google_protobuf_FileOptions_OptimizeMode_enuminit,
987 &google_protobuf_FieldOptions_CType_enuminit,
988 &google_protobuf_FieldOptions_JSType_enuminit,
989 &google_protobuf_MethodOptions_IdempotencyLevel_enuminit,
990 };
991
992 const upb_MiniTable_File google_protobuf_descriptor_proto_upb_file_layout = {
993 messages_layout,
994 enums_layout,
995 NULL,
996 27,
997 6,
998 0,
999 };
1000
1001
1002
1003 /** upb/decode_fast.c ************************************************************/
1004 // Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64.
1005 // Also the table size grows by 2x.
1006 //
1007 // Could potentially be ported to other 64-bit archs that pass at least six
1008 // arguments in registers and have 8 unused high bits in pointers.
1009 //
1010 // The overall design is to create specialized functions for every possible
1011 // field type (eg. oneof boolean field with a 1 byte tag) and then dispatch
1012 // to the specialized function as quickly as possible.
1013
1014
1015
1016 /* Must be last. */
1017
1018 #if UPB_FASTTABLE
1019
// Every fast-path parsing function shares one signature. UPB_PARSE_PARAMS is
// the parameter list for the definition and UPB_PARSE_ARGS forwards the same
// six values, by name and in the same order, to the next function.
// Thanks to x86-64 calling conventions, these will stay in registers.
#define UPB_PARSE_PARAMS                                       \
  upb_Decoder *d, const char *ptr, upb_Message *msg,           \
      intptr_t table, uint64_t hasbits, uint64_t data

#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data
1027
/* Leaves the current fast-path function by returning the result of
 * fastdecode_generic(), forwarding d, ptr, msg, table and hasbits from the
 * enclosing function and passing 0 as the data argument.
 *
 * `m` is a message describing why the fast path gave up. It is unused unless
 * one of the debugging aids is enabled; to do so, put either of these in
 * front of the return statement:
 *     fprintf(stderr, m);
 *     __builtin_trap();
 */
#define RETURN_GENERIC(m) \
  return fastdecode_generic(d, ptr, msg, table, hasbits, 0);
1033
/* Field cardinality handled by a specialized parser. The enumerator suffix
 * (s, o, r, p) is token-pasted as CARD_##card by the generator macros, so the
 * names must not change. */
typedef enum {
  CARD_s = 0, /* singular: optional, not repeated */
  CARD_o = 1, /* member of a oneof */
  CARD_r = 2, /* repeated */
  CARD_p = 3  /* repeated, packed */
} upb_card;
1040
1041 UPB_NOINLINE
fastdecode_isdonefallback(UPB_PARSE_PARAMS)1042 static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) {
1043 int overrun = data;
1044 int status;
1045 ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
1046 if (ptr == NULL) {
1047 return fastdecode_err(d, status);
1048 }
1049 data = fastdecode_loadtag(ptr);
1050 UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);
1051 }
1052
1053 UPB_FORCEINLINE
fastdecode_dispatch(UPB_PARSE_PARAMS)1054 static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) {
1055 if (UPB_UNLIKELY(ptr >= d->limit_ptr)) {
1056 int overrun = ptr - d->end;
1057 if (UPB_LIKELY(overrun == d->limit)) {
1058 // Parse is finished.
1059 *(uint32_t*)msg |= hasbits; // Sync hasbits.
1060 const upb_MiniTable* l = decode_totablep(table);
1061 return UPB_UNLIKELY(l->required_count)
1062 ? decode_checkrequired(d, ptr, msg, l)
1063 : ptr;
1064 } else {
1065 data = overrun;
1066 UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS);
1067 }
1068 }
1069
1070 // Read two bytes of tag data (for a one-byte tag, the high byte is junk).
1071 data = fastdecode_loadtag(ptr);
1072 UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);
1073 }
1074
1075 UPB_FORCEINLINE
fastdecode_checktag(uint16_t data,int tagbytes)1076 static bool fastdecode_checktag(uint16_t data, int tagbytes) {
1077 if (tagbytes == 1) {
1078 return (data & 0xff) == 0;
1079 } else {
1080 return data == 0;
1081 }
1082 }
1083
1084 UPB_FORCEINLINE
fastdecode_longsize(const char * ptr,int * size)1085 static const char* fastdecode_longsize(const char* ptr, int* size) {
1086 int i;
1087 UPB_ASSERT(*size & 0x80);
1088 *size &= 0xff;
1089 for (i = 0; i < 3; i++) {
1090 ptr++;
1091 size_t byte = (uint8_t)ptr[-1];
1092 *size += (byte - 1) << (7 + 7 * i);
1093 if (UPB_LIKELY((byte & 0x80) == 0)) return ptr;
1094 }
1095 ptr++;
1096 size_t byte = (uint8_t)ptr[-1];
1097 // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected
1098 // for a 32 bit varint.
1099 if (UPB_UNLIKELY(byte >= 8)) return NULL;
1100 *size += (byte - 1) << 28;
1101 return ptr;
1102 }
1103
1104 UPB_FORCEINLINE
fastdecode_boundscheck(const char * ptr,size_t len,const char * end)1105 static bool fastdecode_boundscheck(const char* ptr, size_t len,
1106 const char* end) {
1107 uintptr_t uptr = (uintptr_t)ptr;
1108 uintptr_t uend = (uintptr_t)end + 16;
1109 uintptr_t res = uptr + len;
1110 return res < uptr || res > uend;
1111 }
1112
1113 UPB_FORCEINLINE
fastdecode_boundscheck2(const char * ptr,size_t len,const char * end)1114 static bool fastdecode_boundscheck2(const char* ptr, size_t len,
1115 const char* end) {
1116 // This is one extra branch compared to the more normal:
1117 // return (size_t)(end - ptr) < size;
1118 // However it is one less computation if we are just about to use "ptr + len":
1119 // https://godbolt.org/z/35YGPz
1120 // In microbenchmarks this shows an overall 4% improvement.
1121 uintptr_t uptr = (uintptr_t)ptr;
1122 uintptr_t uend = (uintptr_t)end;
1123 uintptr_t res = uptr + len;
1124 return res < uptr || res > uend;
1125 }
1126
1127 typedef const char* fastdecode_delimfunc(upb_Decoder* d, const char* ptr,
1128 void* ctx);
1129
1130 UPB_FORCEINLINE
fastdecode_delimited(upb_Decoder * d,const char * ptr,fastdecode_delimfunc * func,void * ctx)1131 static const char* fastdecode_delimited(upb_Decoder* d, const char* ptr,
1132 fastdecode_delimfunc* func, void* ctx) {
1133 ptr++;
1134 int len = (int8_t)ptr[-1];
1135 if (fastdecode_boundscheck2(ptr, len, d->limit_ptr)) {
1136 // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer.
1137 // If it exceeds the buffer limit, limit/limit_ptr will change during
1138 // sub-message parsing, so we need to preserve delta, not limit.
1139 if (UPB_UNLIKELY(len & 0x80)) {
1140 // Size varint >1 byte (length >= 128).
1141 ptr = fastdecode_longsize(ptr, &len);
1142 if (!ptr) {
1143 // Corrupt wire format: size exceeded INT_MAX.
1144 return NULL;
1145 }
1146 }
1147 if (ptr - d->end + (int)len > d->limit) {
1148 // Corrupt wire format: invalid limit.
1149 return NULL;
1150 }
1151 int delta = decode_pushlimit(d, ptr, len);
1152 ptr = func(d, ptr, ctx);
1153 decode_poplimit(d, ptr, delta);
1154 } else {
1155 // Fast case: Sub-message is <128 bytes and fits in the current buffer.
1156 // This means we can preserve limit/limit_ptr verbatim.
1157 const char* saved_limit_ptr = d->limit_ptr;
1158 int saved_limit = d->limit;
1159 d->limit_ptr = ptr + len;
1160 d->limit = d->limit_ptr - d->end;
1161 UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
1162 ptr = func(d, ptr, ctx);
1163 d->limit_ptr = saved_limit_ptr;
1164 d->limit = saved_limit;
1165 UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
1166 }
1167 return ptr;
1168 }
1169
1170 /* singular, oneof, repeated field handling ***********************************/
1171
1172 typedef struct {
1173 upb_Array* arr;
1174 void* end;
1175 } fastdecode_arr;
1176
/* What follows the repeated element that was just parsed. */
typedef enum {
  FD_NEXT_ATLIMIT = 0,   /* the decoder reported that input is done */
  FD_NEXT_SAMEFIELD = 1, /* the next tag belongs to the same field */
  FD_NEXT_OTHERFIELD = 2 /* the next tag belongs to a different field */
} fastdecode_next;

/* Result of fastdecode_nextrepeated(). */
typedef struct {
  void* dst;            /* slot for the next element */
  fastdecode_next next; /* what to do next */
  uint32_t tag;         /* next tag; left unset for FD_NEXT_ATLIMIT */
} fastdecode_nextret;
1188
1189 UPB_FORCEINLINE
fastdecode_resizearr(upb_Decoder * d,void * dst,fastdecode_arr * farr,int valbytes)1190 static void* fastdecode_resizearr(upb_Decoder* d, void* dst,
1191 fastdecode_arr* farr, int valbytes) {
1192 if (UPB_UNLIKELY(dst == farr->end)) {
1193 size_t old_size = farr->arr->size;
1194 size_t old_bytes = old_size * valbytes;
1195 size_t new_size = old_size * 2;
1196 size_t new_bytes = new_size * valbytes;
1197 char* old_ptr = _upb_array_ptr(farr->arr);
1198 char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes);
1199 uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
1200 farr->arr->size = new_size;
1201 farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2);
1202 dst = (void*)(new_ptr + (old_size * valbytes));
1203 farr->end = (void*)(new_ptr + (new_size * valbytes));
1204 }
1205 return dst;
1206 }
1207
1208 UPB_FORCEINLINE
fastdecode_tagmatch(uint32_t tag,uint64_t data,int tagbytes)1209 static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) {
1210 if (tagbytes == 1) {
1211 return (uint8_t)tag == (uint8_t)data;
1212 } else {
1213 return (uint16_t)tag == (uint16_t)data;
1214 }
1215 }
1216
1217 UPB_FORCEINLINE
fastdecode_commitarr(void * dst,fastdecode_arr * farr,int valbytes)1218 static void fastdecode_commitarr(void* dst, fastdecode_arr* farr,
1219 int valbytes) {
1220 farr->arr->len =
1221 (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes;
1222 }
1223
1224 UPB_FORCEINLINE
fastdecode_nextrepeated(upb_Decoder * d,void * dst,const char ** ptr,fastdecode_arr * farr,uint64_t data,int tagbytes,int valbytes)1225 static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst,
1226 const char** ptr,
1227 fastdecode_arr* farr,
1228 uint64_t data, int tagbytes,
1229 int valbytes) {
1230 fastdecode_nextret ret;
1231 dst = (char*)dst + valbytes;
1232
1233 if (UPB_LIKELY(!decode_isdone(d, ptr))) {
1234 ret.tag = fastdecode_loadtag(*ptr);
1235 if (fastdecode_tagmatch(ret.tag, data, tagbytes)) {
1236 ret.next = FD_NEXT_SAMEFIELD;
1237 } else {
1238 fastdecode_commitarr(dst, farr, valbytes);
1239 ret.next = FD_NEXT_OTHERFIELD;
1240 }
1241 } else {
1242 fastdecode_commitarr(dst, farr, valbytes);
1243 ret.next = FD_NEXT_ATLIMIT;
1244 }
1245
1246 ret.dst = dst;
1247 return ret;
1248 }
1249
1250 UPB_FORCEINLINE
fastdecode_fieldmem(upb_Message * msg,uint64_t data)1251 static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) {
1252 size_t ofs = data >> 48;
1253 return (char*)msg + ofs;
1254 }
1255
1256 UPB_FORCEINLINE
fastdecode_getfield(upb_Decoder * d,const char * ptr,upb_Message * msg,uint64_t * data,uint64_t * hasbits,fastdecode_arr * farr,int valbytes,upb_card card)1257 static void* fastdecode_getfield(upb_Decoder* d, const char* ptr,
1258 upb_Message* msg, uint64_t* data,
1259 uint64_t* hasbits, fastdecode_arr* farr,
1260 int valbytes, upb_card card) {
1261 switch (card) {
1262 case CARD_s: {
1263 uint8_t hasbit_index = *data >> 24;
1264 // Set hasbit and return pointer to scalar field.
1265 *hasbits |= 1ull << hasbit_index;
1266 return fastdecode_fieldmem(msg, *data);
1267 }
1268 case CARD_o: {
1269 uint16_t case_ofs = *data >> 32;
1270 uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t);
1271 uint8_t field_number = *data >> 24;
1272 *oneof_case = field_number;
1273 return fastdecode_fieldmem(msg, *data);
1274 }
1275 case CARD_r: {
1276 // Get pointer to upb_Array and allocate/expand if necessary.
1277 uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
1278 upb_Array** arr_p = fastdecode_fieldmem(msg, *data);
1279 char* begin;
1280 *(uint32_t*)msg |= *hasbits;
1281 *hasbits = 0;
1282 if (UPB_LIKELY(!*arr_p)) {
1283 farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2);
1284 *arr_p = farr->arr;
1285 } else {
1286 farr->arr = *arr_p;
1287 }
1288 begin = _upb_array_ptr(farr->arr);
1289 farr->end = begin + (farr->arr->size * valbytes);
1290 *data = fastdecode_loadtag(ptr);
1291 return begin + (farr->arr->len * valbytes);
1292 }
1293 default:
1294 UPB_UNREACHABLE();
1295 }
1296 }
1297
1298 UPB_FORCEINLINE
fastdecode_flippacked(uint64_t * data,int tagbytes)1299 static bool fastdecode_flippacked(uint64_t* data, int tagbytes) {
1300 *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype.
1301 return fastdecode_checktag(*data, tagbytes);
1302 }
1303
/* Tag guard placed at the top of a parsing function. Uses `data` and the
 * other UPB_PARSE_ARGS names from the enclosing function.
 *
 * If the tag check fails and the field is repeated (card == CARD_r), the tag
 * is patched with fastdecode_flippacked(); when the patched tag matches,
 * control tail-calls `func`, the parser for the other encoding of the same
 * field. Every other mismatch falls back to the generic decoder. */
#define FASTDECODE_CHECKPACKED(tagbytes, card, func)      \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
    if (card == CARD_r) {                                 \
      if (fastdecode_flippacked(&data, tagbytes)) {       \
        UPB_MUSTTAIL return func(UPB_PARSE_ARGS);         \
      }                                                   \
    }                                                     \
    RETURN_GENERIC("packed check tag mismatch\n");        \
  }
1311
1312 /* varint fields **************************************************************/
1313
1314 UPB_FORCEINLINE
fastdecode_munge(uint64_t val,int valbytes,bool zigzag)1315 static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) {
1316 if (valbytes == 1) {
1317 return val != 0;
1318 } else if (zigzag) {
1319 if (valbytes == 4) {
1320 uint32_t n = val;
1321 return (n >> 1) ^ -(int32_t)(n & 1);
1322 } else if (valbytes == 8) {
1323 return (val >> 1) ^ -(int64_t)(val & 1);
1324 }
1325 UPB_UNREACHABLE();
1326 }
1327 return val;
1328 }
1329
1330 UPB_FORCEINLINE
fastdecode_varint64(const char * ptr,uint64_t * val)1331 static const char* fastdecode_varint64(const char* ptr, uint64_t* val) {
1332 ptr++;
1333 *val = (uint8_t)ptr[-1];
1334 if (UPB_UNLIKELY(*val & 0x80)) {
1335 int i;
1336 for (i = 0; i < 8; i++) {
1337 ptr++;
1338 uint64_t byte = (uint8_t)ptr[-1];
1339 *val += (byte - 1) << (7 + 7 * i);
1340 if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
1341 }
1342 ptr++;
1343 uint64_t byte = (uint8_t)ptr[-1];
1344 if (byte > 1) {
1345 return NULL;
1346 }
1347 *val += (byte - 1) << 63;
1348 }
1349 done:
1350 UPB_ASSUME(ptr != NULL);
1351 return ptr;
1352 }
1353
/* Body of a parser for a varint field in unpacked encoding (singular, oneof,
 * or repeated with one tag per element).
 *
 * Steps: check the tag (switching to `packed` if the repeated field turns out
 * to be packed), obtain the destination, decode and convert the varint, and
 * store its low `valbytes` bytes. For repeated fields it keeps looping while
 * the following tag is the same field; otherwise it dispatches on the next
 * tag or returns at the limit. Non-repeated fields finish through
 * fastdecode_dispatch(). */
#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                                  valbytes, card, zigzag, packed)              \
  uint64_t decoded;                                                            \
  void* slot;                                                                  \
  fastdecode_arr rep;                                                          \
                                                                               \
  FASTDECODE_CHECKPACKED(tagbytes, card, packed);                              \
                                                                               \
  slot = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &rep, valbytes,     \
                             card);                                            \
  if (card == CARD_r && UPB_UNLIKELY(!slot)) {                                 \
    RETURN_GENERIC("need array resize\n");                                     \
  }                                                                            \
                                                                               \
  next_element:                                                                \
  if (card == CARD_r) {                                                        \
    slot = fastdecode_resizearr(d, slot, &rep, valbytes);                      \
  }                                                                            \
                                                                               \
  ptr += tagbytes;                                                             \
  ptr = fastdecode_varint64(ptr, &decoded);                                    \
  if (ptr == NULL) return fastdecode_err(d, kUpb_DecodeStatus_Malformed);      \
  decoded = fastdecode_munge(decoded, valbytes, zigzag);                       \
  memcpy(slot, &decoded, valbytes);                                            \
                                                                               \
  if (card == CARD_r) {                                                        \
    fastdecode_nextret step = fastdecode_nextrepeated(                         \
        d, slot, &ptr, &rep, data, tagbytes, valbytes);                        \
    if (step.next == FD_NEXT_SAMEFIELD) {                                      \
      slot = step.dst;                                                         \
      goto next_element;                                                       \
    } else if (step.next == FD_NEXT_OTHERFIELD) {                              \
      data = step.tag;                                                         \
      UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);              \
    } else if (step.next == FD_NEXT_ATLIMIT) {                                 \
      return ptr;                                                              \
    }                                                                          \
  }                                                                            \
                                                                               \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1397
1398 typedef struct {
1399 uint8_t valbytes;
1400 bool zigzag;
1401 void* dst;
1402 fastdecode_arr farr;
1403 } fastdecode_varintdata;
1404
1405 UPB_FORCEINLINE
fastdecode_topackedvarint(upb_Decoder * d,const char * ptr,void * ctx)1406 static const char* fastdecode_topackedvarint(upb_Decoder* d, const char* ptr,
1407 void* ctx) {
1408 fastdecode_varintdata* data = ctx;
1409 void* dst = data->dst;
1410 uint64_t val;
1411
1412 while (!decode_isdone(d, &ptr)) {
1413 dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes);
1414 ptr = fastdecode_varint64(ptr, &val);
1415 if (ptr == NULL) return NULL;
1416 val = fastdecode_munge(val, data->valbytes, data->zigzag);
1417 memcpy(dst, &val, data->valbytes);
1418 dst = (char*)dst + data->valbytes;
1419 }
1420
1421 fastdecode_commitarr(dst, &data->farr, data->valbytes);
1422 return ptr;
1423 }
1424
/* Body of a parser for a repeated varint field in packed encoding.
 *
 * Steps: check the tag (switching to `unpacked` if the field turns out not to
 * be packed), obtain the destination array, then let fastdecode_delimited()
 * run fastdecode_topackedvarint() over the length-delimited payload. A NULL
 * result is reported as kUpb_DecodeStatus_Malformed. Parsing continues via
 * fastdecode_dispatch() with 0 as the data argument.
 *
 * The context is initialized positionally on purpose: a designated
 * initializer naming .valbytes would be rewritten by the macro parameter of
 * the same name. */
#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                                valbytes, zigzag, unpacked)                  \
  fastdecode_varintdata packed_state = {valbytes, zigzag};                   \
                                                                             \
  FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked);                        \
                                                                             \
  packed_state.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits,       \
                                         &packed_state.farr, valbytes,       \
                                         CARD_r);                            \
  if (UPB_UNLIKELY(!packed_state.dst)) {                                     \
    RETURN_GENERIC("need array resize\n");                                   \
  }                                                                          \
                                                                             \
  ptr += tagbytes;                                                           \
  ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint,             \
                             &packed_state);                                 \
                                                                             \
  if (UPB_UNLIKELY(ptr == NULL)) {                                           \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed);                   \
  }                                                                          \
                                                                             \
  UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0);
1445
/* Selects the varint parser body for `card`: the packed body for CARD_p, the
 * non-packed body for every other cardinality.  `unpacked` and `packed` name
 * the sibling parser functions each body is given. */
#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                          valbytes, card, zigzag, unpacked, packed) \
  if (card != CARD_p) { \
    FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                              valbytes, card, zigzag, packed); \
  } else { \
    FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                            valbytes, zigzag, unpacked); \
  }
1455
/* Zigzag flag selected by the type letter used in F() below: "z" types are
 * zigzag-decoded, "b" and "v" types are not. */
#define z_ZZ true
#define b_ZZ false
#define v_ZZ false

/* Generate all combinations:
 * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */

/* Defines one parser named upb_p<card><type><valbytes>_<tagbytes>bt.  The
 * "r" and "p" variants of the same type/size are passed along as the
 * `unpacked` and `packed` arguments of FASTDECODE_VARINT. */
#define F(card, type, valbytes, tagbytes) \
  UPB_NOINLINE \
  const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
    FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
                      CARD_##card, type##_ZZ, \
                      upb_pr##type##valbytes##_##tagbytes##bt, \
                      upb_pp##type##valbytes##_##tagbytes##bt); \
  }

/* All five type/width pairs for one cardinality and tag size. */
#define TYPES(card, tagbytes) \
  F(card, b, 1, tagbytes) \
  F(card, v, 4, tagbytes) \
  F(card, v, 8, tagbytes) \
  F(card, z, 4, tagbytes) \
  F(card, z, 8, tagbytes)

/* Both tag sizes (1 and 2 bytes) for one cardinality. */
#define TAGBYTES(card) \
  TYPES(card, 1) \
  TYPES(card, 2)

TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)
TAGBYTES(p)

/* Drop the helper macros so they do not leak into the following sections. */
#undef z_ZZ
#undef b_ZZ
#undef v_ZZ
#undef o_ONEOF
#undef s_ONEOF
#undef r_ONEOF
#undef F
#undef TYPES
#undef TAGBYTES
#undef FASTDECODE_UNPACKEDVARINT
#undef FASTDECODE_PACKEDVARINT
#undef FASTDECODE_VARINT
1500
1501 /* fixed fields ***************************************************************/
1502
/* Parser body for a non-packed fixed-width field of `valbytes` bytes.  For
 * CARD_r it keeps consuming elements for as long as
 * fastdecode_nextrepeated() reports FD_NEXT_SAMEFIELD.  Declares locals and
 * the label `again`, so it can be expanded only once per function. */
#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                                 valbytes, card, packed) \
  void* dst; \
  fastdecode_arr farr; \
  \
  /* Defined outside this view; it is given the packed parser. */ \
  FASTDECODE_CHECKPACKED(tagbytes, card, packed) \
  \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \
                            card); \
  if (card == CARD_r) { \
    if (UPB_UNLIKELY(!dst)) { \
      RETURN_GENERIC("couldn't allocate array in arena\n"); \
    } \
  } \
  \
  again: \
  if (card == CARD_r) { \
    dst = fastdecode_resizearr(d, dst, &farr, valbytes); \
  } \
  \
  /* Skip the tag and copy the raw value bytes straight into the field. */ \
  ptr += tagbytes; \
  memcpy(dst, ptr, valbytes); \
  ptr += valbytes; \
  \
  if (card == CARD_r) { \
    fastdecode_nextret ret = fastdecode_nextrepeated( \
        d, dst, &ptr, &farr, data, tagbytes, valbytes); \
    switch (ret.next) { \
      case FD_NEXT_SAMEFIELD: \
        dst = ret.dst; \
        goto again; \
      case FD_NEXT_OTHERFIELD: \
        data = ret.tag; \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT: \
        return ptr; \
    } \
  } \
  \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1543
/* Parser body for a packed repeated fixed-width field.  Reads the length
 * prefix, checks that the payload lies within d->limit_ptr and is a whole
 * number of `valbytes`-sized elements, then copies the payload into the
 * array in one memcpy.
 *
 * Failure to allocate or grow the array is reported as
 * kUpb_DecodeStatus_OutOfMemory; the grow result must be checked because the
 * memcpy below writes `size` bytes into the array's storage.
 *
 * NOTE(review): when the array already exists its contents are overwritten
 * from index 0 rather than appended to -- confirm this is intended. */
#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                               valbytes, unpacked) \
  FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \
  \
  ptr += tagbytes; \
  int size = (uint8_t)ptr[0]; \
  ptr++; \
  /* High bit set: the length continues in the following byte(s). */ \
  if (size & 0x80) { \
    ptr = fastdecode_longsize(ptr, &size); \
  } \
  \
  if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) || \
                   (size % valbytes) != 0)) { \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
  } \
  \
  upb_Array** arr_p = fastdecode_fieldmem(msg, data); \
  upb_Array* arr = *arr_p; \
  uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \
  int elems = size / valbytes; \
  \
  if (UPB_LIKELY(!arr)) { \
    *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \
    if (UPB_UNLIKELY(!arr)) { \
      return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory); \
    } \
  } else if (UPB_UNLIKELY(!_upb_Array_Resize(arr, elems, &d->arena))) { \
    return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory); \
  } \
  \
  char* dst = _upb_array_ptr(arr); \
  memcpy(dst, ptr, size); \
  arr->len = elems; \
  \
  ptr += size; \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1580
/* Selects the fixed-width parser body for `card`: the packed body for
 * CARD_p, the non-packed body for every other cardinality.  `unpacked` and
 * `packed` name the sibling parser functions each body is given. */
#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                         valbytes, card, unpacked, packed) \
  if (card != CARD_p) { \
    FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                             valbytes, card, packed); \
  } else { \
    FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                           valbytes, unpacked); \
  }
1590
1591 /* Generate all combinations:
1592 * {s,o,r,p} x {f4,f8} x {1bt,2bt} */
1593
1594 #define F(card, valbytes, tagbytes) \
1595 UPB_NOINLINE \
1596 const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
1597 FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
1598 CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \
1599 upb_prf##valbytes##_##tagbytes##bt); \
1600 }
1601
1602 #define TYPES(card, tagbytes) \
1603 F(card, 4, tagbytes) \
1604 F(card, 8, tagbytes)
1605
1606 #define TAGBYTES(card) \
1607 TYPES(card, 1) \
1608 TYPES(card, 2)
1609
1610 TAGBYTES(s)
1611 TAGBYTES(o)
1612 TAGBYTES(r)
1613 TAGBYTES(p)
1614
1615 #undef F
1616 #undef TYPES
1617 #undef TAGBYTES
1618 #undef FASTDECODE_UNPACKEDFIXED
1619 #undef FASTDECODE_PACKEDFIXED
1620
1621 /* string fields **************************************************************/
1622
/* Function type: takes the decoder, the read position, the message, its
 * mini-table, the pending hasbits and a destination string view, and returns
 * a read position. */
typedef const char* fastdecode_copystr_func(struct upb_Decoder* d,
                                            const char* ptr, upb_Message* msg,
                                            const upb_MiniTable* table,
                                            uint64_t hasbits,
                                            upb_StringView* dst);
1628
1629 UPB_NOINLINE
fastdecode_verifyutf8(upb_Decoder * d,const char * ptr,upb_Message * msg,intptr_t table,uint64_t hasbits,uint64_t data)1630 static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr,
1631 upb_Message* msg, intptr_t table,
1632 uint64_t hasbits, uint64_t data) {
1633 upb_StringView* dst = (upb_StringView*)data;
1634 if (!decode_verifyutf8_inl(dst->data, dst->size)) {
1635 return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8);
1636 }
1637 UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1638 }
1639
/* Slow-path body shared by fastdecode_longstring_utf8() and
 * fastdecode_longstring_noutf8().  On entry `ptr` points at the first length
 * byte.  The string is either aliased (kUpb_DecodeOption_AliasString) or
 * copied into arena memory, then control continues with UTF-8 verification
 * or plain dispatch.  Note that the inner `char* data` deliberately lives in
 * its own block: it shadows the `data` parameter of the enclosing function,
 * which is assigned again further down. */
#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \
  int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \
  ptr++; \
  /* High bit set: the length continues in the following byte(s). */ \
  if (size & 0x80) { \
    ptr = fastdecode_longsize(ptr, &size); \
  } \
  \
  if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { \
    /* Leave an empty view behind rather than a dangling size. */ \
    dst->size = 0; \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
  } \
  \
  if (d->options & kUpb_DecodeOption_AliasString) { \
    dst->data = ptr; \
    dst->size = size; \
  } else { \
    char* data = upb_Arena_Malloc(&d->arena, size); \
    if (!data) { \
      return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory); \
    } \
    memcpy(data, ptr, size); \
    dst->data = data; \
    dst->size = size; \
  } \
  \
  ptr += size; \
  if (validate_utf8) { \
    /* fastdecode_verifyutf8() expects the view's address in `data`. */ \
    data = (uint64_t)dst; \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
  } else { \
    UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
  }
1672
1673 UPB_NOINLINE
fastdecode_longstring_utf8(struct upb_Decoder * d,const char * ptr,upb_Message * msg,intptr_t table,uint64_t hasbits,uint64_t data)1674 static const char* fastdecode_longstring_utf8(struct upb_Decoder* d,
1675 const char* ptr, upb_Message* msg,
1676 intptr_t table, uint64_t hasbits,
1677 uint64_t data) {
1678 upb_StringView* dst = (upb_StringView*)data;
1679 FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true);
1680 }
1681
1682 UPB_NOINLINE
fastdecode_longstring_noutf8(struct upb_Decoder * d,const char * ptr,upb_Message * msg,intptr_t table,uint64_t hasbits,uint64_t data)1683 static const char* fastdecode_longstring_noutf8(
1684 struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table,
1685 uint64_t hasbits, uint64_t data) {
1686 upb_StringView* dst = (upb_StringView*)data;
1687 FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false);
1688 }
1689
1690 UPB_FORCEINLINE
fastdecode_docopy(upb_Decoder * d,const char * ptr,uint32_t size,int copy,char * data,upb_StringView * dst)1691 static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
1692 int copy, char* data, upb_StringView* dst) {
1693 d->arena.head.ptr += copy;
1694 dst->data = data;
1695 UPB_UNPOISON_MEMORY_REGION(data, copy);
1696 memcpy(data, ptr, copy);
1697 UPB_POISON_MEMORY_REGION(data + size, copy - size);
1698 }
1699
/* Parser body for string fields when strings must be copied (the
 * AliasString option is asserted to be off).  Strings whose length fits in
 * a single byte are copied in fixed-size chunks (16/32/64/128 bytes) taken
 * directly from the arena head; everything else, and every case where the
 * arena or input cannot cover the chunk, goes to the long-string slow path.
 * Declares locals and the labels `again` and `longstr`. */
#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
                              card, validate_utf8) \
  upb_StringView* dst; \
  fastdecode_arr farr; \
  int64_t size; \
  size_t arena_has; \
  size_t common_has; \
  char* buf; \
  \
  UPB_ASSERT((d->options & kUpb_DecodeOption_AliasString) == 0); \
  UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
  \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
                            sizeof(upb_StringView), card); \
  \
  again: \
  if (card == CARD_r) { \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \
  } \
  \
  /* Read the first length byte as an unsigned value; 128 and above means */ \
  /* the length has more bytes and is handled by `longstr`. */ \
  size = (uint8_t)ptr[tagbytes]; \
  ptr += tagbytes + 1; \
  dst->size = size; \
  \
  buf = d->arena.head.ptr; \
  arena_has = _upb_ArenaHas(&d->arena); \
  /* NOTE(review): the +16 presumably reflects readable slop past d->end */ \
  /* -- confirm against the decoder's buffer handling. */ \
  common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \
  \
  if (UPB_LIKELY(size <= 15 - tagbytes)) { \
    if (arena_has < 16) goto longstr; \
    d->arena.head.ptr += 16; \
    /* Copy 16 bytes starting at the tag, so tag + length byte + payload */ \
    /* all land in `buf`; the string itself starts after tag and length. */ \
    memcpy(buf, ptr - tagbytes - 1, 16); \
    dst->data = buf + tagbytes + 1; \
  } else if (UPB_LIKELY(size <= 32)) { \
    if (UPB_UNLIKELY(common_has < 32)) goto longstr; \
    fastdecode_docopy(d, ptr, size, 32, buf, dst); \
  } else if (UPB_LIKELY(size <= 64)) { \
    if (UPB_UNLIKELY(common_has < 64)) goto longstr; \
    fastdecode_docopy(d, ptr, size, 64, buf, dst); \
  } else if (UPB_LIKELY(size < 128)) { \
    if (UPB_UNLIKELY(common_has < 128)) goto longstr; \
    fastdecode_docopy(d, ptr, size, 128, buf, dst); \
  } else { \
    goto longstr; \
  } \
  \
  ptr += size; \
  \
  if (card == CARD_r) { \
    /* Repeated fields validate inline because the loop continues here. */ \
    if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
      return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8); \
    } \
    fastdecode_nextret ret = fastdecode_nextrepeated( \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \
    switch (ret.next) { \
      case FD_NEXT_SAMEFIELD: \
        dst = ret.dst; \
        goto again; \
      case FD_NEXT_OTHERFIELD: \
        data = ret.tag; \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT: \
        return ptr; \
    } \
  } \
  \
  if (card != CARD_r && validate_utf8) { \
    /* fastdecode_verifyutf8() expects the view's address in `data`. */ \
    data = (uint64_t)dst; \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
  } \
  \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
  \
  longstr: \
  if (card == CARD_r) { \
    /* Commit with the cursor one past `dst`, i.e. including the element */ \
    /* the slow path is about to fill in. */ \
    fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \
  } \
  /* Step back onto the length byte; the slow path re-reads it. */ \
  ptr--; \
  if (validate_utf8) { \
    UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \
                                                   hasbits, (uint64_t)dst); \
  } else { \
    UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \
                                                     hasbits, (uint64_t)dst); \
  }
1785
/* Parser body for string fields when strings may alias the input buffer.
 * If the AliasString option is off, control is handed to `copyfunc` (the
 * copying parser for the same field shape) instead.  Declares locals and the
 * label `again`.
 *
 * NOTE(review): unlike FASTDECODE_COPYSTRING's `longstr` path, the
 * long-string branch below does not call fastdecode_commitarr() for CARD_r
 * -- confirm whether the repeated-field element is still committed. */
#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \
                          copyfunc, validate_utf8) \
  upb_StringView* dst; \
  fastdecode_arr farr; \
  int64_t size; \
  \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
    RETURN_GENERIC("string field tag mismatch\n"); \
  } \
  \
  if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \
    UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \
  } \
  \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
                            sizeof(upb_StringView), card); \
  \
  again: \
  if (card == CARD_r) { \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \
  } \
  \
  /* Signed read: a length byte with the high bit set becomes negative. */ \
  /* Presumably fastdecode_boundscheck() then fails, sending multi-byte */ \
  /* lengths to the long-string path, which re-reads the size itself. */ \
  size = (int8_t)ptr[tagbytes]; \
  ptr += tagbytes + 1; \
  dst->data = ptr; \
  dst->size = size; \
  \
  if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \
    /* Step back onto the length byte for the slow path. */ \
    ptr--; \
    if (validate_utf8) { \
      return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \
                                        (uint64_t)dst); \
    } else { \
      return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \
                                          (uint64_t)dst); \
    } \
  } \
  \
  ptr += size; \
  \
  if (card == CARD_r) { \
    if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
      return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8); \
    } \
    fastdecode_nextret ret = fastdecode_nextrepeated( \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \
    switch (ret.next) { \
      case FD_NEXT_SAMEFIELD: \
        dst = ret.dst; \
        if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \
          /* Buffer flipped and we can't alias any more. Bounce to */ \
          /* copyfunc(), but via dispatch since we need to reload table */ \
          /* data also. */ \
          fastdecode_commitarr(dst, &farr, sizeof(upb_StringView)); \
          data = ret.tag; \
          UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
        } \
        goto again; \
      case FD_NEXT_OTHERFIELD: \
        data = ret.tag; \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT: \
        return ptr; \
    } \
  } \
  \
  if (card != CARD_r && validate_utf8) { \
    /* fastdecode_verifyutf8() expects the view's address in `data`. */ \
    data = (uint64_t)dst; \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
  } \
  \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1858
1859 /* Generate all combinations:
1860 * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */
1861
1862 #define s_VALIDATE true
1863 #define b_VALIDATE false
1864
1865 #define F(card, tagbytes, type) \
1866 UPB_NOINLINE \
1867 const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
1868 FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
1869 CARD_##card, type##_VALIDATE); \
1870 } \
1871 const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
1872 FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \
1873 CARD_##card, upb_c##card##type##_##tagbytes##bt, \
1874 type##_VALIDATE); \
1875 }
1876
1877 #define UTF8(card, tagbytes) \
1878 F(card, tagbytes, s) \
1879 F(card, tagbytes, b)
1880
1881 #define TAGBYTES(card) \
1882 UTF8(card, 1) \
1883 UTF8(card, 2)
1884
1885 TAGBYTES(s)
TAGBYTES(o)1886 TAGBYTES(o)
1887 TAGBYTES(r)
1888
1889 #undef s_VALIDATE
1890 #undef b_VALIDATE
1891 #undef F
1892 #undef TAGBYTES
1893 #undef FASTDECODE_LONGSTRING
1894 #undef FASTDECODE_COPYSTRING
1895 #undef FASTDECODE_STRING
1896
1897 /* message fields *************************************************************/
1898
1899 UPB_INLINE
1900 upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l,
1901 int msg_ceil_bytes) {
1902 size_t size = l->size + sizeof(upb_Message_Internal);
1903 char* msg_data;
1904 if (UPB_LIKELY(msg_ceil_bytes > 0 &&
1905 _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) {
1906 UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
1907 msg_data = d->arena.head.ptr;
1908 d->arena.head.ptr += size;
1909 UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
1910 memset(msg_data, 0, msg_ceil_bytes);
1911 UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
1912 } else {
1913 msg_data = (char*)upb_Arena_Malloc(&d->arena, size);
1914 memset(msg_data, 0, size);
1915 }
1916 return msg_data + sizeof(upb_Message_Internal);
1917 }
1918
/* Context handed to fastdecode_tosubmsg() through the void* argument of
 * fastdecode_delimited().  FASTDECODE_SUBMSG initialises `table`
 * positionally, so it must remain the first member. */
typedef struct {
  intptr_t table;   /* Sub-message table value passed to fastdecode_dispatch(). */
  upb_Message* msg; /* Sub-message being filled in. */
} fastdecode_submsgdata;
1923
1924 UPB_FORCEINLINE
fastdecode_tosubmsg(upb_Decoder * d,const char * ptr,void * ctx)1925 static const char* fastdecode_tosubmsg(upb_Decoder* d, const char* ptr,
1926 void* ctx) {
1927 fastdecode_submsgdata* submsg = ctx;
1928 ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0);
1929 UPB_ASSUME(ptr != NULL);
1930 return ptr;
1931 }
1932
/* Parser body for a sub-message field.  `msg_ceil_bytes` is forwarded to
 * decode_newmsg_ceil().  Declares locals and the label `again`.
 *
 * d->depth is decremented on entry and must be restored on every path that
 * leaves this body normally, including the fallback to the generic decoder
 * when the sub-message has no fast table; otherwise each such fallback would
 * permanently consume one level of the recursion budget. */
#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \
                          msg_ceil_bytes, card) \
  \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
    RETURN_GENERIC("submessage field tag mismatch\n"); \
  } \
  \
  if (--d->depth == 0) { \
    return fastdecode_err(d, kUpb_DecodeStatus_MaxDepthExceeded); \
  } \
  \
  upb_Message** dst; \
  /* Index of the sub-message's mini-table within tablep->subs. */ \
  uint32_t submsg_idx = (data >> 16) & 0xff; \
  const upb_MiniTable* tablep = decode_totablep(table); \
  const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg; \
  /* Positional initialiser: fills submsg.table. */ \
  fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \
  fastdecode_arr farr; \
  \
  if (subtablep->table_mask == (uint8_t)-1) { \
    /* Undo the decrement above before handing the field over. */ \
    d->depth++; \
    RETURN_GENERIC("submessage doesn't have fast tables."); \
  } \
  \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
                            sizeof(upb_Message*), card); \
  \
  if (card == CARD_s) { \
    /* Flush pending hasbits into the message before recursing. */ \
    *(uint32_t*)msg |= hasbits; \
    hasbits = 0; \
  } \
  \
  again: \
  if (card == CARD_r) { \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*)); \
  } \
  \
  submsg.msg = *dst; \
  \
  /* Repeated fields always get a fresh element; singular fields reuse an */ \
  /* existing sub-message when there is one. */ \
  if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \
    *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \
  } \
  \
  ptr += tagbytes; \
  ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \
  \
  if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
  } \
  \
  if (card == CARD_r) { \
    fastdecode_nextret ret = fastdecode_nextrepeated( \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*)); \
    switch (ret.next) { \
      case FD_NEXT_SAMEFIELD: \
        dst = ret.dst; \
        goto again; \
      case FD_NEXT_OTHERFIELD: \
        d->depth++; \
        data = ret.tag; \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT: \
        d->depth++; \
        return ptr; \
    } \
  } \
  \
  d->depth++; \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
2000
/* Defines one sub-message parser named
 * upb_p<card>m_<tagbytes>bt_max<size_ceil>b.  `ceil_arg` is the value passed
 * to FASTDECODE_SUBMSG as msg_ceil_bytes. */
#define F(card, tagbytes, size_ceil, ceil_arg) \
  const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \
      UPB_PARSE_PARAMS) { \
    FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \
                      CARD_##card); \
  }

/* One parser per size ceiling.  The "max" variant passes -1, which makes
 * decode_newmsg_ceil() skip its fixed-size fast path. */
#define SIZES(card, tagbytes) \
  F(card, tagbytes, 64, 64) \
  F(card, tagbytes, 128, 128) \
  F(card, tagbytes, 192, 192) \
  F(card, tagbytes, 256, 256) \
  F(card, tagbytes, max, -1)

/* Both tag sizes (1 and 2 bytes) for one cardinality. */
#define TAGBYTES(card) \
  SIZES(card, 1) \
  SIZES(card, 2)

TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)

/* Drop the helper macros so they do not leak past this section. */
#undef TAGBYTES
#undef SIZES
#undef F
#undef FASTDECODE_SUBMSG
2027
2028 #endif /* UPB_FASTTABLE */
2029
2030 /** upb/json_decode.c ************************************************************/
2031
2032 #include <errno.h>
2033 #include <float.h>
2034 #include <inttypes.h>
2035 #include <limits.h>
2036 #include <math.h>
2037 #include <setjmp.h>
2038 #include <stdlib.h>
2039 #include <string.h>
2040
2041
2042 /* Special header, must be included last. */
2043
/* State of one JSON parse. */
typedef struct {
  const char *ptr, *end; /* Current read position and end of the input. */
  upb_Arena* arena; /* TODO: should we have a tmp arena for tmp data? */
  const upb_DefPool* symtab;
  int depth; /* Remaining nesting budget: jsondec_push() decrements it and
              * fails once it drops below zero. */
  upb_Status* status; /* Receives the message built by jsondec_err() and
                       * jsondec_errf(). */
  jmp_buf err; /* Jump target used by every error report. */
  int line; /* Incremented by jsondec_skipws() for each '\n'; shown in
             * error messages. */
  const char* line_begin; /* Position of the most recent '\n' seen by
                           * jsondec_skipws(); the reported column is
                           * ptr - line_begin. */
  bool is_first; /* Set by jsondec_push(), cleared by jsondec_seqnext(): true
                  * until the first element of the current object/array has
                  * been requested. */
  int options;
  const upb_FieldDef* debug_field;
} jsondec;
2057
/* Kind of JSON value that starts at the current position, as reported by
 * jsondec_rawpeek() and jsondec_peek(). */
enum { JD_OBJECT, JD_ARRAY, JD_STRING, JD_NUMBER, JD_TRUE, JD_FALSE, JD_NULL };
2059
/* Forward declarations of mutually-recursive functions; their definitions
 * appear later in the file. */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m);
static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f);
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m);
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m);
2068
/* Returns true when the string view `str` has exactly the same bytes as the
 * NUL-terminated literal `lit`. */
static bool jsondec_streql(upb_StringView str, const char* lit) {
  if (str.size != strlen(lit)) return false;
  return memcmp(str.data, lit, str.size) == 0;
}
2072
jsondec_isnullvalue(const upb_FieldDef * f)2073 static bool jsondec_isnullvalue(const upb_FieldDef* f) {
2074 return upb_FieldDef_CType(f) == kUpb_CType_Enum &&
2075 strcmp(upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f)),
2076 "google.protobuf.NullValue") == 0;
2077 }
2078
jsondec_isvalue(const upb_FieldDef * f)2079 static bool jsondec_isvalue(const upb_FieldDef* f) {
2080 return (upb_FieldDef_CType(f) == kUpb_CType_Message &&
2081 upb_MessageDef_WellKnownType(upb_FieldDef_MessageSubDef(f)) ==
2082 kUpb_WellKnown_Value) ||
2083 jsondec_isnullvalue(f);
2084 }
2085
jsondec_err(jsondec * d,const char * msg)2086 UPB_NORETURN static void jsondec_err(jsondec* d, const char* msg) {
2087 upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: %s", d->line,
2088 (int)(d->ptr - d->line_begin), msg);
2089 UPB_LONGJMP(d->err, 1);
2090 }
2091
2092 UPB_PRINTF(2, 3)
jsondec_errf(jsondec * d,const char * fmt,...)2093 UPB_NORETURN static void jsondec_errf(jsondec* d, const char* fmt, ...) {
2094 va_list argp;
2095 upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: ", d->line,
2096 (int)(d->ptr - d->line_begin));
2097 va_start(argp, fmt);
2098 upb_Status_VAppendErrorFormat(d->status, fmt, argp);
2099 va_end(argp);
2100 UPB_LONGJMP(d->err, 1);
2101 }
2102
/* Advances past spaces, tabs, carriage returns and newlines, keeping the
 * line counter and line start up to date.  Returns with d->ptr on the first
 * other character; reaching the end of input is reported as an error. */
static void jsondec_skipws(jsondec* d) {
  for (; d->ptr != d->end; d->ptr++) {
    const char c = *d->ptr;

    if (c == '\n') {
      d->line++;
      d->line_begin = d->ptr;
    } else if (c != '\r' && c != '\t' && c != ' ') {
      return;
    }
  }
  jsondec_err(d, "Unexpected EOF");
}
2121
/* Consumes `ch` if it is the next input character and returns true;
 * otherwise (including at end of input) leaves the position unchanged and
 * returns false. */
static bool jsondec_tryparsech(jsondec* d, char ch) {
  if (d->ptr != d->end && *d->ptr == ch) {
    d->ptr++;
    return true;
  }
  return false;
}
2127
/* Consumes the exact text `lit` at the current position.  Reports
 * "Expected: '<lit>'" when the remaining input is shorter than `lit` or does
 * not match it. */
static void jsondec_parselit(jsondec* d, const char* lit) {
  const size_t remaining = d->end - d->ptr;
  const size_t lit_len = strlen(lit);
  const bool matches =
      remaining >= lit_len && memcmp(d->ptr, lit, lit_len) == 0;

  if (!matches) {
    jsondec_errf(d, "Expected: '%s'", lit);
  }
  d->ptr += lit_len;
}
2136
/* Skips whitespace, then consumes the single character `ch`; reports
 * "Expected: '<ch>'" when something else follows. */
static void jsondec_wsch(jsondec* d, char ch) {
  jsondec_skipws(d);
  if (jsondec_tryparsech(d, ch)) return;
  jsondec_errf(d, "Expected: '%c'", ch);
}
2143
/* Consumes the literal "true" or reports a parse error. */
static void jsondec_true(jsondec* d) {
  jsondec_parselit(d, "true");
}
/* Consumes the literal "false" or reports a parse error. */
static void jsondec_false(jsondec* d) {
  jsondec_parselit(d, "false");
}
/* Consumes the literal "null" or reports a parse error. */
static void jsondec_null(jsondec* d) {
  jsondec_parselit(d, "null");
}
2147
/* Consumes the ':' that separates an object key from its value, allowing
 * whitespace in front of it. */
static void jsondec_entrysep(jsondec* d) {
  static const char separator[] = ":";

  jsondec_skipws(d);
  jsondec_parselit(d, separator);
}
2152
/* Classifies the JSON value that starts at d->ptr by its first character,
 * without consuming anything and without skipping whitespace.  Reads
 * *d->ptr unconditionally, so the caller must ensure input remains.  Any
 * character that cannot start a value is reported as an error. */
static int jsondec_rawpeek(jsondec* d) {
  const char c = *d->ptr;

  if (c == '{') return JD_OBJECT;
  if (c == '[') return JD_ARRAY;
  if (c == '"') return JD_STRING;
  if (c == '-' || (c >= '0' && c <= '9')) return JD_NUMBER;
  if (c == 't') return JD_TRUE;
  if (c == 'f') return JD_FALSE;
  if (c == 'n') return JD_NULL;

  jsondec_errf(d, "Unexpected character: '%c'", *d->ptr);
}
2183
2184 /* JSON object/array **********************************************************/
2185
2186 /* These are used like so:
2187 *
2188 * jsondec_objstart(d);
2189 * while (jsondec_objnext(d)) {
2190 * ...
2191 * }
2192 * jsondec_objend(d) */
2193
/* Skips whitespace and reports the kind (JD_*) of the value that follows. */
static int jsondec_peek(jsondec* d) {
  int kind;

  jsondec_skipws(d);
  kind = jsondec_rawpeek(d);
  return kind;
}
2198
/* Enters a nested object/array: spends one level of the nesting budget
 * (reporting an error once it goes negative) and marks the new sequence as
 * not having produced an element yet. */
static void jsondec_push(jsondec* d) {
  d->depth--;
  if (d->depth < 0) {
    jsondec_err(d, "Recursion limit exceeded");
  }
  d->is_first = true;
}
2205
/* Steps to the next element of an object/array whose closing character is
 * `end_ch`.  Returns false (consuming nothing but whitespace) when the
 * closing character is next; otherwise consumes the ',' that must precede
 * every element except the first and returns true. */
static bool jsondec_seqnext(jsondec* d, char end_ch) {
  const bool need_comma = !d->is_first;

  d->is_first = false;
  jsondec_skipws(d);

  if (*d->ptr == end_ch) return false;

  if (need_comma) {
    jsondec_parselit(d, ",");
  }
  return true;
}
2214
/* Begins an array: accounts for one more nesting level, then consumes '['
 * (whitespace allowed before it). */
static void jsondec_arrstart(jsondec* d) {
  const char open = '[';

  jsondec_push(d);
  jsondec_wsch(d, open);
}
2219
/* Ends an array: gives back the nesting level taken by jsondec_arrstart(),
 * then consumes ']' (whitespace allowed before it). */
static void jsondec_arrend(jsondec* d) {
  const char close = ']';

  d->depth++;
  jsondec_wsch(d, close);
}
2224
/* Returns true when another array element follows (its leading ',' has then
 * been consumed), false when ']' is next. */
static bool jsondec_arrnext(jsondec* d) {
  return jsondec_seqnext(d, ']');
}
2226
/* Begins an object: accounts for one more nesting level, then consumes '{'
 * (whitespace allowed before it). */
static void jsondec_objstart(jsondec* d) {
  const char open = '{';

  jsondec_push(d);
  jsondec_wsch(d, open);
}
2231
/* Ends an object: gives back the nesting level taken by jsondec_objstart(),
 * then consumes '}' (whitespace allowed before it). */
static void jsondec_objend(jsondec* d) {
  const char close = '}';

  d->depth++;
  jsondec_wsch(d, close);
}
2236
/* Returns true when another object member follows, leaving the position at
 * its key; returns false when '}' is next.  A member that does not begin
 * with a string is reported as an error. */
static bool jsondec_objnext(jsondec* d) {
  if (jsondec_seqnext(d, '}')) {
    if (jsondec_peek(d) == JD_STRING) return true;
    jsondec_err(d, "Object must start with string");
  }
  return false;
}
2244
2245 /* JSON number ****************************************************************/
2246
/* Advances past a run of ASCII digits.  Returns true when at least one digit
 * was consumed. */
static bool jsondec_tryskipdigits(jsondec* d) {
  const char* const begin = d->ptr;
  const char* p = begin;

  while (p < d->end && *p >= '0' && *p <= '9') {
    p++;
  }

  d->ptr = p;
  return p != begin;
}
2259
/* Advances past a run of ASCII digits; reports an error when there is not
 * at least one. */
static void jsondec_skipdigits(jsondec* d) {
  if (jsondec_tryskipdigits(d)) return;
  jsondec_err(d, "Expected one or more digits");
}
2265
/* Parses a JSON number starting at d->ptr and returns it as a double.  The
 * JSON grammar is checked by hand first (optional '-', integer part without
 * leading zeros, optional fraction, optional exponent); the text is then
 * converted with strtod(), which accepts a superset of that grammar.  A
 * result outside [-DBL_MAX, DBL_MAX] is reported as out of range. */
static double jsondec_number(jsondec* d) {
  const char* const num_begin = d->ptr;
  char* num_end;
  double val;

  assert(jsondec_rawpeek(d) == JD_NUMBER);

  /* Optional sign. */
  if (*d->ptr == '-') d->ptr++;

  /* Integer part: a lone "0", or one or more digits not starting with 0. */
  if (!jsondec_tryparsech(d, '0')) {
    jsondec_skipdigits(d);
  } else if (jsondec_tryskipdigits(d)) {
    jsondec_err(d, "number cannot have leading zero");
  }

  if (d->ptr != d->end) {
    /* Optional fraction. */
    if (jsondec_tryparsech(d, '.')) {
      jsondec_skipdigits(d);
    }

    /* Optional exponent. */
    if (d->ptr != d->end && (*d->ptr == 'e' || *d->ptr == 'E')) {
      d->ptr++;
      if (d->ptr == d->end) {
        jsondec_err(d, "Unexpected EOF in number");
      }
      if (*d->ptr == '+' || *d->ptr == '-') {
        d->ptr++;
      }
      jsondec_skipdigits(d);
    }
  }

  errno = 0;
  val = strtod(num_begin, &num_end);
  assert(num_end == d->ptr);

  /* errno == ERANGE is deliberately not treated as an error: the min/max-val
   * conformance tests currently fail with that check enabled, and it is not
   * yet known whether the tests or strtod() are at fault. */

  if (val > DBL_MAX || val < -DBL_MAX) {
    jsondec_err(d, "Number out of range");
  }

  return val;
}
2324
2325 /* JSON string ****************************************************************/
2326
/* Consumes the character that follows a backslash inside a JSON string and
 * returns the character it stands for.  The caller must ensure input
 * remains.  Unknown escape characters are reported as an error (the "\u"
 * form is handled elsewhere). */
static char jsondec_escape(jsondec* d) {
  const char esc = *d->ptr++;

  switch (esc) {
    case '"':
    case '\\':
    case '/':
      /* These three stand for themselves. */
      return esc;
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    default:
      jsondec_err(d, "Invalid escape char");
  }
}
2349
/* Consumes exactly four hexadecimal digits and returns their value
 * (0..0xFFFF).  Fewer than four remaining input bytes, or a non-hex
 * character, is reported as an error. */
static uint32_t jsondec_codepoint(jsondec* d) {
  uint32_t cp = 0;
  int i;

  if (d->end - d->ptr < 4) {
    jsondec_err(d, "EOF inside string");
  }

  for (i = 0; i < 4; i++) {
    const char c = *d->ptr++;
    uint32_t nibble = 0;

    if (c >= '0' && c <= '9') {
      nibble = (uint32_t)(c - '0');
    } else if (c >= 'a' && c <= 'f') {
      nibble = (uint32_t)(c - 'a' + 10);
    } else if (c >= 'A' && c <= 'F') {
      nibble = (uint32_t)(c - 'A' + 10);
    } else {
      jsondec_err(d, "Invalid hex digit");
    }

    cp = (cp << 4) | nibble;
  }

  return cp;
}
2375
2376 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
jsondec_unicode(jsondec * d,char * out)2377 static size_t jsondec_unicode(jsondec* d, char* out) {
2378 uint32_t cp = jsondec_codepoint(d);
2379 if (cp >= 0xd800 && cp <= 0xdbff) {
2380 /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
2381 uint32_t high = cp;
2382 uint32_t low;
2383 jsondec_parselit(d, "\\u");
2384 low = jsondec_codepoint(d);
2385 if (low < 0xdc00 || low > 0xdfff) {
2386 jsondec_err(d, "Invalid low surrogate");
2387 }
2388 cp = (high & 0x3ff) << 10;
2389 cp |= (low & 0x3ff);
2390 cp += 0x10000;
2391 } else if (cp >= 0xdc00 && cp <= 0xdfff) {
2392 jsondec_err(d, "Unpaired low surrogate");
2393 }
2394
2395 /* Write to UTF-8 */
2396 if (cp <= 0x7f) {
2397 out[0] = cp;
2398 return 1;
2399 } else if (cp <= 0x07FF) {
2400 out[0] = ((cp >> 6) & 0x1F) | 0xC0;
2401 out[1] = ((cp >> 0) & 0x3F) | 0x80;
2402 return 2;
2403 } else if (cp <= 0xFFFF) {
2404 out[0] = ((cp >> 12) & 0x0F) | 0xE0;
2405 out[1] = ((cp >> 6) & 0x3F) | 0x80;
2406 out[2] = ((cp >> 0) & 0x3F) | 0x80;
2407 return 3;
2408 } else if (cp < 0x10FFFF) {
2409 out[0] = ((cp >> 18) & 0x07) | 0xF0;
2410 out[1] = ((cp >> 12) & 0x3f) | 0x80;
2411 out[2] = ((cp >> 6) & 0x3f) | 0x80;
2412 out[3] = ((cp >> 0) & 0x3f) | 0x80;
2413 return 4;
2414 } else {
2415 jsondec_err(d, "Invalid codepoint");
2416 }
2417 }
2418
/* Grows the string buffer described by (*buf, *end, *buf_end) -- start,
 * write cursor and capacity end -- to twice its capacity (at least 8 bytes)
 * using the decoder's arena, preserving the bytes written so far.  All three
 * pointers are updated; allocation failure is reported as an error. */
static void jsondec_resize(jsondec* d, char** buf, char** end, char** buf_end) {
  const size_t used = *end - *buf;
  const size_t old_cap = *buf_end - *buf;
  const size_t new_cap = UPB_MAX(8, 2 * old_cap);

  *buf = upb_Arena_Realloc(d->arena, *buf, used, new_cap);
  if (*buf == NULL) {
    jsondec_err(d, "Out of memory");
  }

  *buf_end = *buf + new_cap;
  *end = *buf + used;
}
2430
jsondec_string(jsondec * d)2431 static upb_StringView jsondec_string(jsondec* d) {
2432 char* buf = NULL;
2433 char* end = NULL;
2434 char* buf_end = NULL;
2435
2436 jsondec_skipws(d);
2437
2438 if (*d->ptr++ != '"') {
2439 jsondec_err(d, "Expected string");
2440 }
2441
2442 while (d->ptr < d->end) {
2443 char ch = *d->ptr++;
2444
2445 if (end == buf_end) {
2446 jsondec_resize(d, &buf, &end, &buf_end);
2447 }
2448
2449 switch (ch) {
2450 case '"': {
2451 upb_StringView ret;
2452 ret.data = buf;
2453 ret.size = end - buf;
2454 *end = '\0'; /* Needed for possible strtod(). */
2455 return ret;
2456 }
2457 case '\\':
2458 if (d->ptr == d->end) goto eof;
2459 if (*d->ptr == 'u') {
2460 d->ptr++;
2461 if (buf_end - end < 4) {
2462 /* Allow space for maximum-sized code point (4 bytes). */
2463 jsondec_resize(d, &buf, &end, &buf_end);
2464 }
2465 end += jsondec_unicode(d, end);
2466 } else {
2467 *end++ = jsondec_escape(d);
2468 }
2469 break;
2470 default:
2471 if ((unsigned char)*d->ptr < 0x20) {
2472 jsondec_err(d, "Invalid char in JSON string");
2473 }
2474 *end++ = ch;
2475 break;
2476 }
2477 }
2478
2479 eof:
2480 jsondec_err(d, "EOF inside string");
2481 }
2482
/* Parses and discards the next JSON value, whatever its type.  Arrays and
 * objects are walked recursively so their nested values are consumed too. */
static void jsondec_skipval(jsondec* d) {
  switch (jsondec_peek(d)) {
    case JD_NULL:
      jsondec_null(d);
      return;
    case JD_TRUE:
      jsondec_true(d);
      return;
    case JD_FALSE:
      jsondec_false(d);
      return;
    case JD_NUMBER:
      jsondec_number(d);
      return;
    case JD_STRING:
      jsondec_string(d);
      return;
    case JD_ARRAY:
      jsondec_arrstart(d);
      while (jsondec_arrnext(d)) {
        jsondec_skipval(d);
      }
      jsondec_arrend(d);
      return;
    case JD_OBJECT:
      jsondec_objstart(d);
      while (jsondec_objnext(d)) {
        jsondec_string(d); /* Member name. */
        jsondec_entrysep(d);
        jsondec_skipval(d); /* Member value. */
      }
      jsondec_objend(d);
      return;
  }
}
2518
2519 /* Base64 decoding for bytes fields. ******************************************/
2520
/* Maps one base64 character to its 6-bit value.
 *
 * Both the standard alphabet ('+', '/') and the URL-safe variant ('-', '_')
 * are accepted.  Any other byte maps to -1, which is sign-extended into the
 * unsigned return value so that every high bit is set; callers OR several
 * shifted lookups together and detect bad input by testing the sign of the
 * combined result. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Indexed by byte value; `static` so the table is built once rather than
   * on every call. */
  static const signed char table[256] = {
      /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      62 /*+*/, -1, 62 /*-*/, -1, 63 /*/ */,
      /* 0x30: '0'..'9' */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
      -1, -1, -1, -1, -1, -1,
      /* 0x40: '@', 'A'..'O' */ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
      13, 14,
      /* 0x50: 'P'..'Z' */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
      -1, -1, -1, -1, 63 /*_*/,
      /* 0x60: '`', 'a'..'o' */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
      37, 38, 39, 40,
      /* 0x70: 'p'..'z' */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
      -1, -1, -1, -1, -1,
      /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};

  /* Index through unsigned char: where plain char is signed, converting a
   * byte >= 0x80 straight to unsigned would sign-extend it to a huge index
   * and read far outside the table.
   *
   * Sign-extend return value so high bit will be set on any unexpected char. */
  return table[(unsigned char)ch];
}
2565
/* Decodes the final, incomplete base64 group in [ptr, end).
 *
 * A 2-character tail yields one output byte and a 3-character tail yields
 * two.  Any other length, or a tail containing a character that is not in
 * the base64 alphabet, is reported as "Corrupt base64".  Returns the output
 * position just past the bytes written. */
static char* jsondec_partialbase64(jsondec* d, const char* ptr, const char* end,
                                   char* out) {
  /* Stays negative unless a supported tail length decodes cleanly. */
  int32_t bits = -1;

  if (end - ptr == 2) {
    bits = jsondec_base64_tablelookup(ptr[0]) << 18 |
           jsondec_base64_tablelookup(ptr[1]) << 12;
    *out++ = bits >> 16;
  } else if (end - ptr == 3) {
    bits = jsondec_base64_tablelookup(ptr[0]) << 18 |
           jsondec_base64_tablelookup(ptr[1]) << 12 |
           jsondec_base64_tablelookup(ptr[2]) << 6;
    *out++ = bits >> 16;
    *out++ = (bits >> 8) & 0xff;
  }

  /* An invalid character sets the high bits of the lookup result, which
   * makes the combined value negative. */
  if (bits < 0) {
    jsondec_err(d, "Corrupt base64");
  }

  return out;
}
2593
/* Base64-decodes `str` in place and returns the decoded length in bytes.
 *
 * Decoding in place is safe because every 4 input characters become 3
 * output bytes, so the write position never overtakes the read position.
 * Trailing '=' padding is accepted but not required; a short final group is
 * handed to jsondec_partialbase64(). */
static size_t jsondec_base64(jsondec* d, upb_StringView str) {
  char* dst = (char*)str.data;
  const char* src = str.data;
  const char* limit = src + str.size;
  /* End of the last complete 4-character group. */
  const char* quads_end = src + (str.size & -4);

  while (src < quads_end) {
    int quad = jsondec_base64_tablelookup(src[0]) << 18 |
               jsondec_base64_tablelookup(src[1]) << 12 |
               jsondec_base64_tablelookup(src[2]) << 6 |
               jsondec_base64_tablelookup(src[3]) << 0;

    if (quad < 0) {
      /* Either junk or padding.  If this is the last group and it ends in
       * '=', drop the padding and let the tail decoder handle the rest. */
      if (limit - src == 4 && src[3] == '=') {
        limit -= (src[2] == '=') ? 2 : 1;
      }
      break;
    }

    dst[0] = quad >> 16;
    dst[1] = (quad >> 8) & 0xff;
    dst[2] = quad & 0xff;

    src += 4;
    dst += 3;
  }

  if (src < limit) {
    /* Leftover characters; padding is optional. */
    dst = jsondec_partialbase64(d, src, limit, dst);
  }

  return dst - str.data;
}
2632
2633 /* Low-level integer parsing **************************************************/
2634
2635 /* We use these hand-written routines instead of strto[u]l() because the "long
2636 * long" variants aren't in c89. Also our version allows setting a ptr limit. */
2637
/* Parses a run of decimal digits from [ptr, end) into `*val`.
 *
 * Parsing stops at the first non-digit or at `end`; the position where it
 * stopped is returned.  Zero digits is not an error here: `*val` is then 0
 * and the return value equals `ptr`.  A value that does not fit in 64 bits
 * is reported as "Integer overflow". */
static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
                                       const char* end, uint64_t* val) {
  uint64_t acc = 0;

  for (; ptr < end; ptr++) {
    /* Anything below '0' wraps to a large unsigned value, so one comparison
     * rejects both sides of the digit range. */
    unsigned digit = *ptr - '0';
    if (digit >= 10) break;

    if (acc > UINT64_MAX / 10 || acc * 10 > UINT64_MAX - digit) {
      jsondec_err(d, "Integer overflow");
    }
    acc = acc * 10 + digit;
  }

  *val = acc;
  return ptr;
}
2655
/* Parses an optionally '-'-prefixed decimal integer from [ptr, end) into
 * `*val` and returns the position where parsing stopped.
 *
 * The magnitude is parsed with jsondec_buftouint64().  Magnitudes above
 * INT64_MAX (or INT64_MAX + 1 for negative input) are reported as
 * "Integer overflow". */
static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
                                      const char* end, int64_t* val) {
  const bool negative = (ptr != end && *ptr == '-');
  uint64_t magnitude;

  if (negative) {
    ptr++;
  }

  ptr = jsondec_buftouint64(d, ptr, end, &magnitude);

  /* Negative numbers may reach one further from zero than positive ones. */
  if (magnitude > (uint64_t)INT64_MAX + negative) {
    jsondec_err(d, "Integer overflow");
  }

  *val = negative ? -magnitude : magnitude;
  return ptr;
}
2674
/* Converts the whole of `str` to an unsigned 64-bit integer.  Any character
 * left over after the digits is reported as an error. */
static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
  const char* limit = str.data + str.size;
  uint64_t parsed;
  const char* stop = jsondec_buftouint64(d, str.data, limit, &parsed);

  if (stop != limit) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
2683
/* Converts the whole of `str` to a signed 64-bit integer.  Any character
 * left over after the (optionally negative) number is reported as an
 * error. */
static int64_t jsondec_strtoint64(jsondec* d, upb_StringView str) {
  const char* limit = str.data + str.size;
  int64_t parsed;
  const char* stop = jsondec_buftoint64(d, str.data, limit, &parsed);

  if (stop != limit) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
2692
2693 /* Primitive value types ******************************************************/
2694
2695 /* Parse INT32 or INT64 value. */
jsondec_int(jsondec * d,const upb_FieldDef * f)2696 static upb_MessageValue jsondec_int(jsondec* d, const upb_FieldDef* f) {
2697 upb_MessageValue val;
2698
2699 switch (jsondec_peek(d)) {
2700 case JD_NUMBER: {
2701 double dbl = jsondec_number(d);
2702 if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
2703 jsondec_err(d, "JSON number is out of range.");
2704 }
2705 val.int64_val = dbl; /* must be guarded, overflow here is UB */
2706 if (val.int64_val != dbl) {
2707 jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
2708 val.int64_val);
2709 }
2710 break;
2711 }
2712 case JD_STRING: {
2713 upb_StringView str = jsondec_string(d);
2714 val.int64_val = jsondec_strtoint64(d, str);
2715 break;
2716 }
2717 default:
2718 jsondec_err(d, "Expected number or string");
2719 }
2720
2721 if (upb_FieldDef_CType(f) == kUpb_CType_Int32 ||
2722 upb_FieldDef_CType(f) == kUpb_CType_Enum) {
2723 if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
2724 jsondec_err(d, "Integer out of range.");
2725 }
2726 val.int32_val = (int32_t)val.int64_val;
2727 }
2728
2729 return val;
2730 }
2731
2732 /* Parse UINT32 or UINT64 value. */
jsondec_uint(jsondec * d,const upb_FieldDef * f)2733 static upb_MessageValue jsondec_uint(jsondec* d, const upb_FieldDef* f) {
2734 upb_MessageValue val = {0};
2735
2736 switch (jsondec_peek(d)) {
2737 case JD_NUMBER: {
2738 double dbl = jsondec_number(d);
2739 if (dbl > 18446744073709549568.0 || dbl < 0) {
2740 jsondec_err(d, "JSON number is out of range.");
2741 }
2742 val.uint64_val = dbl; /* must be guarded, overflow here is UB */
2743 if (val.uint64_val != dbl) {
2744 jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
2745 val.uint64_val);
2746 }
2747 break;
2748 }
2749 case JD_STRING: {
2750 upb_StringView str = jsondec_string(d);
2751 val.uint64_val = jsondec_strtouint64(d, str);
2752 break;
2753 }
2754 default:
2755 jsondec_err(d, "Expected number or string");
2756 }
2757
2758 if (upb_FieldDef_CType(f) == kUpb_CType_UInt32) {
2759 if (val.uint64_val > UINT32_MAX) {
2760 jsondec_err(d, "Integer out of range.");
2761 }
2762 val.uint32_val = (uint32_t)val.uint64_val;
2763 }
2764
2765 return val;
2766 }
2767
2768 /* Parse DOUBLE or FLOAT value. */
jsondec_double(jsondec * d,const upb_FieldDef * f)2769 static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) {
2770 upb_StringView str;
2771 upb_MessageValue val = {0};
2772
2773 switch (jsondec_peek(d)) {
2774 case JD_NUMBER:
2775 val.double_val = jsondec_number(d);
2776 break;
2777 case JD_STRING:
2778 str = jsondec_string(d);
2779 if (jsondec_streql(str, "NaN")) {
2780 val.double_val = NAN;
2781 } else if (jsondec_streql(str, "Infinity")) {
2782 val.double_val = INFINITY;
2783 } else if (jsondec_streql(str, "-Infinity")) {
2784 val.double_val = -INFINITY;
2785 } else {
2786 val.double_val = strtod(str.data, NULL);
2787 }
2788 break;
2789 default:
2790 jsondec_err(d, "Expected number or string");
2791 }
2792
2793 if (upb_FieldDef_CType(f) == kUpb_CType_Float) {
2794 if (val.double_val != INFINITY && val.double_val != -INFINITY &&
2795 (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
2796 jsondec_err(d, "Float out of range");
2797 }
2798 val.float_val = val.double_val;
2799 }
2800
2801 return val;
2802 }
2803
2804 /* Parse STRING or BYTES value. */
jsondec_strfield(jsondec * d,const upb_FieldDef * f)2805 static upb_MessageValue jsondec_strfield(jsondec* d, const upb_FieldDef* f) {
2806 upb_MessageValue val;
2807 val.str_val = jsondec_string(d);
2808 if (upb_FieldDef_CType(f) == kUpb_CType_Bytes) {
2809 val.str_val.size = jsondec_base64(d, val.str_val);
2810 }
2811 return val;
2812 }
2813
jsondec_enum(jsondec * d,const upb_FieldDef * f)2814 static upb_MessageValue jsondec_enum(jsondec* d, const upb_FieldDef* f) {
2815 switch (jsondec_peek(d)) {
2816 case JD_STRING: {
2817 upb_StringView str = jsondec_string(d);
2818 const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
2819 const upb_EnumValueDef* ev =
2820 upb_EnumDef_FindValueByNameWithSize(e, str.data, str.size);
2821 upb_MessageValue val;
2822 if (ev) {
2823 val.int32_val = upb_EnumValueDef_Number(ev);
2824 } else {
2825 if (d->options & upb_JsonDecode_IgnoreUnknown) {
2826 val.int32_val = 0;
2827 } else {
2828 jsondec_errf(d, "Unknown enumerator: '" UPB_STRINGVIEW_FORMAT "'",
2829 UPB_STRINGVIEW_ARGS(str));
2830 }
2831 }
2832 return val;
2833 }
2834 case JD_NULL: {
2835 if (jsondec_isnullvalue(f)) {
2836 upb_MessageValue val;
2837 jsondec_null(d);
2838 val.int32_val = 0;
2839 return val;
2840 }
2841 }
2842 /* Fallthrough. */
2843 default:
2844 return jsondec_int(d, f);
2845 }
2846 }
2847
jsondec_bool(jsondec * d,const upb_FieldDef * f)2848 static upb_MessageValue jsondec_bool(jsondec* d, const upb_FieldDef* f) {
2849 bool is_map_key = upb_FieldDef_Number(f) == 1 &&
2850 upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f));
2851 upb_MessageValue val;
2852
2853 if (is_map_key) {
2854 upb_StringView str = jsondec_string(d);
2855 if (jsondec_streql(str, "true")) {
2856 val.bool_val = true;
2857 } else if (jsondec_streql(str, "false")) {
2858 val.bool_val = false;
2859 } else {
2860 jsondec_err(d, "Invalid boolean map key");
2861 }
2862 } else {
2863 switch (jsondec_peek(d)) {
2864 case JD_TRUE:
2865 val.bool_val = true;
2866 jsondec_true(d);
2867 break;
2868 case JD_FALSE:
2869 val.bool_val = false;
2870 jsondec_false(d);
2871 break;
2872 default:
2873 jsondec_err(d, "Expected true or false");
2874 }
2875 }
2876
2877 return val;
2878 }
2879
2880 /* Composite types (array/message/map) ****************************************/
2881
/* Parses a JSON array into repeated field `f` of `msg`, appending one
 * element per array entry. */
static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Array* dest = upb_Message_Mutable(msg, f, d->arena).array;

  jsondec_arrstart(d);
  for (;;) {
    upb_MessageValue item;
    if (!jsondec_arrnext(d)) break;
    item = jsondec_value(d, f);
    upb_Array_Append(dest, item, d->arena);
  }
  jsondec_arrend(d);
}
2892
/* Parses a JSON object into map field `f` of `msg`.  Keys and values are
 * decoded according to fields 1 and 2 of the map-entry message. */
static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Map* dest = upb_Message_Mutable(msg, f, d->arena).map;
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_field = upb_MessageDef_FindFieldByNumber(entry_m, 1);
  const upb_FieldDef* value_field =
      upb_MessageDef_FindFieldByNumber(entry_m, 2);

  jsondec_objstart(d);
  for (;;) {
    upb_MessageValue k;
    upb_MessageValue v;
    if (!jsondec_objnext(d)) break;
    k = jsondec_value(d, key_field);
    jsondec_entrysep(d);
    v = jsondec_value(d, value_field);
    upb_Map_Set(dest, k, v, d->arena);
  }
  jsondec_objend(d);
}
2909
/* Decodes a JSON value into `msg`, using the special encoding for
 * well-known types and the generic object decoder for everything else. */
static void jsondec_tomsg(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    jsondec_wellknown(d, msg, m);
    return;
  }
  jsondec_object(d, msg, m);
}
2918
jsondec_msg(jsondec * d,const upb_FieldDef * f)2919 static upb_MessageValue jsondec_msg(jsondec* d, const upb_FieldDef* f) {
2920 const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
2921 upb_Message* msg = upb_Message_New(m, d->arena);
2922 upb_MessageValue val;
2923
2924 jsondec_tomsg(d, msg, m);
2925 val.msg_val = msg;
2926 return val;
2927 }
2928
/* Parses one `"name": value` member of a JSON object into `msg`.
 *
 * A name of the form "[x.y.z]" is looked up as an extension (and must extend
 * `m`); any other name is looked up by JSON name in `m`.  Unknown names are
 * an error unless upb_JsonDecode_IgnoreUnknown is set, in which case the
 * value is skipped.  A JSON null leaves the field untouched unless
 * jsondec_isvalue(f) holds.  Setting a second member of an already-populated
 * real oneof is an error. */
static void jsondec_field(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  const upb_FieldDef* f;
  const upb_FieldDef* outer_field;
  upb_StringView key;
  bool bracketed;

  key = jsondec_string(d);
  jsondec_entrysep(d);

  bracketed = key.size >= 2 && key.data[0] == '[' &&
              key.data[key.size - 1] == ']';

  if (!bracketed) {
    f = upb_MessageDef_FindByJsonNameWithSize(m, key.data, key.size);
  } else {
    /* Strip the surrounding brackets before the extension lookup. */
    f = upb_DefPool_FindExtensionByNameWithSize(d->symtab, key.data + 1,
                                                key.size - 2);
    if (f && upb_FieldDef_ContainingType(f) != m) {
      jsondec_errf(
          d, "Extension %s extends message %s, but was seen in message %s",
          upb_FieldDef_FullName(f),
          upb_MessageDef_FullName(upb_FieldDef_ContainingType(f)),
          upb_MessageDef_FullName(m));
    }
  }

  if (!f) {
    if (!(d->options & upb_JsonDecode_IgnoreUnknown)) {
      jsondec_errf(d, "No such field: " UPB_STRINGVIEW_FORMAT,
                   UPB_STRINGVIEW_ARGS(key));
    }
    jsondec_skipval(d);
    return;
  }

  if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
    /* JSON "null" indicates a default value, so no need to set anything. */
    jsondec_null(d);
    return;
  }

  if (upb_FieldDef_RealContainingOneof(f) &&
      upb_Message_WhichOneof(msg, upb_FieldDef_ContainingOneof(f))) {
    jsondec_err(d, "More than one field for this oneof.");
  }

  /* Record the field being parsed for the duration of its value, then
   * restore whatever the enclosing parse had set. */
  outer_field = d->debug_field;
  d->debug_field = f;

  if (upb_FieldDef_IsMap(f)) {
    jsondec_map(d, msg, f);
  } else if (upb_FieldDef_IsRepeated(f)) {
    jsondec_array(d, msg, f);
  } else if (upb_FieldDef_IsSubMessage(f)) {
    upb_Message* sub = upb_Message_Mutable(msg, f, d->arena).msg;
    const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f);
    jsondec_tomsg(d, sub, sub_m);
  } else {
    upb_MessageValue scalar = jsondec_value(d, f);
    upb_Message_Set(msg, f, scalar, d->arena);
  }

  d->debug_field = outer_field;
}
2991
/* Parses a JSON object into `msg`, decoding each member with
 * jsondec_field(). */
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  jsondec_objstart(d);
  for (;;) {
    if (!jsondec_objnext(d)) break;
    jsondec_field(d, msg, m);
  }
  jsondec_objend(d);
}
3000
jsondec_value(jsondec * d,const upb_FieldDef * f)3001 static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f) {
3002 switch (upb_FieldDef_CType(f)) {
3003 case kUpb_CType_Bool:
3004 return jsondec_bool(d, f);
3005 case kUpb_CType_Float:
3006 case kUpb_CType_Double:
3007 return jsondec_double(d, f);
3008 case kUpb_CType_UInt32:
3009 case kUpb_CType_UInt64:
3010 return jsondec_uint(d, f);
3011 case kUpb_CType_Int32:
3012 case kUpb_CType_Int64:
3013 return jsondec_int(d, f);
3014 case kUpb_CType_String:
3015 case kUpb_CType_Bytes:
3016 return jsondec_strfield(d, f);
3017 case kUpb_CType_Enum:
3018 return jsondec_enum(d, f);
3019 case kUpb_CType_Message:
3020 return jsondec_msg(d, f);
3021 default:
3022 UPB_UNREACHABLE();
3023 }
3024 }
3025
3026 /* Well-known types ***********************************************************/
3027
/* Reads exactly `digits` decimal digits at `*ptr`, optionally followed by
 * the literal separator `after` (may be NULL), and returns their value.
 * `*ptr` is advanced past the digits and the separator.  Anything else is
 * reported as "Malformed timestamp".
 *
 * The caller must guarantee that `digits` plus the separator length bytes
 * are readable at `*ptr`; no end-of-buffer check is made here. */
static int jsondec_tsdigits(jsondec* d, const char** ptr, size_t digits,
                            const char* after) {
  const char* start = *ptr;
  const char* digits_end = start + digits;
  size_t sep_len = 0;
  uint64_t parsed;
  bool ok;

  if (after) {
    sep_len = strlen(after);
  }

  UPB_ASSERT(digits <= 9); /* int can't overflow. */

  /* All `digits` characters must be digits... */
  ok = jsondec_buftouint64(d, start, digits_end, &parsed) == digits_end;
  /* ...and the separator, when one is expected, must follow immediately. */
  if (ok && sep_len) {
    ok = memcmp(digits_end, after, sep_len) == 0;
  }
  if (!ok) {
    jsondec_err(d, "Malformed timestamp");
  }

  UPB_ASSERT(parsed < INT_MAX);

  *ptr = digits_end + sep_len;
  return (int)parsed;
}
3047
/* Parses an optional fractional-seconds suffix (".ddd...") at `*ptr` and
 * returns it scaled to nanoseconds.
 *
 * If `*ptr` does not point at '.', nothing is consumed and 0 is returned.
 * Otherwise up to 9 digits are accepted, `*ptr` is advanced past them, and
 * the value is right-padded with zeros to 9 digits (".5" -> 500000000).
 * More than 9 digits is an error. */
static int jsondec_nanos(jsondec* d, const char** ptr, const char* end) {
  uint64_t nanos = 0;
  const char* p = *ptr;

  if (p != end && *p == '.') {
    const char* frac = p + 1;
    const char* frac_end = jsondec_buftouint64(d, frac, end, &nanos);
    int ndigits = (int)(frac_end - frac);
    int pad;

    if (ndigits > 9) {
      jsondec_err(d, "Too many digits for partial seconds");
    }
    /* Scale up by one power of ten for each missing digit. */
    for (pad = 9 - ndigits; pad != 0; pad--) {
      nanos *= 10;
    }
    *ptr = frac_end;
  }

  UPB_ASSERT(nanos < INT_MAX);

  return (int)nanos;
}
3067
3068 /* jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0. */
/* Converts a calendar date to the number of days since 1970-01-01.
 *
 * The computation uses a year that starts in March, so January and February
 * count as months 10 and 11 of the previous year and the leap day falls at
 * the end of the year.  All arithmetic is done in uint32_t, where
 * wrap-around is well defined; the result is converted back to int on
 * return, which yields negative day counts for dates before the epoch. */
int jsondec_epochdays(int y, int m, int d) {
  uint32_t march_month = (uint32_t)m - 3u; /* 0 == March. */
  uint32_t shifted_year = (uint32_t)y + 4800u; /* Base: multiple of 400. */
  uint32_t days;

  /* The subtraction wrapped, so the month is January or February: move it
   * to the tail of the previous year. */
  if (march_month > (uint32_t)m) {
    march_month += 12;
    shifted_year -= 1;
  }

  days = shifted_year * 365;
  days += shifted_year / 4 - shifted_year / 100 + shifted_year / 400;
  /* Days before the start of this month, counted from March 1st. */
  days += (march_month * 62719 + 769) / 2048;
  days += (uint32_t)(d - 1);

  /* Rebase so that 1970-01-01 maps to 0. */
  return days - 2472632;
}
3079
/* Converts a broken-down date and time of day to seconds since the Unix
 * epoch.  No time-zone adjustment is applied. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  int64_t total = (int64_t)jsondec_epochdays(y, m, d) * 86400;

  total += h * 3600;
  total += min * 60;
  total += s;
  return total;
}
3083
/* Parses a timestamp string of the form
 * "YYYY-MM-DDThh:mm:ss[.fraction](Z|+hh:mm|-hh:mm)" and stores the result in
 * fields 1 (seconds) and 2 (nanos) of `msg`.
 *
 * A numeric UTC offset is folded into the seconds value.  Strings shorter
 * than 20 bytes or with any deviation from the layout are reported as
 * "Malformed timestamp"; a result earlier than -62135596800 seconds is
 * reported as "Timestamp out of range". */
static void jsondec_timestamp(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  upb_MessageValue seconds;
  upb_MessageValue nanos;
  upb_StringView str = jsondec_string(d);
  const char* ptr = str.data;
  const char* end = ptr + str.size;
  int year, mon, day, hour, min, sec;
  char zone;

  /* Shortest valid form is 19 date/time characters plus "Z".  This also
   * guarantees the fixed-width reads below stay inside the string. */
  if (str.size < 20) goto malformed;

  /* 1972-01-01T01:00:00 */
  year = jsondec_tsdigits(d, &ptr, 4, "-");
  mon = jsondec_tsdigits(d, &ptr, 2, "-");
  day = jsondec_tsdigits(d, &ptr, 2, "T");
  hour = jsondec_tsdigits(d, &ptr, 2, ":");
  min = jsondec_tsdigits(d, &ptr, 2, ":");
  sec = jsondec_tsdigits(d, &ptr, 2, NULL);
  seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);

  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  /* [+-]08:00 or Z */
  if (ptr == end) goto malformed;
  zone = *ptr++;

  if (zone == 'Z') {
    if (ptr != end) goto malformed;
  } else if (zone == '+' || zone == '-') {
    int ofs_hour;
    int ofs_sec;

    if ((end - ptr) != 5) goto malformed;
    ofs_hour = jsondec_tsdigits(d, &ptr, 2, ":");
    ofs_sec = jsondec_tsdigits(d, &ptr, 2, NULL);
    ofs_sec = ((ofs_hour * 60) + ofs_sec) * 60;
    /* A local time ahead of UTC ('+') is later than the UTC instant, so the
     * offset is subtracted; '-' adds it. */
    seconds.int64_val += (zone == '-') ? ofs_sec : -ofs_sec;
  } else {
    goto malformed;
  }

  if (seconds.int64_val < -62135596800) {
    jsondec_err(d, "Timestamp out of range");
  }

  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds,
                  d->arena);
  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena);
  return;

malformed:
  jsondec_err(d, "Malformed timestamp");
}
3147
/* Parses a duration string such as "3s", "-1.5s" or "3.000000001s" and
 * stores the result in fields 1 (seconds) and 2 (nanos) of `msg`.
 *
 * The string must consist of an optionally negative integer, an optional
 * fraction of up to 9 digits, and a trailing 's'.  The seconds value must
 * lie within +/- 3652500 days.  For a negative duration the nanos field is
 * negated as well, so that both fields carry the same sign. */
static void jsondec_duration(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  upb_MessageValue seconds;
  upb_MessageValue nanos;
  upb_StringView str = jsondec_string(d);
  const char* ptr = str.data;
  const char* end = ptr + str.size;
  const int64_t max = (uint64_t)3652500 * 86400;
  /* Take the sign from the text, not from the parsed seconds: "-0.5s" parses
   * to seconds == 0, which would otherwise lose the sign and decode as
   * +0.5s.  This is the same test jsondec_buftoint64() uses to detect a
   * negative number. */
  const bool neg = (ptr != end && *ptr == '-');

  /* "3.000000001s", "3s", etc. */
  ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val);
  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  if (end - ptr != 1 || *ptr != 's') {
    jsondec_err(d, "Malformed duration");
  }

  if (seconds.int64_val < -max || seconds.int64_val > max) {
    jsondec_err(d, "Duration out of range");
  }

  if (neg) {
    nanos.int32_val = -nanos.int32_val;
  }

  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds,
                  d->arena);
  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena);
}
3177
/* Parses a JSON array into a ListValue-shaped message: each element becomes
 * a new sub-message appended to repeated field 1 and filled in by
 * jsondec_wellknownvalue(). */
static void jsondec_listvalue(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* elem_m = upb_FieldDef_MessageSubDef(values_f);
  upb_Array* list = upb_Message_Mutable(msg, values_f, d->arena).array;

  jsondec_arrstart(d);
  for (;;) {
    upb_Message* elem_msg;
    upb_MessageValue elem;

    if (!jsondec_arrnext(d)) break;

    /* The element is appended first and populated afterwards. */
    elem_msg = upb_Message_New(elem_m, d->arena);
    elem.msg_val = elem_msg;
    upb_Array_Append(list, elem, d->arena);
    jsondec_wellknownvalue(d, elem_msg, elem_m);
  }
  jsondec_arrend(d);
}
3194
/* Parses a JSON object into a Struct-shaped message: each member becomes an
 * entry of map field 1, keyed by the member name, whose value is a new
 * sub-message filled in by jsondec_wellknownvalue(). */
static void jsondec_struct(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
  const upb_FieldDef* entry_value_f =
      upb_MessageDef_FindFieldByNumber(entry_m, 2);
  const upb_MessageDef* member_m = upb_FieldDef_MessageSubDef(entry_value_f);
  upb_Map* members = upb_Message_Mutable(msg, fields_f, d->arena).map;

  jsondec_objstart(d);
  for (;;) {
    upb_MessageValue name;
    upb_MessageValue member;
    upb_Message* member_msg;

    if (!jsondec_objnext(d)) break;

    member_msg = upb_Message_New(member_m, d->arena);
    name.str_val = jsondec_string(d);
    member.msg_val = member_msg;
    /* The entry is inserted first and its value populated afterwards. */
    upb_Map_Set(members, name, member, d->arena);
    jsondec_entrysep(d);
    jsondec_wellknownvalue(d, member_msg, member_m);
  }
  jsondec_objend(d);
}
3215
/* Parses an arbitrary JSON value into a Value-shaped message, choosing the
 * member field from the JSON type of the input:
 *   null -> field 1, number -> field 2, string -> field 3,
 *   true/false -> field 4, object -> field 5, array -> field 6. */
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m) {
  upb_MessageValue scalar;
  const upb_FieldDef* f;
  upb_Message* nested;

  switch (jsondec_peek(d)) {
    /* Composite cases return directly: obtaining the mutable sub-message is
     * all that is needed to populate the field. */
    case JD_ARRAY:
      /* ListValue list_value = 6; */
      f = upb_MessageDef_FindFieldByNumber(m, 6);
      nested = upb_Message_Mutable(msg, f, d->arena).msg;
      jsondec_listvalue(d, nested, upb_FieldDef_MessageSubDef(f));
      return;
    case JD_OBJECT:
      /* Struct struct_value = 5; */
      f = upb_MessageDef_FindFieldByNumber(m, 5);
      nested = upb_Message_Mutable(msg, f, d->arena).msg;
      jsondec_struct(d, nested, upb_FieldDef_MessageSubDef(f));
      return;

    /* Scalar cases fall out of the switch and are stored below. */
    case JD_NULL:
      /* NullValue null_value = 1; */
      f = upb_MessageDef_FindFieldByNumber(m, 1);
      scalar.int32_val = 0;
      jsondec_null(d);
      break;
    case JD_NUMBER:
      /* double number_value = 2; */
      f = upb_MessageDef_FindFieldByNumber(m, 2);
      scalar.double_val = jsondec_number(d);
      break;
    case JD_STRING:
      /* string string_value = 3; */
      f = upb_MessageDef_FindFieldByNumber(m, 3);
      scalar.str_val = jsondec_string(d);
      break;
    case JD_TRUE:
      /* bool bool_value = 4; */
      f = upb_MessageDef_FindFieldByNumber(m, 4);
      scalar.bool_val = true;
      jsondec_true(d);
      break;
    case JD_FALSE:
      /* bool bool_value = 4; */
      f = upb_MessageDef_FindFieldByNumber(m, 4);
      scalar.bool_val = false;
      jsondec_false(d);
      break;
    default:
      UPB_UNREACHABLE();
  }

  upb_Message_Set(msg, f, scalar, d->arena);
}
3270
jsondec_mask(jsondec * d,const char * buf,const char * end)3271 static upb_StringView jsondec_mask(jsondec* d, const char* buf,
3272 const char* end) {
3273 /* FieldMask fields grow due to inserted '_' characters, so we can't do the
3274 * transform in place. */
3275 const char* ptr = buf;
3276 upb_StringView ret;
3277 char* out;
3278
3279 ret.size = end - ptr;
3280 while (ptr < end) {
3281 ret.size += (*ptr >= 'A' && *ptr <= 'Z');
3282 ptr++;
3283 }
3284
3285 out = upb_Arena_Malloc(d->arena, ret.size);
3286 ptr = buf;
3287 ret.data = out;
3288
3289 while (ptr < end) {
3290 char ch = *ptr++;
3291 if (ch >= 'A' && ch <= 'Z') {
3292 *out++ = '_';
3293 *out++ = ch + 32;
3294 } else if (ch == '_') {
3295 jsondec_err(d, "field mask may not contain '_'");
3296 } else {
3297 *out++ = ch;
3298 }
3299 }
3300
3301 return ret;
3302 }
3303
/* Parses a FieldMask: a single JSON string of comma-separated paths.  Each
 * path is converted with jsondec_mask() and appended to repeated field 1. */
static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  /* repeated string paths = 1; */
  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_Array* paths = upb_Message_Mutable(msg, paths_f, d->arena).array;
  upb_StringView joined = jsondec_string(d);
  const char* cursor = joined.data;
  const char* limit = cursor + joined.size;
  upb_MessageValue path;

  while (cursor < limit) {
    const char* comma = memchr(cursor, ',', limit - cursor);
    /* The current path runs to the next comma, or to the end of input. */
    const char* path_end = comma ? comma : limit;

    path.str_val = jsondec_mask(d, cursor, path_end);
    cursor = comma ? comma + 1 : limit;
    upb_Array_Append(paths, path, d->arena);
  }
}
3326
/* Parses one member of an Any object (other than "@type") into the payload
 * message `msg` of type `m`. */
static void jsondec_anyfield(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  upb_StringView key;

  if (upb_MessageDef_WellKnownType(m) == kUpb_WellKnown_Unspecified) {
    /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
     * where f1, f2, etc. are the normal fields of this type. */
    jsondec_field(d, msg, m);
    return;
  }

  /* For well-known types: {"@type": "[well-known type]", "value": <X>}
   * where <X> is whatever encoding the WKT normally uses. */
  key = jsondec_string(d);
  jsondec_entrysep(d);
  if (!jsondec_streql(key, "value")) {
    jsondec_err(d, "Key for well-known type must be 'value'");
  }
  jsondec_wellknown(d, msg, m);
}
3344
/* Parses the "@type" string of an Any, stores it in field 1 of `msg`, and
 * returns the message definition it names.
 *
 * The type name is everything after the last '/'.  The URL must contain a
 * '/' that is not its first character, and the name must resolve in the
 * decoder's symbol table; otherwise an error is reported. */
static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
                                             const upb_MessageDef* m) {
  const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* found;
  upb_StringView url = jsondec_string(d);
  const char* url_end = url.data + url.size;
  const char* slash = url_end;
  upb_MessageValue stored;

  stored.str_val = url;
  upb_Message_Set(msg, type_url_f, stored, d->arena);

  /* Find message name after the last '/' */
  while (slash > url.data) {
    slash--;
    if (*slash == '/') break;
  }

  /* Reaching the start means either no '/' at all or an empty host part;
   * still being at the end means the string was empty. */
  if (slash == url.data || slash == url_end) {
    jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  }

  slash++;
  found = upb_DefPool_FindMessageByNameWithSize(d->symtab, slash,
                                                url_end - slash);

  if (!found) {
    jsondec_err(d, "Type was not found");
  }

  return found;
}
3374
/* Parses the JSON form of an Any into `msg`.
 *
 * The object is scanned for its "@type" member, which selects the payload
 * message type.  Members that appear before "@type" are skipped on the first
 * pass, then copied into a temporary buffer and re-parsed once the type is
 * known; members after "@type" are parsed directly.  The populated payload
 * is then binary-encoded with upb_Encode() and stored in field 2, while
 * jsondec_typeurl() stores the type URL in field 1. */
static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
  /* string type_url = 1;
   * bytes value = 2; */
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_Message* any_msg;
  const upb_MessageDef* any_m = NULL;
  const char* pre_type_data = NULL;
  const char* pre_type_end = NULL;
  upb_MessageValue encoded;

  jsondec_objstart(d);

  /* Scan looking for "@type", which is not necessarily first. */
  while (!any_m && jsondec_objnext(d)) {
    const char* start = d->ptr;
    upb_StringView name = jsondec_string(d);
    jsondec_entrysep(d);
    if (jsondec_streql(name, "@type")) {
      any_m = jsondec_typeurl(d, msg, m);
      if (pre_type_data) {
        /* Back up to the comma that separates the skipped members from
         * "@type"; that is where the saved region ends. */
        pre_type_end = start;
        while (*pre_type_end != ',') pre_type_end--;
      }
    } else {
      /* Remember where the first pre-"@type" member starts. */
      if (!pre_type_data) pre_type_data = start;
      jsondec_skipval(d);
    }
  }

  if (!any_m) {
    jsondec_err(d, "Any object didn't contain a '@type' field");
  }

  any_msg = upb_Message_New(any_m, d->arena);

  if (pre_type_data) {
    /* Re-parse the skipped members from a private copy in which the
     * separating comma is replaced by '}', so the object iterator stops
     * there. */
    size_t len = pre_type_end - pre_type_data + 1;
    char* tmp = upb_Arena_Malloc(d->arena, len);
    const char* saved_ptr = d->ptr;
    const char* saved_end = d->end;
    /* Don't copy into a failed allocation. */
    if (!tmp) {
      jsondec_err(d, "Out of memory");
    }
    memcpy(tmp, pre_type_data, len - 1);
    tmp[len - 1] = '}';
    d->ptr = tmp;
    d->end = tmp + len;
    d->is_first = true;
    while (jsondec_objnext(d)) {
      jsondec_anyfield(d, any_msg, any_m);
    }
    d->ptr = saved_ptr;
    d->end = saved_end;
  }

  /* Members that follow "@type" in the real input. */
  while (jsondec_objnext(d)) {
    jsondec_anyfield(d, any_msg, any_m);
  }

  jsondec_objend(d);

  encoded.str_val.data = upb_Encode(any_msg, upb_MessageDef_MiniTable(any_m), 0,
                                    d->arena, &encoded.str_val.size);
  upb_Message_Set(msg, value_f, encoded, d->arena);
}
3437
/* Decodes a wrapper well-known type: the JSON value is parsed as field number
 * 1 of `m` and stored into that field of `msg`. */
static void jsondec_wrapper(jsondec* d, upb_Message* msg,
                            const upb_MessageDef* m) {
  const upb_FieldDef* wrapped_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_MessageValue parsed = jsondec_value(d, wrapped_f);

  upb_Message_Set(msg, wrapped_f, parsed, d->arena);
}
3444
/* Dispatches to the dedicated JSON parser for the well-known type of `m`.
 * Reaching the default case is treated as unreachable. */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  switch (upb_MessageDef_WellKnownType(m)) {
    case kUpb_WellKnown_Any:
      jsondec_any(d, msg, m);
      return;
    case kUpb_WellKnown_FieldMask:
      jsondec_fieldmask(d, msg, m);
      return;
    case kUpb_WellKnown_Duration:
      jsondec_duration(d, msg, m);
      return;
    case kUpb_WellKnown_Timestamp:
      jsondec_timestamp(d, msg, m);
      return;
    case kUpb_WellKnown_Value:
      jsondec_wellknownvalue(d, msg, m);
      return;
    case kUpb_WellKnown_ListValue:
      jsondec_listvalue(d, msg, m);
      return;
    case kUpb_WellKnown_Struct:
      jsondec_struct(d, msg, m);
      return;
    /* All scalar wrapper types share a single parser. */
    case kUpb_WellKnown_DoubleValue:
    case kUpb_WellKnown_FloatValue:
    case kUpb_WellKnown_Int64Value:
    case kUpb_WellKnown_UInt64Value:
    case kUpb_WellKnown_Int32Value:
    case kUpb_WellKnown_UInt32Value:
    case kUpb_WellKnown_StringValue:
    case kUpb_WellKnown_BytesValue:
    case kUpb_WellKnown_BoolValue:
      jsondec_wrapper(d, msg, m);
      return;
    default:
      UPB_UNREACHABLE();
  }
}
3484
/* Parses the JSON text in buf[0, size) into `msg`, whose type is `m`.
 *
 * Empty input is accepted and leaves `msg` untouched.  Returns true on
 * success, or false when the parser bails out through the decoder's jump
 * buffer. */
bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg,
                    const upb_MessageDef* m, const upb_DefPool* symtab,
                    int options, upb_Arena* arena, upb_Status* status) {
  jsondec d;

  if (size == 0) return true;

  /* Input window and line tracking. */
  d.ptr = buf;
  d.end = buf + size;
  d.line = 1;
  d.line_begin = d.ptr;

  /* Caller-supplied context. */
  d.arena = arena;
  d.symtab = symtab;
  d.status = status;
  d.options = options;

  /* Parser state. */
  d.depth = 64;
  d.debug_field = NULL;
  d.is_first = false;

  if (UPB_SETJMP(d.err)) return false;

  jsondec_tomsg(&d, msg, m);
  return true;
}
3509
3510 /** upb/json_encode.c ************************************************************/
3511
3512 #include <ctype.h>
3513 #include <float.h>
3514 #include <inttypes.h>
3515 #include <math.h>
3516 #include <setjmp.h>
3517 #include <stdarg.h>
3518 #include <stdio.h>
3519 #include <string.h>
3520
3521
3522 /* Must be last. */
3523
/* State for a single upb_JsonEncode() call. */
typedef struct {
  /* Output buffer: start, current write position, and one-past-the-end. */
  char *buf, *ptr, *end;
  /* Number of output bytes that did not fit into the buffer. */
  size_t overflow;
  /* NOTE(review): not referenced in this part of the file; presumably the
   * nesting depth for indented output -- confirm. */
  int indent_depth;
  /* Option flags given to upb_JsonEncode() (tested against
   * upb_JsonEncode_EmitDefaults and upb_JsonEncode_UseProtoNames). */
  int options;
  /* Pool used to look up Any payload types; also handed to
   * upb_Message_Next() when iterating fields. */
  const upb_DefPool* ext_pool;
  /* longjmp() target used by jsonenc_err() and jsonenc_errf(). */
  jmp_buf err;
  /* Receives the error message on failure. */
  upb_Status* status;
  /* Scratch arena created lazily by jsonenc_arena(); NULL until then. */
  upb_Arena* arena;
} jsonenc;
3534
/* Forward declarations: the encoders below are mutually recursive (messages
 * contain fields, which may in turn contain messages and well-known types). */
static void jsonenc_msg(jsonenc* e, const upb_Message* msg,
                        const upb_MessageDef* m);
static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
                           const upb_FieldDef* f);
static void jsonenc_msgfield(jsonenc* e, const upb_Message* msg,
                             const upb_MessageDef* m);
static void jsonenc_msgfields(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m, bool first);
static void jsonenc_value(jsonenc* e, const upb_Message* msg,
                          const upb_MessageDef* m);
3545
/* Records `msg` in the encoder's status and aborts encoding by jumping back
 * to the setjmp() point stored in e->err.  Never returns. */
UPB_NORETURN static void jsonenc_err(jsonenc* e, const char* msg) {
  upb_Status_SetErrorMessage(e->status, msg);
  longjmp(e->err, 1);
}
3550
3551 UPB_PRINTF(2, 3)
jsonenc_errf(jsonenc * e,const char * fmt,...)3552 UPB_NORETURN static void jsonenc_errf(jsonenc* e, const char* fmt, ...) {
3553 va_list argp;
3554 va_start(argp, fmt);
3555 upb_Status_VSetErrorFormat(e->status, fmt, argp);
3556 va_end(argp);
3557 longjmp(e->err, 1);
3558 }
3559
jsonenc_arena(jsonenc * e)3560 static upb_Arena* jsonenc_arena(jsonenc* e) {
3561 /* Create lazily, since it's only needed for Any */
3562 if (!e->arena) {
3563 e->arena = upb_Arena_New();
3564 }
3565 return e->arena;
3566 }
3567
/* Appends `len` bytes from `data` to the output.  Whatever does not fit in
 * the remaining buffer space is dropped and counted in e->overflow so the
 * total required size can still be reported. */
static void jsonenc_putbytes(jsonenc* e, const void* data, size_t len) {
  const size_t room = e->end - e->ptr;

  if (UPB_LIKELY(room >= len)) {
    memcpy(e->ptr, data, len);
    e->ptr += len;
    return;
  }

  /* Partial fit: copy what we can and account for the remainder. */
  if (room) {
    memcpy(e->ptr, data, room);
    e->ptr += room;
  }
  e->overflow += (len - room);
}
3581
/* Appends the NUL-terminated string `str` (without its terminator). */
static void jsonenc_putstr(jsonenc* e, const char* str) {
  const size_t len = strlen(str);

  jsonenc_putbytes(e, str, len);
}
3585
3586 UPB_PRINTF(2, 3)
jsonenc_printf(jsonenc * e,const char * fmt,...)3587 static void jsonenc_printf(jsonenc* e, const char* fmt, ...) {
3588 size_t n;
3589 size_t have = e->end - e->ptr;
3590 va_list args;
3591
3592 va_start(args, fmt);
3593 n = _upb_vsnprintf(e->ptr, have, fmt, args);
3594 va_end(args);
3595
3596 if (UPB_LIKELY(have > n)) {
3597 e->ptr += n;
3598 } else {
3599 e->ptr = UPB_PTRADD(e->ptr, have);
3600 e->overflow += (n - have);
3601 }
3602 }
3603
/* Emits the fractional-seconds suffix (".mmm", ".mmmuuu" or ".mmmuuunnn") for
 * `nanos`, using as few 3-digit groups as possible.  Emits nothing for zero
 * and raises an encoder error for values outside [0, 1000000000). */
static void jsonenc_nanos(jsonenc* e, int32_t nanos) {
  int width;

  if (nanos == 0) return;
  if (nanos < 0 || nanos >= 1000000000) {
    jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos");
  }

  /* Drop trailing groups of three zero digits. */
  for (width = 9; nanos % 1000 == 0; width -= 3) {
    nanos /= 1000;
  }

  jsonenc_printf(e, ".%.*" PRId32, width, nanos);
}
3619
/* Emits a Timestamp message (seconds = field 1, nanos = field 2) as a quoted
 * "YYYY-MM-DDTHH:MM:SS[.fraction]Z" string.  Values outside
 * 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z raise an encoder error. */
static void jsonenc_timestamp(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
  int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val;
  int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val;
  int jd, quad, year, month, day, hour, min, sec;

  if (seconds < -62135596800) {
    jsonenc_err(e,
                "error formatting timestamp as JSON: minimum acceptable value "
                "is 0001-01-01T00:00:00Z");
  } else if (seconds > 253402300799) {
    jsonenc_err(e,
                "error formatting timestamp as JSON: maximum acceptable value "
                "is 9999-12-31T23:59:59Z");
  }

  /* Julian Day -> Y/M/D, Algorithm from:
   * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for
   * Processing Calendar Dates," Communications of the Association of
   * Computing Machines, vol. 11 (1968), p. 657.
   *
   * `jd` is reused as scratch exactly as in the published algorithm. */
  seconds += 62135596800;  // Ensure seconds is positive.
  jd = (int)(seconds / 86400) - 719162 + 68569 + 2440588;
  quad = 4 * jd / 146097;
  jd = jd - (146097 * quad + 3) / 4;
  year = 4000 * (jd + 1) / 1461001;
  jd = jd - 1461 * year / 4 + 31;
  month = 80 * jd / 2447;
  day = jd - 2447 * month / 80;
  jd = month / 11;
  month = month + 2 - 12 * jd;
  year = 100 * (quad - 49) + year + jd;

  /* Time of day. */
  sec = seconds % 60;
  min = (seconds / 60) % 60;
  hour = (seconds / 3600) % 24;

  jsonenc_printf(e, "\"%04d-%02d-%02dT%02d:%02d:%02d", year, month, day, hour,
                 min, sec);
  jsonenc_nanos(e, nanos);
  jsonenc_putstr(e, "Z\"");
}
3662
/* Emits a Duration message (seconds = field 1, nanos = field 2) as a quoted
 * "<seconds>[.<fraction>]s" string, with a leading '-' for negative values.
 *
 * Raises an encoder error ("bad duration") when:
 *   - seconds is outside +/-315576000000,
 *   - nanos is outside (-1000000000, 1000000000), or
 *   - seconds and nanos are both non-zero and have different signs.
 *
 * A zero component is compatible with either sign of the other, so values
 * such as -5s/0ns and 0s/-500000000ns are valid and encode as "-5s" and
 * "-0.500s". */
static void jsonenc_duration(jsonenc* e, const upb_Message* msg,
                             const upb_MessageDef* m) {
  const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
  int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val;
  int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val;
  bool negative = false;

  /* The nanos range check also keeps the negation below free of signed
   * overflow (INT32_MIN cannot be negated). */
  if (seconds > 315576000000 || seconds < -315576000000 ||
      nanos <= -1000000000 || nanos >= 1000000000 ||
      (seconds != 0 && nanos != 0 && (seconds < 0) != (nanos < 0))) {
    jsonenc_err(e, "bad duration");
  }

  /* Print magnitudes only; the sign is emitted once, up front, so that a
   * negative fraction with zero seconds still produces "-0.xxx". */
  if (seconds < 0) {
    negative = true;
    seconds = -seconds;
  }
  if (nanos < 0) {
    negative = true;
    nanos = -nanos;
  }

  jsonenc_putstr(e, "\"");
  if (negative) {
    jsonenc_putstr(e, "-");
  }
  jsonenc_printf(e, "%" PRId64, seconds);
  jsonenc_nanos(e, nanos);
  jsonenc_putstr(e, "s\"");
}
3683
/* Emits an enum value: `null` for google.protobuf.NullValue, the quoted value
 * name when `val` is a known number of the enum, and the bare number
 * otherwise. */
static void jsonenc_enum(int32_t val, const upb_FieldDef* f, jsonenc* e) {
  const upb_EnumDef* e_def = upb_FieldDef_EnumSubDef(f);
  const upb_EnumValueDef* ev;

  if (strcmp(upb_EnumDef_FullName(e_def), "google.protobuf.NullValue") == 0) {
    jsonenc_putstr(e, "null");
    return;
  }

  ev = upb_EnumDef_FindValueByNumber(e_def, val);
  if (!ev) {
    jsonenc_printf(e, "%" PRId32, val);
    return;
  }

  jsonenc_printf(e, "\"%s\"", upb_EnumValueDef_Name(ev));
}
3699
/* Emits `str` as a quoted, '='-padded base64 string. */
static void jsonenc_bytes(jsonenc* e, upb_StringView str) {
  /* This is the regular base64, not the "web-safe" version. */
  static const char base64[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  const unsigned char* in = (unsigned char*)str.data;
  const unsigned char* limit = UPB_PTRADD(in, str.size);
  char quad[4];

  jsonenc_putstr(e, "\"");

  /* Full 3-byte groups become 4 output characters each. */
  for (; limit - in >= 3; in += 3) {
    quad[0] = base64[in[0] >> 2];
    quad[1] = base64[((in[0] & 0x3) << 4) | (in[1] >> 4)];
    quad[2] = base64[((in[1] & 0xf) << 2) | (in[2] >> 6)];
    quad[3] = base64[in[2] & 0x3f];
    jsonenc_putbytes(e, quad, 4);
  }

  /* A trailing group of 1 or 2 bytes is padded with '='. */
  if (limit - in == 2) {
    quad[0] = base64[in[0] >> 2];
    quad[1] = base64[((in[0] & 0x3) << 4) | (in[1] >> 4)];
    quad[2] = base64[(in[1] & 0xf) << 2];
    quad[3] = '=';
    jsonenc_putbytes(e, quad, 4);
  } else if (limit - in == 1) {
    quad[0] = base64[in[0] >> 2];
    quad[1] = base64[((in[0] & 0x3) << 4)];
    quad[2] = '=';
    quad[3] = '=';
    jsonenc_putbytes(e, quad, 4);
  }

  jsonenc_putstr(e, "\"");
}
3738
/* Emits the contents of `str` with JSON escaping applied, without the
 * surrounding quotes.  Bytes with a short escape use it, other bytes below
 * 0x20 are written as \u00XX, and everything else is copied through. */
static void jsonenc_stringbody(jsonenc* e, upb_StringView str) {
  const char* ptr = str.data;
  const char* end = UPB_PTRADD(ptr, str.size);

  for (; ptr < end; ptr++) {
    const char* escape = NULL;

    switch (*ptr) {
      case '\n':
        escape = "\\n";
        break;
      case '\r':
        escape = "\\r";
        break;
      case '\t':
        escape = "\\t";
        break;
      case '\"':
        escape = "\\\"";
        break;
      case '\f':
        escape = "\\f";
        break;
      case '\b':
        escape = "\\b";
        break;
      case '\\':
        escape = "\\\\";
        break;
      default:
        break;
    }

    if (escape) {
      jsonenc_putstr(e, escape);
    } else if ((uint8_t)*ptr < 0x20) {
      jsonenc_printf(e, "\\u%04x", (int)(uint8_t)*ptr);
    } else {
      /* This could be a non-ASCII byte.  We rely on the string being valid
       * UTF-8. */
      jsonenc_putbytes(e, ptr, 1);
    }
  }
}
3779
/* Emits `str` as a complete JSON string: escaped body between quotes. */
static void jsonenc_string(jsonenc* e, upb_StringView str) {
  static const char quote[] = "\"";

  jsonenc_putstr(e, quote);
  jsonenc_stringbody(e, str);
  jsonenc_putstr(e, quote);
}
3785
/* Emits the quoted JSON spelling of +/-infinity or NaN when `val` is one of
 * them.  Returns true if something was emitted, false for ordinary values. */
static bool upb_JsonEncode_HandleSpecialDoubles(jsonenc* e, double val) {
  if (val == INFINITY) {
    jsonenc_putstr(e, "\"Infinity\"");
    return true;
  }
  if (val == -INFINITY) {
    jsonenc_putstr(e, "\"-Infinity\"");
    return true;
  }
  if (val != val) { /* Only NaN compares unequal to itself. */
    jsonenc_putstr(e, "\"NaN\"");
    return true;
  }
  return false;
}
3798
/* Emits a double: special values as quoted strings, everything else as the
 * text produced by _upb_EncodeRoundTripDouble(). */
static void upb_JsonEncode_Double(jsonenc* e, double val) {
  char text[32];

  if (!upb_JsonEncode_HandleSpecialDoubles(e, val)) {
    _upb_EncodeRoundTripDouble(val, text, sizeof(text));
    jsonenc_putstr(e, text);
  }
}
3805
/* Emits a float: special values as quoted strings, everything else as the
 * text produced by _upb_EncodeRoundTripFloat(). */
static void upb_JsonEncode_Float(jsonenc* e, float val) {
  char text[32];

  if (!upb_JsonEncode_HandleSpecialDoubles(e, val)) {
    _upb_EncodeRoundTripFloat(val, text, sizeof(text));
    jsonenc_putstr(e, text);
  }
}
3812
/* Emits a wrapper well-known type as the bare JSON value of its field
 * number 1. */
static void jsonenc_wrapper(jsonenc* e, const upb_Message* msg,
                            const upb_MessageDef* m) {
  const upb_FieldDef* wrapped_f = upb_MessageDef_FindFieldByNumber(m, 1);

  jsonenc_scalar(e, upb_Message_Get(msg, wrapped_f), wrapped_f);
}
3819
jsonenc_getanymsg(jsonenc * e,upb_StringView type_url)3820 static const upb_MessageDef* jsonenc_getanymsg(jsonenc* e,
3821 upb_StringView type_url) {
3822 /* Find last '/', if any. */
3823 const char* end = type_url.data + type_url.size;
3824 const char* ptr = end;
3825 const upb_MessageDef* ret;
3826
3827 if (!e->ext_pool) {
3828 jsonenc_err(e, "Tried to encode Any, but no symtab was provided");
3829 }
3830
3831 if (type_url.size == 0) goto badurl;
3832
3833 while (true) {
3834 if (--ptr == type_url.data) {
3835 /* Type URL must contain at least one '/', with host before. */
3836 goto badurl;
3837 }
3838 if (*ptr == '/') {
3839 ptr++;
3840 break;
3841 }
3842 }
3843
3844 ret = upb_DefPool_FindMessageByNameWithSize(e->ext_pool, ptr, end - ptr);
3845
3846 if (!ret) {
3847 jsonenc_errf(e, "Couldn't find Any type: %.*s", (int)(end - ptr), ptr);
3848 }
3849
3850 return ret;
3851
3852 badurl:
3853 jsonenc_errf(e, "Bad type URL: " UPB_STRINGVIEW_FORMAT,
3854 UPB_STRINGVIEW_ARGS(type_url));
3855 }
3856
/* Emits a google.protobuf.Any (type_url = field 1, value = field 2).
 *
 * The payload bytes are decoded into a temporary message allocated from the
 * encoder's scratch arena and then written as a JSON object that starts with
 * "@type".  A decode failure raises an encoder error. */
static void jsonenc_any(jsonenc* e, const upb_Message* msg,
                        const upb_MessageDef* m) {
  const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_StringView type_url = upb_Message_Get(msg, type_url_f).str_val;
  upb_StringView payload_bytes = upb_Message_Get(msg, value_f).str_val;
  const upb_MessageDef* payload_m = jsonenc_getanymsg(e, type_url);
  const upb_MiniTable* payload_layout = upb_MessageDef_MiniTable(payload_m);
  upb_Arena* scratch = jsonenc_arena(e);
  upb_Message* payload = upb_Message_New(payload_m, scratch);

  if (upb_Decode(payload_bytes.data, payload_bytes.size, payload,
                 payload_layout, NULL, 0, scratch) != kUpb_DecodeStatus_Ok) {
    jsonenc_err(e, "Error decoding message in Any");
  }

  jsonenc_putstr(e, "{\"@type\":");
  jsonenc_string(e, type_url);

  if (upb_MessageDef_WellKnownType(payload_m) != kUpb_WellKnown_Unspecified) {
    /* Well-known type: {"@type": "...","value": <well-known encoding>} */
    jsonenc_putstr(e, ",\"value\":");
    jsonenc_msgfield(e, payload, payload_m);
  } else {
    /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */
    jsonenc_msgfields(e, payload, payload_m, false);
  }

  jsonenc_putstr(e, "}");
}
3887
/* Emits the separator `str` before every element except the first.  `*first`
 * is cleared on the first call. */
static void jsonenc_putsep(jsonenc* e, const char* str, bool* first) {
  if (!*first) {
    jsonenc_putstr(e, str);
    return;
  }
  *first = false;
}
3895
/* Emits one FieldMask path, converting snake_case to lowerCamelCase: each
 * "_x" becomes "X".  Upper-case letters in the input, and underscores that
 * are not followed by a lower-case letter, raise an encoder error. */
static void jsonenc_fieldpath(jsonenc* e, upb_StringView path) {
  const char* ptr = path.data;
  const char* end = ptr + path.size;

  for (; ptr < end; ptr++) {
    char ch = *ptr;

    if (ch >= 'A' && ch <= 'Z') {
      jsonenc_err(e, "Field mask element may not have upper-case letter.");
    } else if (ch == '_') {
      const char* next = ptr + 1;

      if (next == end || *next < 'a' || *next > 'z') {
        jsonenc_err(e, "Underscore must be followed by a lowercase letter.");
      }
      /* Consume the underscore and upper-case the letter after it. */
      ptr = next;
      ch = *ptr - 32;
    }

    jsonenc_putbytes(e, &ch, 1);
  }
}
3916
/* Emits a FieldMask (repeated paths = field 1) as one quoted string of
 * comma-separated camelCase paths. */
static void jsonenc_fieldmask(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_Array* paths = upb_Message_Get(msg, paths_f).array_val;
  const size_t count = paths ? upb_Array_Size(paths) : 0;
  size_t idx;

  jsonenc_putstr(e, "\"");

  for (idx = 0; idx < count; idx++) {
    if (idx != 0) jsonenc_putstr(e, ",");
    jsonenc_fieldpath(e, upb_Array_Get(paths, idx).str_val);
  }

  jsonenc_putstr(e, "\"");
}
3935
/* Emits a Struct (map fields = field 1) as a JSON object whose member values
 * are written by jsonenc_value().  An absent map yields "{}". */
static void jsonenc_struct(jsonenc* e, const upb_Message* msg,
                           const upb_MessageDef* m) {
  const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_Map* fields = upb_Message_Get(msg, fields_f).map_val;
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
  size_t iter = kUpb_Map_Begin;
  bool first = true;

  jsonenc_putstr(e, "{");

  while (fields && upb_MapIterator_Next(fields, &iter)) {
    upb_MessageValue key = upb_MapIterator_Key(fields, iter);
    upb_MessageValue val = upb_MapIterator_Value(fields, iter);

    jsonenc_putsep(e, ",", &first);
    jsonenc_string(e, key.str_val);
    jsonenc_putstr(e, ":");
    jsonenc_value(e, val.msg_val, upb_FieldDef_MessageSubDef(value_f));
  }

  jsonenc_putstr(e, "}");
}
3961
/* Emits a ListValue (repeated values = field 1) as a JSON array whose
 * elements are written by jsonenc_value().  An absent array yields "[]". */
static void jsonenc_listvalue(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* values_m = upb_FieldDef_MessageSubDef(values_f);
  const upb_Array* values = upb_Message_Get(msg, values_f).array_val;
  const size_t count = values ? upb_Array_Size(values) : 0;
  size_t idx;

  jsonenc_putstr(e, "[");

  for (idx = 0; idx < count; idx++) {
    upb_MessageValue elem = upb_Array_Get(values, idx);

    if (idx != 0) jsonenc_putstr(e, ",");
    jsonenc_value(e, elem.msg_val, values_m);
  }

  jsonenc_putstr(e, "]");
}
3984
/* Emits a Value message according to the first field that
 * upb_Message_Next() reports as set; the field number selects the JSON form.
 * Raises an encoder error when no field is set. */
static void jsonenc_value(jsonenc* e, const upb_Message* msg,
                          const upb_MessageDef* m) {
  /* TODO(haberman): do we want a reflection method to get oneof case? */
  size_t iter = kUpb_Message_Begin;
  const upb_FieldDef* set_f;
  upb_MessageValue set_val;

  if (!upb_Message_Next(msg, m, NULL, &set_f, &set_val, &iter)) {
    jsonenc_err(e, "No value set in Value proto");
  }

  switch (upb_FieldDef_Number(set_f)) {
    case 1:
      jsonenc_putstr(e, "null");
      return;
    case 2:
      upb_JsonEncode_Double(e, set_val.double_val);
      return;
    case 3:
      jsonenc_string(e, set_val.str_val);
      return;
    case 4:
      jsonenc_putstr(e, set_val.bool_val ? "true" : "false");
      return;
    case 5:
      jsonenc_struct(e, set_val.msg_val, upb_FieldDef_MessageSubDef(set_f));
      return;
    case 6:
      jsonenc_listvalue(e, set_val.msg_val, upb_FieldDef_MessageSubDef(set_f));
      return;
  }
}
4017
/* Emits a message value, choosing the encoder from the message's well-known
 * type.  Ordinary messages go through jsonenc_msg(). */
static void jsonenc_msgfield(jsonenc* e, const upb_Message* msg,
                             const upb_MessageDef* m) {
  switch (upb_MessageDef_WellKnownType(m)) {
    case kUpb_WellKnown_Unspecified:
      jsonenc_msg(e, msg, m);
      return;
    case kUpb_WellKnown_Any:
      jsonenc_any(e, msg, m);
      return;
    case kUpb_WellKnown_FieldMask:
      jsonenc_fieldmask(e, msg, m);
      return;
    case kUpb_WellKnown_Duration:
      jsonenc_duration(e, msg, m);
      return;
    case kUpb_WellKnown_Timestamp:
      jsonenc_timestamp(e, msg, m);
      return;
    /* All scalar wrapper types share a single encoder. */
    case kUpb_WellKnown_DoubleValue:
    case kUpb_WellKnown_FloatValue:
    case kUpb_WellKnown_Int64Value:
    case kUpb_WellKnown_UInt64Value:
    case kUpb_WellKnown_Int32Value:
    case kUpb_WellKnown_UInt32Value:
    case kUpb_WellKnown_StringValue:
    case kUpb_WellKnown_BytesValue:
    case kUpb_WellKnown_BoolValue:
      jsonenc_wrapper(e, msg, m);
      return;
    case kUpb_WellKnown_Value:
      jsonenc_value(e, msg, m);
      return;
    case kUpb_WellKnown_ListValue:
      jsonenc_listvalue(e, msg, m);
      return;
    case kUpb_WellKnown_Struct:
      jsonenc_struct(e, msg, m);
      return;
  }
}
4058
/* Emits one non-repeated value of field `f`, selected by the field's C type.
 * 64-bit integers are written as quoted strings; 32-bit integers are bare. */
static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
                           const upb_FieldDef* f) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      jsonenc_putstr(e, val.bool_val ? "true" : "false");
      return;
    case kUpb_CType_Float:
      upb_JsonEncode_Float(e, val.float_val);
      return;
    case kUpb_CType_Double:
      upb_JsonEncode_Double(e, val.double_val);
      return;
    case kUpb_CType_Int32:
      jsonenc_printf(e, "%" PRId32, val.int32_val);
      return;
    case kUpb_CType_UInt32:
      jsonenc_printf(e, "%" PRIu32, val.uint32_val);
      return;
    case kUpb_CType_Int64:
      jsonenc_printf(e, "\"%" PRId64 "\"", val.int64_val);
      return;
    case kUpb_CType_UInt64:
      jsonenc_printf(e, "\"%" PRIu64 "\"", val.uint64_val);
      return;
    case kUpb_CType_String:
      jsonenc_string(e, val.str_val);
      return;
    case kUpb_CType_Bytes:
      jsonenc_bytes(e, val.str_val);
      return;
    case kUpb_CType_Enum:
      jsonenc_enum(val.int32_val, f, e);
      return;
    case kUpb_CType_Message:
      jsonenc_msgfield(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
      return;
  }
}
4097
/* Emits a map key followed by ':'.  JSON object keys are always strings, so
 * bool and integer keys are written inside quotes as well.  Any other key
 * type is treated as unreachable. */
static void jsonenc_mapkey(jsonenc* e, upb_MessageValue val,
                           const upb_FieldDef* f) {
  jsonenc_putbytes(e, "\"", 1);

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      jsonenc_putstr(e, val.bool_val ? "true" : "false");
      break;
    case kUpb_CType_Int32:
      jsonenc_printf(e, "%" PRId32, val.int32_val);
      break;
    case kUpb_CType_UInt32:
      jsonenc_printf(e, "%" PRIu32, val.uint32_val);
      break;
    case kUpb_CType_Int64:
      jsonenc_printf(e, "%" PRId64, val.int64_val);
      break;
    case kUpb_CType_UInt64:
      jsonenc_printf(e, "%" PRIu64, val.uint64_val);
      break;
    case kUpb_CType_String:
      jsonenc_stringbody(e, val.str_val);
      break;
    default:
      UPB_UNREACHABLE();
  }

  jsonenc_putbytes(e, "\":", 2);
}
4127
/* Emits a repeated field as a JSON array; each element is written by
 * jsonenc_scalar().  A NULL array yields "[]". */
static void jsonenc_array(jsonenc* e, const upb_Array* arr,
                          const upb_FieldDef* f) {
  const size_t count = arr ? upb_Array_Size(arr) : 0;
  size_t idx;

  jsonenc_putstr(e, "[");

  for (idx = 0; idx < count; idx++) {
    if (idx != 0) jsonenc_putstr(e, ",");
    jsonenc_scalar(e, upb_Array_Get(arr, idx), f);
  }

  jsonenc_putstr(e, "]");
}
4143
/* Emits a map field as a JSON object.  Key and value types come from fields
 * 1 and 2 of the map-entry message.  A NULL map yields "{}". */
static void jsonenc_map(jsonenc* e, const upb_Map* map, const upb_FieldDef* f) {
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry_m, 1);
  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
  size_t iter = kUpb_Map_Begin;
  bool first = true;

  jsonenc_putstr(e, "{");

  while (map && upb_MapIterator_Next(map, &iter)) {
    jsonenc_putsep(e, ",", &first);
    jsonenc_mapkey(e, upb_MapIterator_Key(map, iter), key_f);
    jsonenc_scalar(e, upb_MapIterator_Value(map, iter), val_f);
  }

  jsonenc_putstr(e, "}");
}
4163
/* Emits one `"name":value` member for field `f`, preceded by a comma unless
 * it is the first member.  Extensions are keyed by "[full.name]"; regular
 * fields use the proto name or the JSON name depending on the
 * upb_JsonEncode_UseProtoNames option. */
static void jsonenc_fieldval(jsonenc* e, const upb_FieldDef* f,
                             upb_MessageValue val, bool* first) {
  jsonenc_putsep(e, ",", first);

  if (upb_FieldDef_IsExtension(f)) {
    // TODO: For MessageSet, I would have expected this to print the message
    // name here, but Python doesn't appear to do this.  We should do more
    // research here about what various implementations do.
    jsonenc_printf(e, "\"[%s]\":", upb_FieldDef_FullName(f));
  } else {
    const char* name = (e->options & upb_JsonEncode_UseProtoNames)
                           ? upb_FieldDef_Name(f)
                           : upb_FieldDef_JsonName(f);
    jsonenc_printf(e, "\"%s\":", name);
  }

  if (upb_FieldDef_IsMap(f)) {
    jsonenc_map(e, val.map_val, f);
    return;
  }
  if (upb_FieldDef_IsRepeated(f)) {
    jsonenc_array(e, val.array_val, f);
    return;
  }
  jsonenc_scalar(e, val, f);
}
4192
/* Emits the members of `msg` without the surrounding braces.  `first` tells
 * whether the next member is the first in the enclosing object, which
 * controls the leading comma.
 *
 * With upb_JsonEncode_EmitDefaults every declared field is written, except
 * fields that track presence and are unset.  Otherwise only the fields that
 * upb_Message_Next() reports are written. */
static void jsonenc_msgfields(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m, bool first) {
  upb_MessageValue val;
  const upb_FieldDef* f;
  int idx;
  int count;

  if (!(e->options & upb_JsonEncode_EmitDefaults)) {
    /* Iterate over non-empty fields. */
    size_t iter = kUpb_Message_Begin;
    while (upb_Message_Next(msg, m, e->ext_pool, &f, &val, &iter)) {
      jsonenc_fieldval(e, f, val, &first);
    }
    return;
  }

  /* Iterate over all fields. */
  count = upb_MessageDef_FieldCount(m);
  for (idx = 0; idx < count; idx++) {
    f = upb_MessageDef_Field(m, idx);
    if (upb_FieldDef_HasPresence(f) && !upb_Message_Has(msg, f)) continue;
    jsonenc_fieldval(e, f, upb_Message_Get(msg, f), &first);
  }
}
4216
/* Emits an ordinary message as a JSON object: its members wrapped in
 * braces. */
static void jsonenc_msg(jsonenc* e, const upb_Message* msg,
                        const upb_MessageDef* m) {
  const bool is_first_member = true;

  jsonenc_putstr(e, "{");
  jsonenc_msgfields(e, msg, m, is_first_member);
  jsonenc_putstr(e, "}");
}
4223
/* NUL-terminates the output (when the buffer has any capacity) and returns
 * the total number of bytes the encoding needed, including bytes that
 * overflowed the buffer.  If the buffer is completely full, the last byte is
 * overwritten by the terminator. */
static size_t jsonenc_nullz(jsonenc* e, size_t size) {
  const size_t total = e->ptr - e->buf + e->overflow;

  if (size == 0) return total;

  if (e->ptr == e->end) e->ptr--;
  *e->ptr = '\0';
  return total;
}
4234
/* Serializes `msg` (of type `m`) as JSON into `buf`.
 *
 * At most `size` bytes are written, and the output is NUL-terminated whenever
 * size > 0 (truncating if necessary).
 *
 * Returns the number of bytes the complete encoding requires, not counting
 * the terminator; this may exceed `size`, in which case the output was
 * truncated.  On error, returns (size_t)-1 and the message is stored in
 * `status`.
 *
 * `ext_pool` is used to resolve Any type URLs and is passed to
 * upb_Message_Next() during field iteration. */
size_t upb_JsonEncode(const upb_Message* msg, const upb_MessageDef* m,
                      const upb_DefPool* ext_pool, int options, char* buf,
                      size_t size, upb_Status* status) {
  jsonenc e;

  e.buf = buf;
  e.ptr = buf;
  e.end = UPB_PTRADD(buf, size);
  e.overflow = 0;
  e.options = options;
  e.ext_pool = ext_pool;
  e.status = status;
  e.arena = NULL;

  if (setjmp(e.err)) {
    /* An encoder error longjmp'd back here.  The scratch arena may already
     * have been created (e.g. an Any payload failed to decode), so release it
     * on this path too instead of leaking it.  `e` has its address taken, so
     * the arena pointer stored by jsonenc_arena() is read back from memory. */
    if (e.arena) upb_Arena_Free(e.arena);
    return -1;
  }

  jsonenc_msgfield(&e, msg, m);
  if (e.arena) upb_Arena_Free(e.arena);
  return jsonenc_nullz(&e, size);
}
4255
4256 /** upb/mini_table.c ************************************************************/
4257
4258 #include <inttypes.h>
4259 #include <setjmp.h>
4260
4261
4262 // Must be last.
4263
/* Numeric codes for field types.
 * NOTE(review): presumably these are the values written into the encoded
 * mini-table string for each field -- confirm against the code that consumes
 * them (not visible in this part of the file). */
typedef enum {
  kUpb_EncodedType_Double = 0,
  kUpb_EncodedType_Float = 1,
  kUpb_EncodedType_Fixed32 = 2,
  kUpb_EncodedType_Fixed64 = 3,
  kUpb_EncodedType_SFixed32 = 4,
  kUpb_EncodedType_SFixed64 = 5,
  kUpb_EncodedType_Int32 = 6,
  kUpb_EncodedType_UInt32 = 7,
  kUpb_EncodedType_SInt32 = 8,
  kUpb_EncodedType_Int64 = 9,
  kUpb_EncodedType_UInt64 = 10,
  kUpb_EncodedType_SInt64 = 11,
  kUpb_EncodedType_Enum = 12,
  kUpb_EncodedType_Bool = 13,
  kUpb_EncodedType_Bytes = 14,
  kUpb_EncodedType_String = 15,
  kUpb_EncodedType_Group = 16,
  kUpb_EncodedType_Message = 17,

  /* NOTE(review): looks like an offset added to the codes above for repeated
   * fields -- confirm. */
  kUpb_EncodedType_RepeatedBase = 20,
} upb_EncodedType;
4286
/* Bit flags for per-field modifiers; each value is a distinct bit so they can
 * be OR'd together. */
typedef enum {
  kUpb_EncodedFieldModifier_FlipPacked = 1 << 0,
  kUpb_EncodedFieldModifier_IsClosedEnum = 1 << 1,
  // upb only.
  kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2,
  kUpb_EncodedFieldModifier_IsRequired = 1 << 3,
} upb_EncodedFieldModifier;
4294
/* Character constants that partition the encoded alphabet into ranges.
 * The Min/Max pairs are passed to upb_MtDataEncoder_PutBase92Varint() to pick
 * the digit range for a value (see upb_MtDataEncoder_PutModifier()).
 * NOTE(review): the meaning of the remaining constants is inferred from their
 * names only -- confirm against their users. */
enum {
  kUpb_EncodedValue_MinField = ' ',
  kUpb_EncodedValue_MaxField = 'K',
  kUpb_EncodedValue_MinModifier = 'L',
  kUpb_EncodedValue_MaxModifier = '[',
  kUpb_EncodedValue_End = '^',
  kUpb_EncodedValue_MinSkip = '_',
  kUpb_EncodedValue_MaxSkip = '~',
  kUpb_EncodedValue_OneofSeparator = '~',
  kUpb_EncodedValue_FieldSeparator = '|',
  kUpb_EncodedValue_MinOneofField = ' ',
  kUpb_EncodedValue_MaxOneofField = 'b',
  kUpb_EncodedValue_MaxEnumMask = 'A',
};
4309
/* Maps a digit value in [0, 92) to its character in the base-92 alphabet:
 * printable ASCII from ' ' to '~' with the double quote, single quote and
 * backslash left out.  Out-of-range input trips UPB_ASSERT. */
char upb_ToBase92(int8_t ch) {
  /* Digits in order; index i holds the character for value i. */
  static const char kDigits[] =
      " !#$%&()*+,-./"
      "0123456789:;<="
      ">?@ABCDEFGHIJK"
      "LMNOPQRSTUVWXY"
      "Z[]^_`abcdefgh"
      "ijklmnopqrstuv"
      "wxyz{|}~";

  UPB_ASSERT(0 <= ch && ch < 92);
  return kDigits[ch];
}
4324
/* Maps a character of the base-92 alphabet back to its digit value in
 * [0, 92).  Returns -1 for any character outside ' '..'~' and for the three
 * printable characters that are not part of the alphabet (double quote,
 * single quote and backslash). */
char upb_FromBase92(uint8_t ch) {
  /* Indexed by (ch - ' '); -1 marks characters that are not digits. */
  static const int8_t kValues[] = {
      0,  1,  -1, 2,  3,  4,  5,  -1, 6,  7,  8,  9,  10, 11, 12, 13,
      14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
      30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
      46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, 58, 59, 60,
      61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
      77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
  };

  if (ch < ' ' || ch > '~') return -1;
  return kValues[ch - ' '];
}
4338
/* Returns true unless `type` is one of the types that cannot use packed
 * encoding: string, bytes, message and group. */
bool upb_IsTypePackable(upb_FieldType type) {
  // clang-format off
  static const unsigned kUnpackableTypes =
      (1 << kUpb_FieldType_String) |
      (1 << kUpb_FieldType_Bytes) |
      (1 << kUpb_FieldType_Message) |
      (1 << kUpb_FieldType_Group);
  // clang-format on
  const unsigned packable_mask = ~kUnpackableTypes;

  return ((1 << type) & packable_mask) != 0;
}
4349
4350 /** upb_MtDataEncoder *********************************************************/
4351
/* Encoder state kept while an enum is being encoded.
 * NOTE(review): neither member is used in this part of the file; the
 * descriptions below are inferred from the names -- confirm. */
typedef struct {
  uint64_t present_values_mask; /* Presumably a bitmask of values seen. */
  uint32_t last_written_value;  /* Presumably the last value emitted. */
} upb_MtDataEncoderInternal_EnumState;
4356
/* Encoder state kept while a message is being encoded.  All three members are
 * reset by upb_MtDataEncoder_StartMessage(). */
typedef struct {
  uint64_t msg_modifiers;  /* Modifiers passed to StartMessage(). */
  uint32_t last_field_num; /* Starts at 0. */
  /* Starts at kUpb_OneofState_NotStarted.
   * NOTE(review): transitions happen outside this part of the file. */
  enum {
    kUpb_OneofState_NotStarted,
    kUpb_OneofState_StartedOneof,
    kUpb_OneofState_EmittedOneofField,
  } oneof_state;
} upb_MtDataEncoderInternal_MsgState;
4366
/* Private encoder state, stored inside upb_MtDataEncoder::internal (see
 * upb_MtDataEncoder_GetInternal()).  Only one of the two state variants is in
 * use at a time. */
typedef struct {
  char* buf_start;  // Only for checking kUpb_MtDataEncoder_MinSize.
  union {
    upb_MtDataEncoderInternal_EnumState enum_state;
    upb_MtDataEncoderInternal_MsgState msg_state;
  } state;
} upb_MtDataEncoderInternal;
4374
upb_MtDataEncoder_GetInternal(upb_MtDataEncoder * e,char * buf_start)4375 static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
4376 upb_MtDataEncoder* e, char* buf_start) {
4377 UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal));
4378 upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal;
4379 ret->buf_start = buf_start;
4380 return ret;
4381 }
4382
/* Writes the base-92 character for digit value `ch` at `ptr`.  Returns the
 * position after the written character, or NULL when `ptr` has reached
 * e->end. */
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
  upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;

  UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize);
  if (ptr == e->end) return NULL;

  *ptr = upb_ToBase92(ch);
  return ptr + 1;
}
4390
/* Writes `val` as a little-endian sequence of digits drawn from the character
 * range [min, max].  Each digit carries `shift` bits, where `shift` is
 * _upb_Log2Ceiling() of the range size; at least one digit is always written.
 * Returns the new write position, or NULL if the buffer filled up. */
static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
                                               uint32_t val, int min, int max) {
  const char first_digit = upb_FromBase92(min);
  int shift = _upb_Log2Ceiling(upb_FromBase92(max) - first_digit + 1);
  UPB_ASSERT(shift <= 6);
  uint32_t mask = (1 << shift) - 1;

  do {
    uint32_t chunk = val & mask;
    ptr = upb_MtDataEncoder_Put(e, ptr, chunk + first_digit);
    if (!ptr) return NULL;
    val >>= shift;
  } while (val);

  return ptr;
}
4404
// Emits the modifier bits `mod` as a varint in the modifier character range.
// A zero `mod` emits nothing and returns `ptr` unchanged.
//
// Returns the new write position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr,
                                    uint64_t mod) {
  if (mod == 0) return ptr;
  return upb_MtDataEncoder_PutBase92Varint(e, ptr, mod,
                                           kUpb_EncodedValue_MinModifier,
                                           kUpb_EncodedValue_MaxModifier);
}
4414
// Begins encoding a message: resets the per-message state and emits the
// message modifiers `msg_mod` (nothing is written when they are zero).
//
// Returns the new write position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
                                     uint64_t msg_mod) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;

  msg->oneof_state = kUpb_OneofState_NotStarted;
  msg->last_field_num = 0;
  msg->msg_modifiers = msg_mod;

  return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod);
}
4423
// Emits one field: an optional skip (when `field_num` does not directly follow
// the previous field), the encoded type character, and any field modifiers.
//
// Field numbers must be strictly increasing within a message; otherwise NULL
// is returned.  NULL is also returned when the buffer fills up.
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
                                 upb_FieldType type, uint32_t field_num,
                                 uint64_t field_mod) {
  static const char kUpb_TypeToEncoded[] = {
      [kUpb_FieldType_Double] = kUpb_EncodedType_Double,
      [kUpb_FieldType_Float] = kUpb_EncodedType_Float,
      [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64,
      [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64,
      [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32,
      [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64,
      [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32,
      [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool,
      [kUpb_FieldType_String] = kUpb_EncodedType_String,
      [kUpb_FieldType_Group] = kUpb_EncodedType_Group,
      [kUpb_FieldType_Message] = kUpb_EncodedType_Message,
      [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes,
      [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32,
      [kUpb_FieldType_Enum] = kUpb_EncodedType_Enum,
      [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32,
      [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64,
      [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32,
      [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64,
  };

  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  upb_MtDataEncoderInternal_MsgState* msg = &in->state.msg_state;

  if (field_num <= msg->last_field_num) return NULL;

  const uint32_t gap = field_num - msg->last_field_num;
  if (gap != 1) {
    // Not the immediate successor of the previous field: emit a skip first.
    UPB_ASSERT(field_num > msg->last_field_num);
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, gap, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    if (ptr == NULL) return NULL;
  }
  msg->last_field_num = field_num;

  // An enum field that is not flagged as a closed enum is encoded as int32.
  if (type == kUpb_FieldType_Enum &&
      (field_mod & kUpb_FieldModifier_IsClosedEnum) == 0) {
    type = kUpb_FieldType_Int32;
  }

  uint32_t encoded_modifiers = 0;
  int encoded_type = kUpb_TypeToEncoded[type];

  if (field_mod & kUpb_FieldModifier_IsRepeated) {
    // Repeated fields shift the type number up (unlike other modifiers, which
    // are bit flags).
    encoded_type += kUpb_EncodedType_RepeatedBase;

    if (upb_IsTypePackable(type)) {
      // Only a deviation from the message-level packed default is recorded.
      const bool field_is_packed =
          (field_mod & kUpb_FieldModifier_IsPacked) != 0;
      const bool default_is_packed =
          (msg->msg_modifiers & kUpb_MessageModifier_DefaultIsPacked) != 0;
      if (field_is_packed != default_is_packed) {
        encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked;
      }
    }
  }

  ptr = upb_MtDataEncoder_Put(e, ptr, encoded_type);
  if (ptr == NULL) return NULL;

  if (field_mod & kUpb_FieldModifier_IsProto3Singular) {
    encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular;
  }
  if (field_mod & kUpb_FieldModifier_IsRequired) {
    encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired;
  }
  return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers);
}
4494
// Begins a new oneof.  The first oneof of a message is introduced by the
// kUpb_EncodedValue_End marker; every later one by the oneof separator.
//
// Returns the new write position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;

  const int marker = (msg->oneof_state == kUpb_OneofState_NotStarted)
                         ? kUpb_EncodedValue_End
                         : kUpb_EncodedValue_OneofSeparator;
  char* next = upb_MtDataEncoder_Put(e, ptr, upb_FromBase92(marker));

  msg->oneof_state = kUpb_OneofState_StartedOneof;
  return next;
}
4506
// Adds the field numbered `field_num` to the oneof currently being encoded.
// A field separator is emitted first when this is not the first member.
//
// Returns the new write position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
                                      uint32_t field_num) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;

  const bool needs_separator =
      (msg->oneof_state == kUpb_OneofState_EmittedOneofField);
  if (needs_separator) {
    ptr = upb_MtDataEncoder_Put(
        e, ptr, upb_FromBase92(kUpb_EncodedValue_FieldSeparator));
    if (ptr == NULL) return NULL;
  }

  char* next = upb_MtDataEncoder_PutBase92Varint(
      e, ptr, field_num, upb_ToBase92(0), upb_ToBase92(63));
  msg->oneof_state = kUpb_OneofState_EmittedOneofField;
  return next;
}
4520
// Begins encoding an enum: clears the pending-value mask and resets the base
// value to 0.  Nothing is written to any buffer.
void upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e) {
  upb_MtDataEncoderInternal_EnumState* en =
      &upb_MtDataEncoder_GetInternal(e, NULL)->state.enum_state;
  en->last_written_value = 0;
  en->present_values_mask = 0;
}
4526
// Writes the pending enum value mask as a single character, then clears the
// mask and advances the base value by 5 (the number of values one mask
// character covers).
//
// Returns the new write position, or NULL if the buffer filled up.  The state
// is advanced in either case.
static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
                                                  char* ptr) {
  upb_MtDataEncoderInternal_EnumState* en =
      &((upb_MtDataEncoderInternal*)e->internal)->state.enum_state;

  char* next = upb_MtDataEncoder_Put(e, ptr, en->present_values_mask);
  en->present_values_mask = 0;
  en->last_written_value += 5;
  return next;
}
4535
// Records the enum value `val`.  Values must be supplied in non-decreasing
// order (asserted).
//
// Values within 5 of the current base are accumulated in a bit mask; a value
// further away first flushes any pending mask and then emits a skip.
//
// Returns the new write position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
                                     uint32_t val) {
  // TODO(b/229641772): optimize this encoding.
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  UPB_ASSERT(val >= in->state.enum_state.last_written_value);
  uint32_t delta = val - in->state.enum_state.last_written_value;
  if (delta >= 5 && in->state.enum_state.present_values_mask) {
    ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
    // The flush fails when the buffer is full.  Stop here: continuing would
    // hand a NULL write position to the skip encoder below, which would
    // dereference it.
    if (!ptr) return NULL;
    delta -= 5;
  }

  if (delta >= 5) {
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    in->state.enum_state.last_written_value += delta;
    delta = 0;
  }

  // Values arrive in order, so no bit at or above `delta` may be set yet.
  UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0);
  in->state.enum_state.present_values_mask |= 1ULL << delta;
  return ptr;
}
4558
// Finishes an enum by flushing any values still pending in the mask.
//
// Returns the new write position (unchanged when nothing was pending), or NULL
// if the buffer filled up.
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) {
  const upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  if (in->state.enum_state.present_values_mask != 0) {
    return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
  }
  return ptr;
}
4564
upb_MiniTable_FindFieldByNumber(const upb_MiniTable * table,uint32_t number)4565 const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
4566 const upb_MiniTable* table, uint32_t number) {
4567 int n = table->field_count;
4568 for (int i = 0; i < n; i++) {
4569 if (table->fields[i].number == number) {
4570 return &table->fields[i];
4571 }
4572 }
4573 return NULL;
4574 }
4575
4576 /** Data decoder **************************************************************/
4577
// Kind of storage a layout item reserves inside a message.
//
// Note: we sort by this number when calculating layout order, so the numeric
// values are significant.
typedef enum {
  kUpb_LayoutItemType_OneofCase = 0,   // Oneof case.
  kUpb_LayoutItemType_OneofField = 1,  // Oneof field data.
  kUpb_LayoutItemType_Field = 2,       // Non-oneof field data.

  kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
4586
4587 #define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1)
4588
4589 typedef struct {
4590 // Index of the corresponding field. When this is a oneof field, the field's
4591 // offset will be the index of the next field in a linked list.
4592 uint16_t field_index;
4593 uint16_t offset;
4594 upb_FieldRep rep;
4595 upb_LayoutItemType type;
4596 } upb_LayoutItem;
4597
4598 typedef struct {
4599 upb_LayoutItem* data;
4600 size_t size;
4601 size_t capacity;
4602 } upb_LayoutItemVector;
4603
4604 typedef struct {
4605 const char* end;
4606 upb_MiniTable* table;
4607 upb_MiniTable_Field* fields;
4608 upb_MiniTablePlatform platform;
4609 upb_LayoutItemVector vec;
4610 upb_Arena* arena;
4611 upb_Status* status;
4612 jmp_buf err;
4613 } upb_MtDecoder;
4614
4615 UPB_PRINTF(2, 3)
upb_MtDecoder_ErrorFormat(upb_MtDecoder * d,const char * fmt,...)4616 UPB_NORETURN static void upb_MtDecoder_ErrorFormat(upb_MtDecoder* d,
4617 const char* fmt, ...) {
4618 va_list argp;
4619 upb_Status_SetErrorMessage(d->status, "Error building mini table: ");
4620 va_start(argp, fmt);
4621 upb_Status_VAppendErrorFormat(d->status, fmt, argp);
4622 va_end(argp);
4623 UPB_LONGJMP(d->err, 1);
4624 }
4625
// Aborts decoding with an "Out of memory" error when `ptr` (the result of an
// allocation) is NULL; otherwise does nothing.
static void upb_MtDecoder_CheckOutOfMemory(upb_MtDecoder* d, const void* ptr) {
  if (ptr != NULL) return;
  upb_MtDecoder_ErrorFormat(d, "Out of memory");
}
4629
// While a table is being built, each field's `offset` temporarily holds a
// presence classifier instead of a real offset:
enum PresenceClass {
  kNoPresence = 0,        // No presence tracking for the field.
  kHasbitPresence = 1,    // Presence tracked by a hasbit.
  kRequiredPresence = 2,  // Required field; also tracked by a hasbit.
  kOneofBase = 3,
  // Negative values refer to a specific oneof with that number.  Positive
  // values >= kOneofBase indicate that this field is in a oneof, and specify
  // the next field in this oneof's linked list.
};
4640
// Decodes a little-endian base92 varint whose digits lie in [min, max].
//
// `first_ch` is the first digit (already consumed by the caller) and `ptr`
// points at the character after it.  Digits are consumed until the end of
// input or a character outside [min, max] is reached.
//
// The decoded value is stored in `*out_val`.  Returns the position of the
// first character that is not part of the varint.
//
// A varint with so many digits that it would shift past 32 bits aborts
// decoding with an error.  This was previously only asserted, which left an
// out-of-range shift (undefined behavior) reachable from malformed input in
// builds without assertions.
static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d,
                                                    const char* ptr,
                                                    char first_ch, uint8_t min,
                                                    uint8_t max,
                                                    uint32_t* out_val) {
  uint32_t val = 0;
  uint32_t shift = 0;
  const int bits_per_char =
      _upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min));
  char ch = first_ch;
  while (1) {
    uint32_t bits = upb_FromBase92(ch) - upb_FromBase92(min);
    val |= bits << shift;
    if (ptr == d->end || *ptr < min || max < *ptr) {
      *out_val = val;
      return ptr;
    }
    ch = *ptr++;
    shift += bits_per_char;
    if (shift >= 32) {
      // The next digit would not fit in a 32-bit value.
      upb_MtDecoder_ErrorFormat(d, "Overlong varint");
      UPB_UNREACHABLE();
    }
  }
}
4663
// Reports whether `field` needs an entry in the table's sub array: true for
// message, group and enum fields, false for everything else.
//
// Side effect: a string field is re-typed as bytes unless the message
// modifiers request UTF-8 validation.
static bool upb_MiniTable_HasSub(upb_MiniTable_Field* field,
                                 uint64_t msg_modifiers) {
  const int type = field->descriptortype;

  if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group ||
      type == kUpb_FieldType_Enum) {
    return true;
  }

  if (type == kUpb_FieldType_String &&
      (msg_modifiers & kUpb_MessageModifier_ValidateUtf8) == 0) {
    field->descriptortype = kUpb_FieldType_Bytes;
  }
  return false;
}
4680
// True when `field` has the array mode bit set and its type is packable.
static bool upb_MtDecoder_FieldIsPackable(upb_MiniTable_Field* field) {
  if ((field->mode & kUpb_FieldMode_Array) == 0) return false;
  return upb_IsTypePackable(field->descriptortype);
}
4685
// Stores `type` in `field` and assigns its sub index.
//
// Fields that need a sub entry take the next index from `*sub_count` (which is
// then incremented), or index 0 when `sub_count` is NULL.  All other fields
// get kUpb_NoSub.  A packable repeated field is additionally marked packed
// when the message defaults to packed encoding.
static void upb_MiniTable_SetTypeAndSub(upb_MiniTable_Field* field,
                                        upb_FieldType type, uint32_t* sub_count,
                                        uint64_t msg_modifiers) {
  field->descriptortype = type;

  if (!upb_MiniTable_HasSub(field, msg_modifiers)) {
    field->submsg_index = kUpb_NoSub;
  } else if (sub_count != NULL) {
    field->submsg_index = *sub_count;
    *sub_count += 1;
  } else {
    field->submsg_index = 0;
  }

  const bool default_is_packed =
      (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked) != 0;
  if (default_is_packed && upb_MtDecoder_FieldIsPackable(field)) {
    field->mode |= kUpb_LabelFlags_IsPacked;
  }
}
4701
// Initializes `field` from the encoded field-type character `ch`.
//
// Characters at or above the repeated base describe an array field (pointer
// representation, no presence).  Lower characters describe a scalar whose
// representation comes from kUpb_EncodedToFieldRep and which starts out with
// hasbit presence.  The presence class is stored temporarily in
// `field->offset`.
//
// An unknown type aborts decoding with "Invalid field type".  The type is
// validated BEFORE it is used as a table index; the lookup used to run ahead
// of the range check, and negative values were not rejected at all.
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
                                   upb_MiniTable_Field* field,
                                   uint64_t msg_modifiers,
                                   uint32_t* sub_count) {
  static const char kUpb_EncodedToFieldRep[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
      [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_Group] = kUpb_FieldRep_Pointer,
      [kUpb_EncodedType_Message] = kUpb_FieldRep_Pointer,
      [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Enum] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
  };

  static const char kUpb_EncodedToType[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldType_Double,
      [kUpb_EncodedType_Float] = kUpb_FieldType_Float,
      [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
      [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
      [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
      [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
      [kUpb_EncodedType_String] = kUpb_FieldType_String,
      [kUpb_EncodedType_Group] = kUpb_FieldType_Group,
      [kUpb_EncodedType_Message] = kUpb_FieldType_Message,
      [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
      [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
      [kUpb_EncodedType_Enum] = kUpb_FieldType_Enum,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
      [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
      [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
  };

  // Same upper limit as the original range check.
  enum { kNumEncodedTypes = 18 };

  int8_t type = upb_FromBase92(ch);
  const bool is_repeated = ch >= upb_ToBase92(kUpb_EncodedType_RepeatedBase);
  if (is_repeated) type -= kUpb_EncodedType_RepeatedBase;

  // Reject anything that cannot index both tables.  The negative check covers
  // characters that upb_FromBase92() does not map to a valid digit
  // (NOTE(review): presumably reported as a negative value; that helper is not
  // visible here).
  if (type < 0 || type >= kNumEncodedTypes ||
      (size_t)type >= sizeof(kUpb_EncodedToFieldRep) ||
      (size_t)type >= sizeof(kUpb_EncodedToType)) {
    upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type);
    UPB_UNREACHABLE();
  }

  if (is_repeated) {
    field->mode = kUpb_FieldMode_Array;
    field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift;
    field->offset = kNoPresence;
  } else {
    field->mode = kUpb_FieldMode_Scalar;
    field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift;
    field->offset = kHasbitPresence;
  }

  upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_count,
                              msg_modifiers);
}
4766
// Applies the decoded `field_modifiers` to `field`.
//
//  - FlipPacked toggles the packed flag; only allowed on packable fields.
//  - IsProto3Singular changes the presence class to kNoPresence.
//  - IsRequired changes the presence class to kRequiredPresence.
//
// Singular/required are only accepted on fields that currently have hasbit
// presence, and may not be combined.  Any violation aborts decoding with an
// error.  `message_modifiers` is not consulted.
static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
                                      uint32_t message_modifiers,
                                      uint32_t field_modifiers,
                                      upb_MiniTable_Field* field) {
  const bool flip_packed =
      (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) != 0;
  const bool singular =
      (field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular) != 0;
  const bool required =
      (field_modifiers & kUpb_EncodedFieldModifier_IsRequired) != 0;

  if (flip_packed) {
    if (!upb_MtDecoder_FieldIsPackable(field)) {
      upb_MtDecoder_ErrorFormat(
          d, "Cannot flip packed on unpackable field %" PRIu32, field->number);
      UPB_UNREACHABLE();
    }
    field->mode ^= kUpb_LabelFlags_IsPacked;
  }

  // Validate.
  if ((singular || required) && field->offset != kHasbitPresence) {
    upb_MtDecoder_ErrorFormat(
        d, "Invalid modifier(s) for repeated field %" PRIu32, field->number);
    UPB_UNREACHABLE();
  }
  if (singular && required) {
    upb_MtDecoder_ErrorFormat(
        d, "Field %" PRIu32 " cannot be both singular and required",
        field->number);
    UPB_UNREACHABLE();
  }

  // At most one of the two can be set at this point.
  if (required) {
    field->offset = kRequiredPresence;
  } else if (singular) {
    field->offset = kNoPresence;
  }
}
4801
// Appends `item` to the decoder's layout item vector, growing the backing
// buffer with realloc() when it is full (capacity doubles, with a minimum of
// 8 items).
//
// On allocation failure decoding is aborted with an "Out of memory" error.
// The vector is left untouched in that case: realloc() does not free the old
// block on failure, so the result goes into a temporary first and d->vec.data
// keeps pointing at the still-valid old buffer.
static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
  if (d->vec.size == d->vec.capacity) {
    size_t new_cap = UPB_MAX(8, d->vec.size * 2);
    upb_LayoutItem* new_data =
        realloc(d->vec.data, new_cap * sizeof(*d->vec.data));
    // Does not return when new_data is NULL.
    upb_MtDecoder_CheckOutOfMemory(d, new_data);
    d->vec.data = new_data;
    d->vec.capacity = new_cap;
  }
  d->vec.data[d->vec.size++] = item;
}
4811
// Queues the two layout items a finished oneof needs: one for the shared field
// data (using the representation accumulated in `item`) and one 4-byte slot
// for the oneof case.
//
// `item.field_index` arrives biased by kOneofBase; the bias is removed here.
// A oneof without any fields aborts decoding with "Empty oneof".
static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
  if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
    upb_MtDecoder_ErrorFormat(d, "Empty oneof");
    UPB_UNREACHABLE();
  }

  upb_LayoutItem data_item = item;
  data_item.field_index -= kOneofBase;
  data_item.type = kUpb_LayoutItemType_OneofField;
  upb_MtDecoder_PushItem(d, data_item);

  upb_LayoutItem case_item = data_item;
  case_item.rep = kUpb_FieldRep_4Byte;  // Field Number.
  case_item.type = kUpb_LayoutItemType_OneofCase;
  upb_MtDecoder_PushItem(d, case_item);
}
4828
// Returns the storage size in bytes of representation `rep` on `platform`
// (32-bit or 64-bit).
size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
                               upb_MiniTablePlatform platform) {
  static const uint8_t kRepToSize32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_Pointer] = 4,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kRepToSize64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_Pointer] = 8,
      [kUpb_FieldRep_StringView] = 16,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // The table for the platform we are compiled for must agree with the real
  // size of upb_StringView.
  UPB_ASSERT(sizeof(upb_StringView) ==
             UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]);

  const uint8_t* sizes =
      (platform == kUpb_MiniTablePlatform_32Bit) ? kRepToSize32 : kRepToSize64;
  return sizes[rep];
}
4846
// Returns the required alignment in bytes of representation `rep` on
// `platform` (32-bit or 64-bit).
size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
                                upb_MiniTablePlatform platform) {
  static const uint8_t kRepToAlign32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_Pointer] = 4,
      [kUpb_FieldRep_StringView] = 4,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kRepToAlign64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_Pointer] = 8,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // The table for the platform we are compiled for must agree with the real
  // alignment of upb_StringView.
  UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
             UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]);

  const uint8_t* aligns = (platform == kUpb_MiniTablePlatform_32Bit)
                              ? kRepToAlign32
                              : kRepToAlign64;
  return aligns[rep];
}
4864
// Decodes one oneof member (a field number) and links that field into the
// oneof described by `item`.
//
// The field must exist in the table and must currently have hasbit presence;
// otherwise decoding is aborted with an error.  `item->rep` is widened when
// the member needs more storage, and the field is prepended to the oneof's
// linked list (threaded through the fields' `offset` members).
//
// Returns the position after the decoded field number.
static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
                                                  const char* ptr,
                                                  char first_ch,
                                                  upb_LayoutItem* item) {
  uint32_t field_num;
  ptr = upb_MiniTable_DecodeBase92Varint(
      d, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
      kUpb_EncodedValue_MaxOneofField, &field_num);

  upb_MiniTable_Field* member =
      (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num);
  if (member == NULL) {
    upb_MtDecoder_ErrorFormat(d,
                              "Couldn't add field number %" PRIu32
                              " to oneof, no such field number.",
                              field_num);
    UPB_UNREACHABLE();
  }
  if (member->offset != kHasbitPresence) {
    upb_MtDecoder_ErrorFormat(
        d,
        "Cannot add repeated, required, or singular field %" PRIu32
        " to oneof.",
        field_num);
    UPB_UNREACHABLE();
  }

  // Oneof storage must be large enough to accommodate the largest member.
  int member_rep = member->mode >> kUpb_FieldRep_Shift;
  const size_t member_size = upb_MtDecoder_SizeOfRep(member_rep, d->platform);
  const size_t oneof_size = upb_MtDecoder_SizeOfRep(item->rep, d->platform);
  if (member_size > oneof_size) {
    item->rep = member_rep;
  }

  // Prepend this field to the linked list: it points at the previous head and
  // becomes the new head (stored biased by kOneofBase).
  member->offset = item->field_index;
  item->field_index = (member - d->fields) + kOneofBase;
  return ptr;
}
4903
// Decodes the oneof section, which extends to the end of the input.
//
// Members are separated by the field separator and oneofs by the oneof
// separator.  Each completed oneof is queued with upb_MtDecoder_PushOneof(),
// including the last one once the input is exhausted.
//
// Returns the final read position.
static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
                                              const char* ptr) {
  upb_LayoutItem item = {.rep = 0,
                         .field_index = kUpb_LayoutItem_IndexSentinel};

  while (ptr < d->end) {
    const char ch = *ptr++;

    if (ch == kUpb_EncodedValue_FieldSeparator) {
      // Field separator, no action needed.
      continue;
    }

    if (ch == kUpb_EncodedValue_OneofSeparator) {
      // End of oneof: queue it and start collecting the next one.
      upb_MtDecoder_PushOneof(d, item);
      item.field_index = kUpb_LayoutItem_IndexSentinel;
      continue;
    }

    ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item);
  }

  // Push final oneof.
  upb_MtDecoder_PushOneof(d, item);
  return ptr;
}
4925
// Decodes a modifier varint and applies it.
//
// When a field has already been seen (`last_field` != NULL) the modifier
// applies to that field.  Otherwise it is a message modifier and is stored in
// `*msg_modifiers`; that is an error when no table is being built (the
// extension case).
//
// Returns the position after the decoded modifier.
static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
                                               const char* ptr, char first_ch,
                                               upb_MiniTable_Field* last_field,
                                               uint64_t* msg_modifiers) {
  uint32_t mod;
  ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, first_ch,
                                         kUpb_EncodedValue_MinModifier,
                                         kUpb_EncodedValue_MaxModifier, &mod);

  if (last_field != NULL) {
    upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field);
    return ptr;
  }

  if (d->table == NULL) {
    upb_MtDecoder_ErrorFormat(d, "Extensions cannot have message modifiers");
    UPB_UNREACHABLE();
  }
  *msg_modifiers = mod;
  return ptr;
}
4946
// Allocates the table's sub array with room for `sub_count` entries from the
// decoder's arena.  Aborts decoding with "Out of memory" on failure.
static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, uint32_t sub_count) {
  const size_t subs_bytes = sizeof(*d->table->subs) * sub_count;
  void* subs = upb_Arena_Malloc(d->arena, subs_bytes);
  d->table->subs = subs;
  upb_MtDecoder_CheckOutOfMemory(d, subs);
}
4952
// Main decode loop over the `len` characters starting at `ptr`.
//
// Each character is dispatched by range:
//  - up to MaxField: a new field, written to the next `field_size`-byte slot
//    of `fields`; `*field_count` is incremented;
//  - modifier range: a modifier for the previous field, or for the message if
//    no field has been seen yet;
//  - End: start of the oneof section (not allowed without a table);
//  - skip range: a jump in field numbers.
// Any other character is ignored.
//
// When a table is being built, `dense_below` is set to the number of fields
// seen before the first skip (or to the total field count if there is none).
static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len,
                                void* fields, size_t field_size,
                                uint16_t* field_count, uint32_t* sub_count) {
  uint64_t msg_modifiers = 0;
  uint32_t prev_number = 0;
  upb_MiniTable_Field* prev_field = NULL;
  bool dense_below_pending = (d->table != NULL);

  d->end = UPB_PTRADD(ptr, len);

  while (ptr < d->end) {
    const char ch = *ptr++;

    if (ch <= kUpb_EncodedValue_MaxField) {
      upb_MiniTable_Field* field = fields;
      *field_count += 1;
      fields = (char*)fields + field_size;
      prev_number += 1;
      field->number = prev_number;
      prev_field = field;
      upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_count);
      continue;
    }

    if (kUpb_EncodedValue_MinModifier <= ch &&
        ch <= kUpb_EncodedValue_MaxModifier) {
      ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, prev_field, &msg_modifiers);
      if (msg_modifiers & kUpb_MessageModifier_IsExtendable) {
        d->table->ext |= kUpb_ExtMode_Extendable;
      }
      continue;
    }

    if (ch == kUpb_EncodedValue_End) {
      if (d->table == NULL) {
        upb_MtDecoder_ErrorFormat(d, "Extensions cannot have oneofs.");
        UPB_UNREACHABLE();
      }
      ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
      continue;
    }

    if (ch < kUpb_EncodedValue_MinSkip || kUpb_EncodedValue_MaxSkip < ch) {
      continue;  // Not a recognized character.
    }

    // Skip: the fields seen so far form the dense prefix.
    if (dense_below_pending) {
      d->table->dense_below = d->table->field_count;
      dense_below_pending = false;
    }
    uint32_t skip;
    ptr = upb_MiniTable_DecodeBase92Varint(
        d, ptr, ch, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip, &skip);
    prev_number += skip;
    prev_number--;  // Next field seen will increment.
  }

  if (dense_below_pending) {
    d->table->dense_below = d->table->field_count;
  }
}
5003
// Parses the message mini descriptor `data` (of `len` characters) into
// d->table: allocates the field array, runs the main parse loop, gives the
// unused tail of the field array back to the arena and allocates the sub
// array.
//
// Aborts decoding with "Out of memory" when an allocation fails or when the
// field array size cannot be represented in size_t.
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
                                       size_t len) {
  // Buffer length is an upper bound on the number of fields. We will return
  // what we don't use.
  //
  // Guard the multiplication below: a wrapped product would allocate a
  // too-small array that the parse loop then writes past.
  if (len > (size_t)-1 / sizeof(*d->fields)) {
    upb_MtDecoder_ErrorFormat(d, "Out of memory");
    UPB_UNREACHABLE();
  }
  const size_t fields_bytes = sizeof(*d->fields) * len;
  d->fields = upb_Arena_Malloc(d->arena, fields_bytes);
  upb_MtDecoder_CheckOutOfMemory(d, d->fields);

  uint32_t sub_count = 0;
  d->table->field_count = 0;
  // The table must see the fields during parsing: oneof decoding looks fields
  // up through the table.
  d->table->fields = d->fields;
  upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
                      &d->table->field_count, &sub_count);

  upb_Arena_ShrinkLast(d->arena, d->fields, fields_bytes,
                       sizeof(*d->fields) * d->table->field_count);
  d->table->fields = d->fields;
  upb_MtDecoder_AllocateSubs(d, sub_count);
}
5022
upb_MtDecoder_CompareFields(const void * _a,const void * _b)5023 int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
5024 const upb_LayoutItem* a = _a;
5025 const upb_LayoutItem* b = _b;
5026 // Currently we just sort by:
5027 // 1. rep (smallest fields first)
5028 // 2. type (oneof cases first)
5029 // 2. field_index (smallest numbers first)
5030 // The main goal of this is to reduce space lost to padding.
5031 // Later we may have more subtle reasons to prefer a different ordering.
5032 const int rep_bits = _upb_Log2Ceiling(kUpb_FieldRep_Max);
5033 const int type_bits = _upb_Log2Ceiling(kUpb_LayoutItemType_Max);
5034 const int idx_bits = (sizeof(a->field_index) * 8);
5035 UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
5036 #define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx
5037 uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
5038 uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
5039 assert(a_packed != b_packed);
5040 #undef UPB_COMBINE
5041 return a_packed < b_packed ? -1 : 1;
5042 }
5043
// Completes the layout item vector and sorts it into placement order.
//
// Oneof items were queued during parsing; this adds one item for every field
// that is not part of a oneof, then sorts everything with
// upb_MtDecoder_CompareFields().  Always returns true.
static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
  const int field_count = d->table->field_count;

  for (int idx = 0; idx < field_count; idx++) {
    const upb_MiniTable_Field* f = &d->fields[idx];
    // Offsets >= kOneofBase mark oneof members, which are already covered.
    if (f->offset < kOneofBase) {
      upb_LayoutItem item = {.field_index = idx,
                             .rep = f->mode >> kUpb_FieldRep_Shift,
                             .type = kUpb_LayoutItemType_Field};
      upb_MtDecoder_PushItem(d, item);
    }
  }

  if (d->vec.size != 0) {
    qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
          upb_MtDecoder_CompareFields);
  }

  return true;
}
5063
// Returns n / d rounded up to the next integer.  `d` must be non-zero.
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
  const size_t biased = n + (d - 1);
  return biased / d;
}
5067
// Assigns hasbit indexes to the fields of `ret`, based on the presence class
// temporarily stored in each field's `offset`.
//
// Required fields get the lowest indexes (1..required_count), followed by the
// other hasbit fields.  Fields without presence get presence 0.  Finally the
// message size is set to the number of bytes needed for the hasbits (0 when no
// hasbits were assigned).
static void upb_MtDecoder_AssignHasbits(upb_MiniTable* ret) {
  upb_MiniTable_Field* const fields = (upb_MiniTable_Field*)ret->fields;
  const int field_count = ret->field_count;
  int last_hasbit = 0;  // 0 cannot be used.

  // First assign required fields, which must have the lowest hasbits.
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTable_Field* f = &fields[idx];
    if (f->offset == kRequiredPresence) {
      last_hasbit++;
      f->presence = last_hasbit;
    } else if (f->offset == kNoPresence) {
      f->presence = 0;
    }
  }
  ret->required_count = last_hasbit;

  // Next assign non-required hasbit fields.
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTable_Field* f = &fields[idx];
    if (f->offset != kHasbitPresence) continue;
    last_hasbit++;
    f->presence = last_hasbit;
  }

  if (last_hasbit != 0) {
    // Bit 0 is reserved, so last_hasbit + 1 bits are needed in total.
    ret->size = upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8);
  } else {
    ret->size = 0;
  }
}
5093
// Reserves storage for one item of representation `rep` at the end of the
// message: the current size is rounded up to the item's alignment, and the
// message grows by the item's size.
//
// Returns the offset at which the item was placed.
size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
  const size_t item_align = upb_MtDecoder_AlignOfRep(rep, d->platform);
  const size_t item_size = upb_MtDecoder_SizeOfRep(rep, d->platform);
  const size_t offset = UPB_ALIGN_UP(d->table->size, item_align);
  d->table->size = offset + item_size;
  return offset;
}
5101
// Turns the sorted layout items into final field offsets.
//
// Works in three passes over the item vector:
//   1. place every item, in order, to obtain its offset;
//   2. for every oneof case item, store the complemented case offset in the
//      `presence` of each member of that oneof;
//   3. store the data offset in every field (for a oneof, in every member).
// Finally the message size is rounded up to a multiple of 8.
static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
  upb_LayoutItem* const items = d->vec.data;
  const size_t item_count = d->vec.size;

  // Compute offsets.
  for (size_t i = 0; i < item_count; i++) {
    items[i].offset = upb_MtDecoder_Place(d, items[i].rep);
  }

  // Assign oneof case offsets.  We must do these first, since assigning
  // actual offsets will overwrite the links of the linked list.
  for (size_t i = 0; i < item_count; i++) {
    const upb_LayoutItem* item = &items[i];
    if (item->type != kUpb_LayoutItemType_OneofCase) continue;

    // Walk the oneof's member list; each member's `offset` holds the biased
    // index of the next member, or the sentinel at the end.
    upb_MiniTable_Field* f = &d->fields[item->field_index];
    for (;;) {
      f->presence = ~item->offset;
      if (f->offset == kUpb_LayoutItem_IndexSentinel) break;
      UPB_ASSERT(f->offset - kOneofBase < d->table->field_count);
      f = &d->fields[f->offset - kOneofBase];
    }
  }

  // Assign offsets.
  for (size_t i = 0; i < item_count; i++) {
    const upb_LayoutItem* item = &items[i];
    upb_MiniTable_Field* f = &d->fields[item->field_index];

    if (item->type == kUpb_LayoutItemType_Field) {
      f->offset = item->offset;
    } else if (item->type == kUpb_LayoutItemType_OneofField) {
      // Every member shares the same data offset.  Read each link before it is
      // overwritten with the real offset.
      uint16_t link = f->offset;
      f->offset = item->offset;
      while (link != kUpb_LayoutItem_IndexSentinel) {
        f = &d->fields[link - kOneofBase];
        link = f->offset;
        f->offset = item->offset;
      }
    }
  }

  // The fasttable parser (supported on 64-bit only) depends on this being a
  // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
  //
  // On 32-bit we could potentially make this smaller, but there is no
  // compelling reason to optimize this right now.
  d->table->size = UPB_ALIGN_UP(d->table->size, 8);
}
5150
upb_MiniTable_BuildWithBuf(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,void ** buf,size_t * buf_size,upb_Status * status)5151 upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
5152 upb_MiniTablePlatform platform,
5153 upb_Arena* arena, void** buf,
5154 size_t* buf_size,
5155 upb_Status* status) {
5156 upb_MtDecoder decoder = {
5157 .platform = platform,
5158 .vec =
5159 {
5160 .data = *buf,
5161 .capacity = *buf_size / sizeof(*decoder.vec.data),
5162 .size = 0,
5163 },
5164 .arena = arena,
5165 .status = status,
5166 .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
5167 };
5168
5169 if (UPB_SETJMP(decoder.err)) {
5170 decoder.table = NULL;
5171 goto done;
5172 }
5173
5174 upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table);
5175
5176 decoder.table->size = 0;
5177 decoder.table->field_count = 0;
5178 decoder.table->ext = kUpb_ExtMode_NonExtendable;
5179 decoder.table->dense_below = 0;
5180 decoder.table->table_mask = -1;
5181 decoder.table->required_count = 0;
5182
5183 upb_MtDecoder_ParseMessage(&decoder, data, len);
5184 upb_MtDecoder_AssignHasbits(decoder.table);
5185 upb_MtDecoder_SortLayoutItems(&decoder);
5186 upb_MtDecoder_AssignOffsets(&decoder);
5187
5188 done:
5189 *buf = decoder.vec.data;
5190 *buf_size = decoder.vec.capacity / sizeof(*decoder.vec.data);
5191 return decoder.table;
5192 }
5193
upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,upb_Arena * arena)5194 upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,
5195 upb_Arena* arena) {
5196 upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
5197 if (!ret) return NULL;
5198
5199 ret->size = 0;
5200 ret->field_count = 0;
5201 ret->ext = kUpb_ExtMode_IsMessageSet;
5202 ret->dense_below = 0;
5203 ret->table_mask = -1;
5204 ret->required_count = 0;
5205 return ret;
5206 }
5207
upb_MiniTable_BuildMapEntry(upb_FieldType key_type,upb_FieldType value_type,bool value_is_proto3_enum,upb_MiniTablePlatform platform,upb_Arena * arena)5208 upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
5209 upb_FieldType value_type,
5210 bool value_is_proto3_enum,
5211 upb_MiniTablePlatform platform,
5212 upb_Arena* arena) {
5213 upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
5214 upb_MiniTable_Field* fields = upb_Arena_Malloc(arena, sizeof(*fields) * 2);
5215 if (!ret || !fields) return NULL;
5216
5217 upb_MiniTable_Sub* subs = NULL;
5218 if (value_is_proto3_enum) value_type = kUpb_FieldType_Int32;
5219 if (value_type == kUpb_FieldType_Message ||
5220 value_type == kUpb_FieldType_Group || value_type == kUpb_FieldType_Enum) {
5221 subs = upb_Arena_Malloc(arena, sizeof(*subs));
5222 if (!subs) return NULL;
5223 }
5224
5225 size_t field_size =
5226 upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, platform);
5227
5228 fields[0].number = 1;
5229 fields[1].number = 2;
5230 fields[0].mode = kUpb_FieldMode_Scalar;
5231 fields[1].mode = kUpb_FieldMode_Scalar;
5232 fields[0].presence = 0;
5233 fields[1].presence = 0;
5234 fields[0].offset = 0;
5235 fields[1].offset = field_size;
5236
5237 upb_MiniTable_SetTypeAndSub(&fields[0], key_type, NULL, 0);
5238 upb_MiniTable_SetTypeAndSub(&fields[1], value_type, NULL, 0);
5239
5240 ret->size = UPB_ALIGN_UP(2 * field_size, 8);
5241 ret->field_count = 2;
5242 ret->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry;
5243 ret->dense_below = 2;
5244 ret->table_mask = -1;
5245 ret->required_count = 0;
5246 ret->subs = subs;
5247 ret->fields = fields;
5248 return ret;
5249 }
5250
// Records enum value `val` in `table`.
//
// Values below 64 are stored as a bit in `table->mask`.  Larger values are
// appended to the `table->values` array, which is grown by one 4-byte element
// through `arena`; allocation failure is handed to
// upb_MtDecoder_CheckOutOfMemory().  Every path that returns yields true.
static bool upb_MiniTable_BuildEnumValue(upb_MtDecoder* d,
                                         upb_MiniTable_Enum* table,
                                         uint32_t val, upb_Arena* arena) {
  if (val >= 64) {
    int32_t* grown = upb_Arena_Realloc(arena, (void*)table->values,
                                       table->value_count * 4,
                                       (table->value_count + 1) * 4);
    upb_MtDecoder_CheckOutOfMemory(d, grown);
    grown[table->value_count] = (int32_t)val;
    table->value_count++;
    table->values = grown;
    return true;
  }

  table->mask |= 1ULL << val;
  return true;
}
5267
upb_MiniTable_BuildEnum(const char * data,size_t len,upb_Arena * arena,upb_Status * status)5268 upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
5269 upb_Arena* arena,
5270 upb_Status* status) {
5271 upb_MtDecoder d = {
5272 .status = status,
5273 .end = UPB_PTRADD(data, len),
5274 };
5275
5276 if (UPB_SETJMP(d.err)) {
5277 return NULL;
5278 }
5279
5280 upb_MiniTable_Enum* table = upb_Arena_Malloc(arena, sizeof(*table));
5281 upb_MtDecoder_CheckOutOfMemory(&d, table);
5282
5283 table->mask = 0;
5284 table->value_count = 0;
5285 table->values = NULL;
5286
5287 const char* ptr = data;
5288 uint32_t base = 0;
5289
5290 while (ptr < d.end) {
5291 char ch = *ptr++;
5292 if (ch <= kUpb_EncodedValue_MaxEnumMask) {
5293 uint32_t mask = upb_FromBase92(ch);
5294 for (int i = 0; i < 5; i++, base++, mask >>= 1) {
5295 if (mask & 1) {
5296 if (!upb_MiniTable_BuildEnumValue(&d, table, base, arena)) {
5297 return NULL;
5298 }
5299 }
5300 }
5301 } else if (kUpb_EncodedValue_MinSkip <= ch &&
5302 ch <= kUpb_EncodedValue_MaxSkip) {
5303 uint32_t skip;
5304 ptr = upb_MiniTable_DecodeBase92Varint(&d, ptr, ch,
5305 kUpb_EncodedValue_MinSkip,
5306 kUpb_EncodedValue_MaxSkip, &skip);
5307 base += skip;
5308 } else {
5309 upb_Status_SetErrorFormat(status, "Unexpected character: %c", ch);
5310 return NULL;
5311 }
5312 }
5313
5314 return table;
5315 }
5316
// Parses the encoded description in `data`/`len` into the caller-provided
// `ext`, then marks its field as an extension and resets its offset to 0.
//
// Returns false if parsing jumps back to the UPB_SETJMP point, true otherwise.
//
// NOTE(review): `sub` is not referenced anywhere in this body -- confirm
// whether callers are expected to store it into `ext` themselves.
bool upb_MiniTable_BuildExtension(const char* data, size_t len,
                                  upb_MiniTable_Extension* ext,
                                  upb_MiniTable_Sub sub, upb_Status* status) {
  upb_MtDecoder d = {.arena = NULL, .status = status, .table = NULL};

  if (UPB_SETJMP(d.err)) return false;

  uint16_t parsed_fields = 0;
  upb_MtDecoder_Parse(&d, data, len, ext, sizeof(*ext), &parsed_fields, NULL);

  ext->field.offset = 0;
  ext->field.mode |= kUpb_LabelFlags_IsExtension;
  return true;
}
5336
upb_MiniTable_Build(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)5337 upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
5338 upb_MiniTablePlatform platform,
5339 upb_Arena* arena, upb_Status* status) {
5340 void* buf = NULL;
5341 size_t size = 0;
5342 upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena,
5343 &buf, &size, status);
5344 free(buf);
5345 return ret;
5346 }
5347
// Points the sub slot of `field` (indexed by field->submsg_index in
// table->subs) at the message table `sub`.  If `sub` is flagged as a map
// entry, `field` is first switched to map mode with a pointer rep.
//
// `field` must be an element of table->fields (asserted).
void upb_MiniTable_SetSubMessage(upb_MiniTable* table,
                                 upb_MiniTable_Field* field,
                                 const upb_MiniTable* sub) {
  UPB_ASSERT((uintptr_t)field >= (uintptr_t)table->fields &&
             (uintptr_t)field <
                 (uintptr_t)(table->fields + table->field_count));

  const bool is_map_entry = (sub->ext & kUpb_ExtMode_IsMapEntry) != 0;
  if (is_map_entry) {
    field->mode =
        kUpb_FieldMode_Map | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift);
  }

  // table->subs is declared const; the cast is what permits the write.
  upb_MiniTable_Sub* slot = (void*)&table->subs[field->submsg_index];
  slot->submsg = sub;
}
5361
// Points the sub slot of `field` (indexed by field->submsg_index in
// table->subs) at the enum table `sub`.
//
// `field` must be an element of table->fields (asserted).
void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field,
                              const upb_MiniTable_Enum* sub) {
  UPB_ASSERT((uintptr_t)field >= (uintptr_t)table->fields &&
             (uintptr_t)field <
                 (uintptr_t)(table->fields + table->field_count));

  upb_MiniTable_Sub* slot = (void*)&table->subs[field->submsg_index];
  slot->subenum = sub;
}
5370
5371 /** upb/def.c ************************************************************/
5372
5373 #include <ctype.h>
5374 #include <errno.h>
5375 #include <setjmp.h>
5376 #include <stdlib.h>
5377 #include <string.h>
5378
5379
5380 /* Must be last. */
5381
/* Length-prefixed string.  The character data starts at `str` and continues
 * past the declared one-element array; it is null-terminated. */
typedef struct {
  size_t len;  /* Byte count reported as the string's size. */
  char str[1]; /* Null-terminated string data follows. */
} str_t;
5386
5387 /* The upb core does not generally have a concept of default instances. However
5388 * for descriptor options we make an exception since the max size is known and
5389 * modest (<200 bytes). All types can share a default instance since it is
5390 * initialized to zeroes.
5391 *
5392 * We have to allocate an extra pointer for upb's internal metadata. */
5393 static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
5394 static const char* opt_default = &opt_default_buf[sizeof(void*)];
5395
5396 struct upb_FieldDef {
5397 const google_protobuf_FieldOptions* opts;
5398 const upb_FileDef* file;
5399 const upb_MessageDef* msgdef;
5400 const char* full_name;
5401 const char* json_name;
5402 union {
5403 int64_t sint;
5404 uint64_t uint;
5405 double dbl;
5406 float flt;
5407 bool boolean;
5408 str_t* str;
5409 } defaultval;
5410 union {
5411 const upb_OneofDef* oneof;
5412 const upb_MessageDef* extension_scope;
5413 } scope;
5414 union {
5415 const upb_MessageDef* msgdef;
5416 const upb_EnumDef* enumdef;
5417 const google_protobuf_FieldDescriptorProto* unresolved;
5418 } sub;
5419 uint32_t number_;
5420 uint16_t index_;
5421 uint16_t layout_index; /* Index into msgdef->layout->fields or file->exts */
5422 bool has_default;
5423 bool is_extension_;
5424 bool packed_;
5425 bool proto3_optional_;
5426 bool has_json_name_;
5427 upb_FieldType type_;
5428 upb_Label label_;
5429 #if UINTPTR_MAX == 0xffffffff
5430 uint32_t padding; // Increase size to a multiple of 8.
5431 #endif
5432 };
5433
5434 struct upb_ExtensionRange {
5435 const google_protobuf_ExtensionRangeOptions* opts;
5436 int32_t start;
5437 int32_t end;
5438 };
5439
5440 struct upb_MessageDef {
5441 const google_protobuf_MessageOptions* opts;
5442 const upb_MiniTable* layout;
5443 const upb_FileDef* file;
5444 const upb_MessageDef* containing_type;
5445 const char* full_name;
5446
5447 /* Tables for looking up fields by number and name. */
5448 upb_inttable itof;
5449 upb_strtable ntof;
5450
5451 /* All nested defs.
5452 * MEM: We could save some space here by putting nested defs in a contiguous
5453 * region and calculating counts from offsets or vice-versa. */
5454 const upb_FieldDef* fields;
5455 const upb_OneofDef* oneofs;
5456 const upb_ExtensionRange* ext_ranges;
5457 const upb_StringView* res_names;
5458 const upb_MessageDef* nested_msgs;
5459 const upb_MessageReservedRange* res_ranges;
5460 const upb_EnumDef* nested_enums;
5461 const upb_FieldDef* nested_exts;
5462 int field_count;
5463 int real_oneof_count;
5464 int oneof_count;
5465 int ext_range_count;
5466 int res_range_count;
5467 int res_name_count;
5468 int nested_msg_count;
5469 int nested_enum_count;
5470 int nested_ext_count;
5471 bool in_message_set;
5472 upb_WellKnown well_known_type;
5473 #if UINTPTR_MAX == 0xffffffff
5474 uint32_t padding; // Increase size to a multiple of 8.
5475 #endif
5476 };
5477
5478 struct upb_EnumDef {
5479 const google_protobuf_EnumOptions* opts;
5480 const upb_MiniTable_Enum* layout; // Only for proto2.
5481 const upb_FileDef* file;
5482 const upb_MessageDef* containing_type; // Could be merged with "file".
5483 const char* full_name;
5484 upb_strtable ntoi;
5485 upb_inttable iton;
5486 const upb_EnumValueDef* values;
5487 const upb_EnumReservedRange* res_ranges;
5488 const upb_StringView* res_names;
5489 int value_count;
5490 int res_range_count;
5491 int res_name_count;
5492 int32_t defaultval;
5493 #if UINTPTR_MAX == 0xffffffff
5494 uint32_t padding; // Increase size to a multiple of 8.
5495 #endif
5496 };
5497
5498 struct upb_EnumValueDef {
5499 const google_protobuf_EnumValueOptions* opts;
5500 const upb_EnumDef* parent;
5501 const char* full_name;
5502 int32_t number;
5503 };
5504
5505 struct upb_OneofDef {
5506 const google_protobuf_OneofOptions* opts;
5507 const upb_MessageDef* parent;
5508 const char* full_name;
5509 int field_count;
5510 bool synthetic;
5511 const upb_FieldDef** fields;
5512 upb_strtable ntof;
5513 upb_inttable itof;
5514 #if UINTPTR_MAX == 0xffffffff
5515 uint32_t padding; // Increase size to a multiple of 8.
5516 #endif
5517 };
5518
5519 struct upb_FileDef {
5520 const google_protobuf_FileOptions* opts;
5521 const char* name;
5522 const char* package;
5523
5524 const upb_FileDef** deps;
5525 const int32_t* public_deps;
5526 const int32_t* weak_deps;
5527 const upb_MessageDef* top_lvl_msgs;
5528 const upb_EnumDef* top_lvl_enums;
5529 const upb_FieldDef* top_lvl_exts;
5530 const upb_ServiceDef* services;
5531 const upb_MiniTable_Extension** ext_layouts;
5532 const upb_DefPool* symtab;
5533
5534 int dep_count;
5535 int public_dep_count;
5536 int weak_dep_count;
5537 int top_lvl_msg_count;
5538 int top_lvl_enum_count;
5539 int top_lvl_ext_count;
5540 int service_count;
5541 int ext_count; /* All exts in the file. */
5542 upb_Syntax syntax;
5543 };
5544
5545 struct upb_MethodDef {
5546 const google_protobuf_MethodOptions* opts;
5547 upb_ServiceDef* service;
5548 const char* full_name;
5549 const upb_MessageDef* input_type;
5550 const upb_MessageDef* output_type;
5551 int index;
5552 bool client_streaming;
5553 bool server_streaming;
5554 };
5555
5556 struct upb_ServiceDef {
5557 const google_protobuf_ServiceOptions* opts;
5558 const upb_FileDef* file;
5559 const char* full_name;
5560 upb_MethodDef* methods;
5561 int method_count;
5562 int index;
5563 };
5564
5565 struct upb_DefPool {
5566 upb_Arena* arena;
5567 upb_strtable syms; /* full_name -> packed def ptr */
5568 upb_strtable files; /* file_name -> upb_FileDef* */
5569 upb_inttable exts; /* upb_MiniTable_Extension* -> upb_FieldDef* */
5570 upb_ExtensionRegistry* extreg;
5571 size_t bytes_loaded;
5572 };
5573
/* Tags for the tagged def pointers stored in lookup tables.  The tag occupies
 * the low bits selected by UPB_DEFTYPE_MASK.  Tag values are reused across
 * table kinds, so a tag is only meaningful relative to the table the pointer
 * was read from. */
typedef enum {
  UPB_DEFTYPE_MASK = 7,

  /* Tags used only inside the symtab table. */
  UPB_DEFTYPE_EXT = 0,
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,
  UPB_DEFTYPE_ENUMVAL = 3,
  UPB_DEFTYPE_SERVICE = 4,

  /* Tags used only inside a message table. */
  UPB_DEFTYPE_FIELD = 0,
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2,

  /* Tags used only inside the file table. */
  UPB_DEFTYPE_FILE = 0,
  UPB_DEFTYPE_LAYOUT = 1
} upb_deftype_t;
5594
5595 #define FIELD_TYPE_UNSPECIFIED 0
5596
/* One reserved field-number range of a message. */
struct upb_MessageReservedRange {
  int32_t start;
  int32_t end;
};
5601
5602 struct symtab_addctx {
5603 upb_DefPool* symtab;
5604 upb_FileDef* file; /* File we are building. */
5605 upb_Arena* arena; /* Allocate defs here. */
5606 upb_Arena* tmp_arena; /* For temporary allocations. */
5607 const upb_MiniTable_File* layout; /* NULL if we should build layouts. */
5608 int enum_count; /* Count of enums built so far. */
5609 int msg_count; /* Count of messages built so far. */
5610 int ext_count; /* Count of extensions built so far. */
5611 upb_Status* status; /* Record errors here. */
5612 jmp_buf err; /* longjmp() on error. */
5613 };
5614
deftype(upb_value v)5615 static upb_deftype_t deftype(upb_value v) {
5616 uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
5617 return num & UPB_DEFTYPE_MASK;
5618 }
5619
// Returns the untagged pointer stored in `v` if its tag equals `type`,
// or NULL when the tag is something else.
static const void* unpack_def(upb_value v, upb_deftype_t type) {
  const uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);
  if ((tagged & UPB_DEFTYPE_MASK) != type) {
    return NULL;
  }
  return (const void*)(tagged & ~UPB_DEFTYPE_MASK);
}
5626
pack_def(const void * ptr,upb_deftype_t type)5627 static upb_value pack_def(const void* ptr, upb_deftype_t type) {
5628 // Our 3-bit pointer tagging requires all pointers to be multiples of 8.
5629 // The arena will always yield 8-byte-aligned addresses, however we put
5630 // the defs into arrays. For each element in the array to be 8-byte-aligned,
5631 // the sizes of each def type must also be a multiple of 8.
5632 //
5633 // If any of these asserts fail, we need to add or remove padding on 32-bit
5634 // machines (64-bit machines will have 8-byte alignment already due to
5635 // pointers, which all of these structs have).
5636 UPB_ASSERT((sizeof(upb_FieldDef) & UPB_DEFTYPE_MASK) == 0);
5637 UPB_ASSERT((sizeof(upb_MessageDef) & UPB_DEFTYPE_MASK) == 0);
5638 UPB_ASSERT((sizeof(upb_EnumDef) & UPB_DEFTYPE_MASK) == 0);
5639 UPB_ASSERT((sizeof(upb_EnumValueDef) & UPB_DEFTYPE_MASK) == 0);
5640 UPB_ASSERT((sizeof(upb_ServiceDef) & UPB_DEFTYPE_MASK) == 0);
5641 UPB_ASSERT((sizeof(upb_OneofDef) & UPB_DEFTYPE_MASK) == 0);
5642 uintptr_t num = (uintptr_t)ptr;
5643 UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0);
5644 num |= type;
5645 return upb_value_constptr((const void*)num);
5646 }
5647
/* The <ctype.h> classifiers (isalpha() and friends) depend on the locale,
 * which we don't want, so the helpers below work on raw byte values.
 *
 * Returns true iff low <= c <= high. */
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  if (c < low) return false;
  return c <= high;
}
5652
// Sets bit 0x20 of `ch`, which maps an ASCII upper-case letter onto its
// lower-case form.  Only a letter result is meaningful: it proves the input
// was a letter.  For any other input the result is not a reliable
// transformation of the character.
static char upb_ascii_lower(char ch) {
  return (char)(ch | 0x20);
}
5659
// True for ASCII letters (either case) and for '_'.
static bool upb_isletter(char c) {
  if (c == '_') return true;
  return upb_isbetween(upb_ascii_lower(c), 'a', 'z');
}
5664
// True for anything upb_isletter() accepts, plus the ASCII digits '0'..'9'.
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) return true;
  return upb_isletter(c);
}
5668
// Returns the portion of `fullname` after its last '.', or `fullname` itself
// when it contains no '.'.  The result points into `fullname`; nothing is
// copied.  A NULL input yields NULL.
static const char* shortdefname(const char* fullname) {
  if (fullname == NULL) return NULL;

  const char* last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
5682
5683 /* All submessage fields are lower than all other fields.
5684 * Secondly, fields are increasing in order. */
field_rank(const upb_FieldDef * f)5685 uint32_t field_rank(const upb_FieldDef* f) {
5686 uint32_t ret = upb_FieldDef_Number(f);
5687 const uint32_t high_bit = 1 << 30;
5688 UPB_ASSERT(ret < high_bit);
5689 if (!upb_FieldDef_IsSubMessage(f)) ret |= high_bit;
5690 return ret;
5691 }
5692
cmp_fields(const void * p1,const void * p2)5693 int cmp_fields(const void* p1, const void* p2) {
5694 const upb_FieldDef* f1 = *(upb_FieldDef* const*)p1;
5695 const upb_FieldDef* f2 = *(upb_FieldDef* const*)p2;
5696 return field_rank(f1) - field_rank(f2);
5697 }
5698
// Records an out-of-memory error message in `status`.
static void upb_Status_setoom(upb_Status* status) {
  static const char kOomMessage[] = "out of memory";
  upb_Status_SetErrorMessage(status, kOomMessage);
}
5702
// Sets m->well_known_type from the message's full name.  Names that are not
// in the table below -- and a NULL name -- yield kUpb_WellKnown_Unspecified.
static void assign_msg_wellknowntype(upb_MessageDef* m) {
  static const struct {
    const char* full_name;
    upb_WellKnown type;
  } kWellKnown[] = {
      {"google.protobuf.Any", kUpb_WellKnown_Any},
      {"google.protobuf.FieldMask", kUpb_WellKnown_FieldMask},
      {"google.protobuf.Duration", kUpb_WellKnown_Duration},
      {"google.protobuf.Timestamp", kUpb_WellKnown_Timestamp},
      {"google.protobuf.DoubleValue", kUpb_WellKnown_DoubleValue},
      {"google.protobuf.FloatValue", kUpb_WellKnown_FloatValue},
      {"google.protobuf.Int64Value", kUpb_WellKnown_Int64Value},
      {"google.protobuf.UInt64Value", kUpb_WellKnown_UInt64Value},
      {"google.protobuf.Int32Value", kUpb_WellKnown_Int32Value},
      {"google.protobuf.UInt32Value", kUpb_WellKnown_UInt32Value},
      {"google.protobuf.BoolValue", kUpb_WellKnown_BoolValue},
      {"google.protobuf.StringValue", kUpb_WellKnown_StringValue},
      {"google.protobuf.BytesValue", kUpb_WellKnown_BytesValue},
      {"google.protobuf.Value", kUpb_WellKnown_Value},
      {"google.protobuf.ListValue", kUpb_WellKnown_ListValue},
      {"google.protobuf.Struct", kUpb_WellKnown_Struct},
  };

  const char* name = upb_MessageDef_FullName(m);

  // Default; overwritten below on an exact name match.
  m->well_known_type = kUpb_WellKnown_Unspecified;
  if (name == NULL) return;

  for (size_t i = 0; i < sizeof(kWellKnown) / sizeof(kWellKnown[0]); i++) {
    if (strcmp(name, kWellKnown[i].full_name) == 0) {
      m->well_known_type = kWellKnown[i].type;
      return;
    }
  }
}
5745
5746 /* upb_EnumDef ****************************************************************/
5747
upb_EnumDef_Options(const upb_EnumDef * e)5748 const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e) {
5749 return e->opts;
5750 }
5751
upb_EnumDef_HasOptions(const upb_EnumDef * e)5752 bool upb_EnumDef_HasOptions(const upb_EnumDef* e) {
5753 return e->opts != (void*)opt_default;
5754 }
5755
upb_EnumDef_FullName(const upb_EnumDef * e)5756 const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; }
5757
upb_EnumDef_Name(const upb_EnumDef * e)5758 const char* upb_EnumDef_Name(const upb_EnumDef* e) {
5759 return shortdefname(e->full_name);
5760 }
5761
upb_EnumDef_File(const upb_EnumDef * e)5762 const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; }
5763
upb_EnumDef_ContainingType(const upb_EnumDef * e)5764 const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) {
5765 return e->containing_type;
5766 }
5767
upb_EnumDef_Default(const upb_EnumDef * e)5768 int32_t upb_EnumDef_Default(const upb_EnumDef* e) {
5769 UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval));
5770 return e->defaultval;
5771 }
5772
upb_EnumDef_ReservedRangeCount(const upb_EnumDef * e)5773 int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) {
5774 return e->res_range_count;
5775 }
5776
5777 /* upb_EnumReservedRange ******************************************************/
5778
/* One reserved number range of an enum.  Unlike message reserved ranges,
 * `end` is inclusive here. */
struct upb_EnumReservedRange {
  int32_t start;
  int32_t end;
};
5783
_upb_EnumReservedRange_At(const upb_EnumReservedRange * r,int i)5784 upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r,
5785 int i) {
5786 return (upb_EnumReservedRange*)&r[i];
5787 }
5788
upb_EnumReservedRange_Start(const upb_EnumReservedRange * r)5789 int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) {
5790 return r->start;
5791 }
upb_EnumReservedRange_End(const upb_EnumReservedRange * r)5792 int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) {
5793 return r->end;
5794 }
5795
symtab_errf(symtab_addctx * ctx,const char * fmt,...)5796 UPB_NORETURN UPB_NOINLINE UPB_PRINTF(2, 3) static void symtab_errf(
5797 symtab_addctx* ctx, const char* fmt, ...) {
5798 va_list argp;
5799 va_start(argp, fmt);
5800 upb_Status_VSetErrorFormat(ctx->status, fmt, argp);
5801 va_end(argp);
5802 UPB_LONGJMP(ctx->err, 1);
5803 }
5804
_upb_EnumReservedRanges_New(symtab_addctx * ctx,int n,const google_protobuf_EnumDescriptorProto_EnumReservedRange * const * protos,const upb_EnumDef * e)5805 upb_EnumReservedRange* _upb_EnumReservedRanges_New(
5806 symtab_addctx* ctx, int n,
5807 const google_protobuf_EnumDescriptorProto_EnumReservedRange* const* protos,
5808 const upb_EnumDef* e) {
5809 upb_EnumReservedRange* r =
5810 upb_Arena_Malloc(ctx->arena, sizeof(upb_EnumReservedRange) * n);
5811
5812 for (int i = 0; i < n; i++) {
5813 const int32_t start =
5814 google_protobuf_EnumDescriptorProto_EnumReservedRange_start(protos[i]);
5815 const int32_t end =
5816 google_protobuf_EnumDescriptorProto_EnumReservedRange_end(protos[i]);
5817
5818 // A full validation would also check that each range is disjoint, and that
5819 // none of the fields overlap with the extension ranges, but we are just
5820 // sanity checking here.
5821
5822 // Note: Not a typo! Unlike extension ranges and message reserved ranges,
5823 // the end value of an enum reserved range is *inclusive*!
5824 if (end < start) {
5825 symtab_errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n",
5826 (int)start, (int)end, upb_EnumDef_FullName(e));
5827 }
5828
5829 r[i].start = start;
5830 r[i].end = end;
5831 }
5832
5833 return r;
5834 }
5835
upb_EnumDef_ReservedRange(const upb_EnumDef * e,int i)5836 const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e,
5837 int i) {
5838 UPB_ASSERT(0 <= i && i < e->res_range_count);
5839 return _upb_EnumReservedRange_At(e->res_ranges, i);
5840 }
5841
upb_EnumDef_ReservedNameCount(const upb_EnumDef * e)5842 int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) {
5843 return e->res_name_count;
5844 }
5845
upb_EnumDef_ReservedName(const upb_EnumDef * e,int i)5846 upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) {
5847 UPB_ASSERT(0 <= i && i < e->res_name_count);
5848 return e->res_names[i];
5849 }
5850
upb_EnumDef_ValueCount(const upb_EnumDef * e)5851 int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; }
5852
upb_EnumDef_FindValueByNameWithSize(const upb_EnumDef * def,const char * name,size_t len)5853 const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize(
5854 const upb_EnumDef* def, const char* name, size_t len) {
5855 upb_value v;
5856 return upb_strtable_lookup2(&def->ntoi, name, len, &v)
5857 ? upb_value_getconstptr(v)
5858 : NULL;
5859 }
5860
upb_EnumDef_FindValueByNumber(const upb_EnumDef * def,int32_t num)5861 const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* def,
5862 int32_t num) {
5863 upb_value v;
5864 return upb_inttable_lookup(&def->iton, num, &v) ? upb_value_getconstptr(v)
5865 : NULL;
5866 }
5867
upb_EnumDef_CheckNumber(const upb_EnumDef * e,int32_t num)5868 bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) {
5869 // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect
5870 // this to be faster (especially for small numbers).
5871 return upb_MiniTable_Enum_CheckValue(e->layout, num);
5872 }
5873
upb_EnumDef_Value(const upb_EnumDef * e,int i)5874 const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) {
5875 UPB_ASSERT(0 <= i && i < e->value_count);
5876 return &e->values[i];
5877 }
5878
5879 /* upb_EnumValueDef ***********************************************************/
5880
upb_EnumValueDef_Options(const upb_EnumValueDef * e)5881 const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options(
5882 const upb_EnumValueDef* e) {
5883 return e->opts;
5884 }
5885
upb_EnumValueDef_HasOptions(const upb_EnumValueDef * e)5886 bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* e) {
5887 return e->opts != (void*)opt_default;
5888 }
5889
upb_EnumValueDef_Enum(const upb_EnumValueDef * ev)5890 const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* ev) {
5891 return ev->parent;
5892 }
5893
upb_EnumValueDef_FullName(const upb_EnumValueDef * ev)5894 const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* ev) {
5895 return ev->full_name;
5896 }
5897
upb_EnumValueDef_Name(const upb_EnumValueDef * ev)5898 const char* upb_EnumValueDef_Name(const upb_EnumValueDef* ev) {
5899 return shortdefname(ev->full_name);
5900 }
5901
upb_EnumValueDef_Number(const upb_EnumValueDef * ev)5902 int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* ev) {
5903 return ev->number;
5904 }
5905
upb_EnumValueDef_Index(const upb_EnumValueDef * ev)5906 uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* ev) {
5907 // Compute index in our parent's array.
5908 return ev - ev->parent->values;
5909 }
5910
5911 /* upb_ExtensionRange
5912 * ***************************************************************/
5913
upb_ExtensionRange_Options(const upb_ExtensionRange * r)5914 const google_protobuf_ExtensionRangeOptions* upb_ExtensionRange_Options(
5915 const upb_ExtensionRange* r) {
5916 return r->opts;
5917 }
5918
upb_ExtensionRange_HasOptions(const upb_ExtensionRange * r)5919 bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) {
5920 return r->opts != (void*)opt_default;
5921 }
5922
upb_ExtensionRange_Start(const upb_ExtensionRange * e)5923 int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* e) {
5924 return e->start;
5925 }
5926
upb_ExtensionRange_End(const upb_ExtensionRange * e)5927 int32_t upb_ExtensionRange_End(const upb_ExtensionRange* e) { return e->end; }
5928
5929 /* upb_FieldDef ***************************************************************/
5930
upb_FieldDef_Options(const upb_FieldDef * f)5931 const google_protobuf_FieldOptions* upb_FieldDef_Options(
5932 const upb_FieldDef* f) {
5933 return f->opts;
5934 }
5935
upb_FieldDef_HasOptions(const upb_FieldDef * f)5936 bool upb_FieldDef_HasOptions(const upb_FieldDef* f) {
5937 return f->opts != (void*)opt_default;
5938 }
5939
upb_FieldDef_FullName(const upb_FieldDef * f)5940 const char* upb_FieldDef_FullName(const upb_FieldDef* f) {
5941 return f->full_name;
5942 }
5943
upb_FieldDef_CType(const upb_FieldDef * f)5944 upb_CType upb_FieldDef_CType(const upb_FieldDef* f) {
5945 switch (f->type_) {
5946 case kUpb_FieldType_Double:
5947 return kUpb_CType_Double;
5948 case kUpb_FieldType_Float:
5949 return kUpb_CType_Float;
5950 case kUpb_FieldType_Int64:
5951 case kUpb_FieldType_SInt64:
5952 case kUpb_FieldType_SFixed64:
5953 return kUpb_CType_Int64;
5954 case kUpb_FieldType_Int32:
5955 case kUpb_FieldType_SFixed32:
5956 case kUpb_FieldType_SInt32:
5957 return kUpb_CType_Int32;
5958 case kUpb_FieldType_UInt64:
5959 case kUpb_FieldType_Fixed64:
5960 return kUpb_CType_UInt64;
5961 case kUpb_FieldType_UInt32:
5962 case kUpb_FieldType_Fixed32:
5963 return kUpb_CType_UInt32;
5964 case kUpb_FieldType_Enum:
5965 return kUpb_CType_Enum;
5966 case kUpb_FieldType_Bool:
5967 return kUpb_CType_Bool;
5968 case kUpb_FieldType_String:
5969 return kUpb_CType_String;
5970 case kUpb_FieldType_Bytes:
5971 return kUpb_CType_Bytes;
5972 case kUpb_FieldType_Group:
5973 case kUpb_FieldType_Message:
5974 return kUpb_CType_Message;
5975 }
5976 UPB_UNREACHABLE();
5977 }
5978
upb_FieldDef_Type(const upb_FieldDef * f)5979 upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; }
5980
upb_FieldDef_Index(const upb_FieldDef * f)5981 uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; }
5982
upb_FieldDef_Label(const upb_FieldDef * f)5983 upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; }
5984
upb_FieldDef_Number(const upb_FieldDef * f)5985 uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; }
5986
upb_FieldDef_IsExtension(const upb_FieldDef * f)5987 bool upb_FieldDef_IsExtension(const upb_FieldDef* f) {
5988 return f->is_extension_;
5989 }
5990
upb_FieldDef_IsPacked(const upb_FieldDef * f)5991 bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->packed_; }
5992
upb_FieldDef_Name(const upb_FieldDef * f)5993 const char* upb_FieldDef_Name(const upb_FieldDef* f) {
5994 return shortdefname(f->full_name);
5995 }
5996
upb_FieldDef_JsonName(const upb_FieldDef * f)5997 const char* upb_FieldDef_JsonName(const upb_FieldDef* f) {
5998 return f->json_name;
5999 }
6000
upb_FieldDef_HasJsonName(const upb_FieldDef * f)6001 bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) {
6002 return f->has_json_name_;
6003 }
6004
upb_FieldDef_File(const upb_FieldDef * f)6005 const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; }
6006
upb_FieldDef_ContainingType(const upb_FieldDef * f)6007 const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) {
6008 return f->msgdef;
6009 }
6010
upb_FieldDef_ExtensionScope(const upb_FieldDef * f)6011 const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) {
6012 return f->is_extension_ ? f->scope.extension_scope : NULL;
6013 }
6014
upb_FieldDef_ContainingOneof(const upb_FieldDef * f)6015 const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) {
6016 return f->is_extension_ ? NULL : f->scope.oneof;
6017 }
6018
upb_FieldDef_RealContainingOneof(const upb_FieldDef * f)6019 const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) {
6020 const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f);
6021 if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL;
6022 return oneof;
6023 }
6024
upb_FieldDef_Default(const upb_FieldDef * f)6025 upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) {
6026 UPB_ASSERT(!upb_FieldDef_IsSubMessage(f));
6027 upb_MessageValue ret;
6028
6029 switch (upb_FieldDef_CType(f)) {
6030 case kUpb_CType_Bool:
6031 return (upb_MessageValue){.bool_val = f->defaultval.boolean};
6032 case kUpb_CType_Int64:
6033 return (upb_MessageValue){.int64_val = f->defaultval.sint};
6034 case kUpb_CType_UInt64:
6035 return (upb_MessageValue){.uint64_val = f->defaultval.uint};
6036 case kUpb_CType_Enum:
6037 case kUpb_CType_Int32:
6038 return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint};
6039 case kUpb_CType_UInt32:
6040 return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint};
6041 case kUpb_CType_Float:
6042 return (upb_MessageValue){.float_val = f->defaultval.flt};
6043 case kUpb_CType_Double:
6044 return (upb_MessageValue){.double_val = f->defaultval.dbl};
6045 case kUpb_CType_String:
6046 case kUpb_CType_Bytes: {
6047 str_t* str = f->defaultval.str;
6048 if (str) {
6049 return (upb_MessageValue){
6050 .str_val = (upb_StringView){.data = str->str, .size = str->len}};
6051 } else {
6052 return (upb_MessageValue){
6053 .str_val = (upb_StringView){.data = NULL, .size = 0}};
6054 }
6055 }
6056 default:
6057 UPB_UNREACHABLE();
6058 }
6059
6060 return ret;
6061 }
6062
upb_FieldDef_MessageSubDef(const upb_FieldDef * f)6063 const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) {
6064 return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL;
6065 }
6066
upb_FieldDef_EnumSubDef(const upb_FieldDef * f)6067 const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) {
6068 return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL;
6069 }
6070
upb_FieldDef_MiniTable(const upb_FieldDef * f)6071 const upb_MiniTable_Field* upb_FieldDef_MiniTable(const upb_FieldDef* f) {
6072 UPB_ASSERT(!upb_FieldDef_IsExtension(f));
6073 return &f->msgdef->layout->fields[f->layout_index];
6074 }
6075
_upb_FieldDef_ExtensionMiniTable(const upb_FieldDef * f)6076 const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable(
6077 const upb_FieldDef* f) {
6078 UPB_ASSERT(upb_FieldDef_IsExtension(f));
6079 return f->file->ext_layouts[f->layout_index];
6080 }
6081
_upb_FieldDef_IsProto3Optional(const upb_FieldDef * f)6082 bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) {
6083 return f->proto3_optional_;
6084 }
6085
upb_FieldDef_IsSubMessage(const upb_FieldDef * f)6086 bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) {
6087 return upb_FieldDef_CType(f) == kUpb_CType_Message;
6088 }
6089
upb_FieldDef_IsString(const upb_FieldDef * f)6090 bool upb_FieldDef_IsString(const upb_FieldDef* f) {
6091 return upb_FieldDef_CType(f) == kUpb_CType_String ||
6092 upb_FieldDef_CType(f) == kUpb_CType_Bytes;
6093 }
6094
upb_FieldDef_IsRepeated(const upb_FieldDef * f)6095 bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) {
6096 return upb_FieldDef_Label(f) == kUpb_Label_Repeated;
6097 }
6098
// Returns true when the field is neither string/bytes nor a sub-message.
bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) {
  return !(upb_FieldDef_IsString(f) || upb_FieldDef_IsSubMessage(f));
}
6102
// Returns true when the field is repeated, is a sub-message, and its message
// sub-def is a map entry.
bool upb_FieldDef_IsMap(const upb_FieldDef* f) {
  if (!upb_FieldDef_IsRepeated(f)) return false;
  if (!upb_FieldDef_IsSubMessage(f)) return false;
  return upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f));
}
6107
upb_FieldDef_HasDefault(const upb_FieldDef * f)6108 bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; }
6109
upb_FieldDef_HasSubDef(const upb_FieldDef * f)6110 bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) {
6111 return upb_FieldDef_IsSubMessage(f) ||
6112 upb_FieldDef_CType(f) == kUpb_CType_Enum;
6113 }
6114
upb_FieldDef_HasPresence(const upb_FieldDef * f)6115 bool upb_FieldDef_HasPresence(const upb_FieldDef* f) {
6116 if (upb_FieldDef_IsRepeated(f)) return false;
6117 return upb_FieldDef_IsSubMessage(f) || upb_FieldDef_ContainingOneof(f) ||
6118 f->file->syntax == kUpb_Syntax_Proto2;
6119 }
6120
// Returns true when `x` lies in the inclusive range [low, high].
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) return false;
  return x <= high;
}
6124
// Returns true when `label` is within the inclusive range [1, 3].
bool upb_FieldDef_checklabel(int32_t label) {
  const int32_t lowest = 1;
  const int32_t highest = 3;
  return between(label, lowest, highest);
}
// Returns true when `type` is within the inclusive range [1, 11].
bool upb_FieldDef_checktype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 11;
  return between(type, lowest, highest);
}
// Returns true when `fmt` is within the inclusive range [1, 3].
bool upb_FieldDef_checkintfmt(int32_t fmt) {
  const int32_t lowest = 1;
  const int32_t highest = 3;
  return between(fmt, lowest, highest);
}
6128
// Returns true when `type` is within the inclusive range [1, 18].
bool upb_FieldDef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
6132
6133 /* upb_MessageDef
6134 * *****************************************************************/
6135
upb_MessageDef_Options(const upb_MessageDef * m)6136 const google_protobuf_MessageOptions* upb_MessageDef_Options(
6137 const upb_MessageDef* m) {
6138 return m->opts;
6139 }
6140
upb_MessageDef_HasOptions(const upb_MessageDef * m)6141 bool upb_MessageDef_HasOptions(const upb_MessageDef* m) {
6142 return m->opts != (void*)opt_default;
6143 }
6144
upb_MessageDef_FullName(const upb_MessageDef * m)6145 const char* upb_MessageDef_FullName(const upb_MessageDef* m) {
6146 return m->full_name;
6147 }
6148
upb_MessageDef_File(const upb_MessageDef * m)6149 const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) {
6150 return m->file;
6151 }
6152
upb_MessageDef_ContainingType(const upb_MessageDef * m)6153 const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) {
6154 return m->containing_type;
6155 }
6156
upb_MessageDef_Name(const upb_MessageDef * m)6157 const char* upb_MessageDef_Name(const upb_MessageDef* m) {
6158 return shortdefname(m->full_name);
6159 }
6160
upb_MessageDef_Syntax(const upb_MessageDef * m)6161 upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) {
6162 return m->file->syntax;
6163 }
6164
upb_MessageDef_FindFieldByNumber(const upb_MessageDef * m,uint32_t i)6165 const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m,
6166 uint32_t i) {
6167 upb_value val;
6168 return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val)
6169 : NULL;
6170 }
6171
upb_MessageDef_FindFieldByNameWithSize(const upb_MessageDef * m,const char * name,size_t len)6172 const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize(
6173 const upb_MessageDef* m, const char* name, size_t len) {
6174 upb_value val;
6175
6176 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
6177 return NULL;
6178 }
6179
6180 return unpack_def(val, UPB_DEFTYPE_FIELD);
6181 }
6182
upb_MessageDef_FindOneofByNameWithSize(const upb_MessageDef * m,const char * name,size_t len)6183 const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize(
6184 const upb_MessageDef* m, const char* name, size_t len) {
6185 upb_value val;
6186
6187 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
6188 return NULL;
6189 }
6190
6191 return unpack_def(val, UPB_DEFTYPE_ONEOF);
6192 }
6193
upb_MessageDef_FindByNameWithSize(const upb_MessageDef * m,const char * name,size_t len,const upb_FieldDef ** out_f,const upb_OneofDef ** out_o)6194 bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m,
6195 const char* name, size_t len,
6196 const upb_FieldDef** out_f,
6197 const upb_OneofDef** out_o) {
6198 upb_value val;
6199
6200 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
6201 return false;
6202 }
6203
6204 const upb_FieldDef* f = unpack_def(val, UPB_DEFTYPE_FIELD);
6205 const upb_OneofDef* o = unpack_def(val, UPB_DEFTYPE_ONEOF);
6206 if (out_f) *out_f = f;
6207 if (out_o) *out_o = o;
6208 return f || o; /* False if this was a JSON name. */
6209 }
6210
upb_MessageDef_FindByJsonNameWithSize(const upb_MessageDef * m,const char * name,size_t len)6211 const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize(
6212 const upb_MessageDef* m, const char* name, size_t len) {
6213 upb_value val;
6214 const upb_FieldDef* f;
6215
6216 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
6217 return NULL;
6218 }
6219
6220 f = unpack_def(val, UPB_DEFTYPE_FIELD);
6221 if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
6222
6223 return f;
6224 }
6225
upb_MessageDef_numfields(const upb_MessageDef * m)6226 int upb_MessageDef_numfields(const upb_MessageDef* m) { return m->field_count; }
6227
upb_MessageDef_numoneofs(const upb_MessageDef * m)6228 int upb_MessageDef_numoneofs(const upb_MessageDef* m) { return m->oneof_count; }
6229
upb_MessageDef_numrealoneofs(const upb_MessageDef * m)6230 int upb_MessageDef_numrealoneofs(const upb_MessageDef* m) {
6231 return m->real_oneof_count;
6232 }
6233
upb_MessageDef_ExtensionRangeCount(const upb_MessageDef * m)6234 int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) {
6235 return m->ext_range_count;
6236 }
6237
upb_MessageDef_ReservedRangeCount(const upb_MessageDef * m)6238 int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) {
6239 return m->res_range_count;
6240 }
6241
upb_MessageDef_ReservedNameCount(const upb_MessageDef * m)6242 int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) {
6243 return m->res_name_count;
6244 }
6245
upb_MessageDef_FieldCount(const upb_MessageDef * m)6246 int upb_MessageDef_FieldCount(const upb_MessageDef* m) {
6247 return m->field_count;
6248 }
6249
upb_MessageDef_OneofCount(const upb_MessageDef * m)6250 int upb_MessageDef_OneofCount(const upb_MessageDef* m) {
6251 return m->oneof_count;
6252 }
6253
upb_MessageDef_NestedMessageCount(const upb_MessageDef * m)6254 int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) {
6255 return m->nested_msg_count;
6256 }
6257
upb_MessageDef_NestedEnumCount(const upb_MessageDef * m)6258 int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) {
6259 return m->nested_enum_count;
6260 }
6261
upb_MessageDef_NestedExtensionCount(const upb_MessageDef * m)6262 int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) {
6263 return m->nested_ext_count;
6264 }
6265
upb_MessageDef_realoneofcount(const upb_MessageDef * m)6266 int upb_MessageDef_realoneofcount(const upb_MessageDef* m) {
6267 return m->real_oneof_count;
6268 }
6269
upb_MessageDef_MiniTable(const upb_MessageDef * m)6270 const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) {
6271 return m->layout;
6272 }
6273
upb_MessageDef_ExtensionRange(const upb_MessageDef * m,int i)6274 const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m,
6275 int i) {
6276 UPB_ASSERT(0 <= i && i < m->ext_range_count);
6277 return &m->ext_ranges[i];
6278 }
6279
_upb_MessageReservedRange_At(const upb_MessageReservedRange * r,int i)6280 upb_MessageReservedRange* _upb_MessageReservedRange_At(
6281 const upb_MessageReservedRange* r, int i) {
6282 return (upb_MessageReservedRange*)&r[i];
6283 }
6284
upb_MessageDef_ReservedRange(const upb_MessageDef * m,int i)6285 const upb_MessageReservedRange* upb_MessageDef_ReservedRange(
6286 const upb_MessageDef* m, int i) {
6287 UPB_ASSERT(0 <= i && i < m->res_range_count);
6288 return _upb_MessageReservedRange_At(m->res_ranges, i);
6289 }
6290
upb_MessageDef_ReservedName(const upb_MessageDef * m,int i)6291 upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) {
6292 UPB_ASSERT(0 <= i && i < m->res_name_count);
6293 return m->res_names[i];
6294 }
6295
upb_MessageReservedRange_Start(const upb_MessageReservedRange * r)6296 int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) {
6297 return r->start;
6298 }
upb_MessageReservedRange_End(const upb_MessageReservedRange * r)6299 int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) {
6300 return r->end;
6301 }
6302
_upb_MessageReservedRanges_New(symtab_addctx * ctx,int n,const google_protobuf_DescriptorProto_ReservedRange * const * protos,const upb_MessageDef * m)6303 upb_MessageReservedRange* _upb_MessageReservedRanges_New(
6304 symtab_addctx* ctx, int n,
6305 const google_protobuf_DescriptorProto_ReservedRange* const* protos,
6306 const upb_MessageDef* m) {
6307 upb_MessageReservedRange* r =
6308 upb_Arena_Malloc(ctx->arena, sizeof(upb_MessageReservedRange) * n);
6309
6310 for (int i = 0; i < n; i++) {
6311 const int32_t start = google_protobuf_DescriptorProto_ReservedRange_start(protos[i]);
6312 const int32_t end = google_protobuf_DescriptorProto_ReservedRange_end(protos[i]);
6313 const int32_t max = kUpb_MaxFieldNumber + 1;
6314
6315 // A full validation would also check that each range is disjoint, and that
6316 // none of the fields overlap with the extension ranges, but we are just
6317 // sanity checking here.
6318 if (start < 1 || end <= start || end > max) {
6319 symtab_errf(ctx,
6320 "Reserved range (%d, %d) is invalid, message=%s\n",
6321 (int)start, (int)end, upb_MessageDef_FullName(m));
6322 }
6323
6324 r[i].start = start;
6325 r[i].end = end;
6326 }
6327
6328 return r;
6329 }
6330
upb_MessageDef_Field(const upb_MessageDef * m,int i)6331 const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) {
6332 UPB_ASSERT(0 <= i && i < m->field_count);
6333 return &m->fields[i];
6334 }
6335
upb_MessageDef_Oneof(const upb_MessageDef * m,int i)6336 const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) {
6337 UPB_ASSERT(0 <= i && i < m->oneof_count);
6338 return &m->oneofs[i];
6339 }
6340
upb_MessageDef_NestedMessage(const upb_MessageDef * m,int i)6341 const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m,
6342 int i) {
6343 UPB_ASSERT(0 <= i && i < m->nested_msg_count);
6344 return &m->nested_msgs[i];
6345 }
6346
upb_MessageDef_NestedEnum(const upb_MessageDef * m,int i)6347 const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) {
6348 UPB_ASSERT(0 <= i && i < m->nested_enum_count);
6349 return &m->nested_enums[i];
6350 }
6351
upb_MessageDef_NestedExtension(const upb_MessageDef * m,int i)6352 const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m,
6353 int i) {
6354 UPB_ASSERT(0 <= i && i < m->nested_ext_count);
6355 return &m->nested_exts[i];
6356 }
6357
upb_MessageDef_WellKnownType(const upb_MessageDef * m)6358 upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) {
6359 return m->well_known_type;
6360 }
6361
6362 /* upb_OneofDef ***************************************************************/
6363
upb_OneofDef_Options(const upb_OneofDef * o)6364 const google_protobuf_OneofOptions* upb_OneofDef_Options(
6365 const upb_OneofDef* o) {
6366 return o->opts;
6367 }
6368
upb_OneofDef_HasOptions(const upb_OneofDef * o)6369 bool upb_OneofDef_HasOptions(const upb_OneofDef* o) {
6370 return o->opts != (void*)opt_default;
6371 }
6372
upb_OneofDef_Name(const upb_OneofDef * o)6373 const char* upb_OneofDef_Name(const upb_OneofDef* o) {
6374 return shortdefname(o->full_name);
6375 }
6376
upb_OneofDef_ContainingType(const upb_OneofDef * o)6377 const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) {
6378 return o->parent;
6379 }
6380
upb_OneofDef_FieldCount(const upb_OneofDef * o)6381 int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; }
6382
upb_OneofDef_Field(const upb_OneofDef * o,int i)6383 const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) {
6384 UPB_ASSERT(i < o->field_count);
6385 return o->fields[i];
6386 }
6387
upb_OneofDef_numfields(const upb_OneofDef * o)6388 int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; }
6389
upb_OneofDef_Index(const upb_OneofDef * o)6390 uint32_t upb_OneofDef_Index(const upb_OneofDef* o) {
6391 // Compute index in our parent's array.
6392 return o - o->parent->oneofs;
6393 }
6394
upb_OneofDef_IsSynthetic(const upb_OneofDef * o)6395 bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; }
6396
upb_OneofDef_LookupNameWithSize(const upb_OneofDef * o,const char * name,size_t length)6397 const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o,
6398 const char* name,
6399 size_t length) {
6400 upb_value val;
6401 return upb_strtable_lookup2(&o->ntof, name, length, &val)
6402 ? upb_value_getptr(val)
6403 : NULL;
6404 }
6405
upb_OneofDef_LookupNumber(const upb_OneofDef * o,uint32_t num)6406 const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o,
6407 uint32_t num) {
6408 upb_value val;
6409 return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val)
6410 : NULL;
6411 }
6412
6413 /* upb_FileDef ****************************************************************/
6414
upb_FileDef_Options(const upb_FileDef * f)6415 const google_protobuf_FileOptions* upb_FileDef_Options(const upb_FileDef* f) {
6416 return f->opts;
6417 }
6418
upb_FileDef_HasOptions(const upb_FileDef * f)6419 bool upb_FileDef_HasOptions(const upb_FileDef* f) {
6420 return f->opts != (void*)opt_default;
6421 }
6422
upb_FileDef_Name(const upb_FileDef * f)6423 const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; }
6424
upb_FileDef_Package(const upb_FileDef * f)6425 const char* upb_FileDef_Package(const upb_FileDef* f) {
6426 return f->package ? f->package : "";
6427 }
6428
upb_FileDef_Syntax(const upb_FileDef * f)6429 upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; }
6430
upb_FileDef_TopLevelMessageCount(const upb_FileDef * f)6431 int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) {
6432 return f->top_lvl_msg_count;
6433 }
6434
upb_FileDef_DependencyCount(const upb_FileDef * f)6435 int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; }
6436
upb_FileDef_PublicDependencyCount(const upb_FileDef * f)6437 int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) {
6438 return f->public_dep_count;
6439 }
6440
upb_FileDef_WeakDependencyCount(const upb_FileDef * f)6441 int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) {
6442 return f->weak_dep_count;
6443 }
6444
_upb_FileDef_PublicDependencyIndexes(const upb_FileDef * f)6445 const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) {
6446 return f->public_deps;
6447 }
6448
_upb_FileDef_WeakDependencyIndexes(const upb_FileDef * f)6449 const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) {
6450 return f->weak_deps;
6451 }
6452
upb_FileDef_TopLevelEnumCount(const upb_FileDef * f)6453 int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) {
6454 return f->top_lvl_enum_count;
6455 }
6456
upb_FileDef_TopLevelExtensionCount(const upb_FileDef * f)6457 int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) {
6458 return f->top_lvl_ext_count;
6459 }
6460
upb_FileDef_ServiceCount(const upb_FileDef * f)6461 int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; }
6462
upb_FileDef_Dependency(const upb_FileDef * f,int i)6463 const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) {
6464 UPB_ASSERT(0 <= i && i < f->dep_count);
6465 return f->deps[i];
6466 }
6467
upb_FileDef_PublicDependency(const upb_FileDef * f,int i)6468 const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) {
6469 UPB_ASSERT(0 <= i && i < f->public_dep_count);
6470 return f->deps[f->public_deps[i]];
6471 }
6472
upb_FileDef_WeakDependency(const upb_FileDef * f,int i)6473 const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) {
6474 UPB_ASSERT(0 <= i && i < f->public_dep_count);
6475 return f->deps[f->weak_deps[i]];
6476 }
6477
upb_FileDef_TopLevelMessage(const upb_FileDef * f,int i)6478 const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) {
6479 UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count);
6480 return &f->top_lvl_msgs[i];
6481 }
6482
upb_FileDef_TopLevelEnum(const upb_FileDef * f,int i)6483 const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) {
6484 UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count);
6485 return &f->top_lvl_enums[i];
6486 }
6487
upb_FileDef_TopLevelExtension(const upb_FileDef * f,int i)6488 const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) {
6489 UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count);
6490 return &f->top_lvl_exts[i];
6491 }
6492
upb_FileDef_Service(const upb_FileDef * f,int i)6493 const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) {
6494 UPB_ASSERT(0 <= i && i < f->service_count);
6495 return &f->services[i];
6496 }
6497
upb_FileDef_Pool(const upb_FileDef * f)6498 const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; }
6499
6500 /* upb_MethodDef **************************************************************/
6501
upb_MethodDef_Options(const upb_MethodDef * m)6502 const google_protobuf_MethodOptions* upb_MethodDef_Options(
6503 const upb_MethodDef* m) {
6504 return m->opts;
6505 }
6506
upb_MethodDef_HasOptions(const upb_MethodDef * m)6507 bool upb_MethodDef_HasOptions(const upb_MethodDef* m) {
6508 return m->opts != (void*)opt_default;
6509 }
6510
upb_MethodDef_FullName(const upb_MethodDef * m)6511 const char* upb_MethodDef_FullName(const upb_MethodDef* m) {
6512 return m->full_name;
6513 }
6514
upb_MethodDef_Index(const upb_MethodDef * m)6515 int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; }
6516
upb_MethodDef_Name(const upb_MethodDef * m)6517 const char* upb_MethodDef_Name(const upb_MethodDef* m) {
6518 return shortdefname(m->full_name);
6519 }
6520
upb_MethodDef_Service(const upb_MethodDef * m)6521 const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) {
6522 return m->service;
6523 }
6524
upb_MethodDef_InputType(const upb_MethodDef * m)6525 const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) {
6526 return m->input_type;
6527 }
6528
upb_MethodDef_OutputType(const upb_MethodDef * m)6529 const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) {
6530 return m->output_type;
6531 }
6532
upb_MethodDef_ClientStreaming(const upb_MethodDef * m)6533 bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) {
6534 return m->client_streaming;
6535 }
6536
upb_MethodDef_ServerStreaming(const upb_MethodDef * m)6537 bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) {
6538 return m->server_streaming;
6539 }
6540
6541 /* upb_ServiceDef *************************************************************/
6542
upb_ServiceDef_Options(const upb_ServiceDef * s)6543 const google_protobuf_ServiceOptions* upb_ServiceDef_Options(
6544 const upb_ServiceDef* s) {
6545 return s->opts;
6546 }
6547
upb_ServiceDef_HasOptions(const upb_ServiceDef * s)6548 bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) {
6549 return s->opts != (void*)opt_default;
6550 }
6551
upb_ServiceDef_FullName(const upb_ServiceDef * s)6552 const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) {
6553 return s->full_name;
6554 }
6555
upb_ServiceDef_Name(const upb_ServiceDef * s)6556 const char* upb_ServiceDef_Name(const upb_ServiceDef* s) {
6557 return shortdefname(s->full_name);
6558 }
6559
upb_ServiceDef_Index(const upb_ServiceDef * s)6560 int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; }
6561
upb_ServiceDef_File(const upb_ServiceDef * s)6562 const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) {
6563 return s->file;
6564 }
6565
upb_ServiceDef_MethodCount(const upb_ServiceDef * s)6566 int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) {
6567 return s->method_count;
6568 }
6569
upb_ServiceDef_Method(const upb_ServiceDef * s,int i)6570 const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) {
6571 return i < 0 || i >= s->method_count ? NULL : &s->methods[i];
6572 }
6573
upb_ServiceDef_FindMethodByName(const upb_ServiceDef * s,const char * name)6574 const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s,
6575 const char* name) {
6576 for (int i = 0; i < s->method_count; i++) {
6577 if (strcmp(name, upb_MethodDef_Name(&s->methods[i])) == 0) {
6578 return &s->methods[i];
6579 }
6580 }
6581 return NULL;
6582 }
6583
6584 /* upb_DefPool ****************************************************************/
6585
// Frees the pool's arena, then releases the pool struct itself with
// upb_gfree().
void upb_DefPool_Free(upb_DefPool* s) {
  upb_Arena* arena = s->arena;
  upb_Arena_Free(arena);
  upb_gfree(s);
}
6590
upb_DefPool_New(void)6591 upb_DefPool* upb_DefPool_New(void) {
6592 upb_DefPool* s = upb_gmalloc(sizeof(*s));
6593
6594 if (!s) {
6595 return NULL;
6596 }
6597
6598 s->arena = upb_Arena_New();
6599 s->bytes_loaded = 0;
6600
6601 if (!upb_strtable_init(&s->syms, 32, s->arena) ||
6602 !upb_strtable_init(&s->files, 4, s->arena) ||
6603 !upb_inttable_init(&s->exts, s->arena)) {
6604 goto err;
6605 }
6606
6607 s->extreg = upb_ExtensionRegistry_New(s->arena);
6608 if (!s->extreg) goto err;
6609 return s;
6610
6611 err:
6612 upb_Arena_Free(s->arena);
6613 upb_gfree(s);
6614 return NULL;
6615 }
6616
// Looks up NUL-terminated `sym` in the pool's `syms` table and unpacks the
// entry as `type`. Returns NULL when the symbol is absent; otherwise returns
// whatever unpack_def() yields.
static const void* symtab_lookup(const upb_DefPool* s, const char* sym,
                                 upb_deftype_t type) {
  upb_value entry;
  if (!upb_strtable_lookup(&s->syms, sym, &entry)) {
    return NULL;
  }
  return unpack_def(entry, type);
}
6622
// Same as symtab_lookup(), but takes the symbol as a pointer plus an explicit
// byte length `size`.
static const void* symtab_lookup2(const upb_DefPool* s, const char* sym,
                                  size_t size, upb_deftype_t type) {
  upb_value entry;
  if (!upb_strtable_lookup2(&s->syms, sym, size, &entry)) {
    return NULL;
  }
  return unpack_def(entry, type);
}
6629
upb_DefPool_FindMessageByName(const upb_DefPool * s,const char * sym)6630 const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s,
6631 const char* sym) {
6632 return symtab_lookup(s, sym, UPB_DEFTYPE_MSG);
6633 }
6634
upb_DefPool_FindMessageByNameWithSize(const upb_DefPool * s,const char * sym,size_t len)6635 const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize(
6636 const upb_DefPool* s, const char* sym, size_t len) {
6637 return symtab_lookup2(s, sym, len, UPB_DEFTYPE_MSG);
6638 }
6639
upb_DefPool_FindEnumByName(const upb_DefPool * s,const char * sym)6640 const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s,
6641 const char* sym) {
6642 return symtab_lookup(s, sym, UPB_DEFTYPE_ENUM);
6643 }
6644
upb_DefPool_FindEnumByNameval(const upb_DefPool * s,const char * sym)6645 const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s,
6646 const char* sym) {
6647 return symtab_lookup(s, sym, UPB_DEFTYPE_ENUMVAL);
6648 }
6649
upb_DefPool_FindFileByName(const upb_DefPool * s,const char * name)6650 const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s,
6651 const char* name) {
6652 upb_value v;
6653 return upb_strtable_lookup(&s->files, name, &v)
6654 ? unpack_def(v, UPB_DEFTYPE_FILE)
6655 : NULL;
6656 }
6657
upb_DefPool_FindFileByNameWithSize(const upb_DefPool * s,const char * name,size_t len)6658 const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s,
6659 const char* name,
6660 size_t len) {
6661 upb_value v;
6662 return upb_strtable_lookup2(&s->files, name, len, &v)
6663 ? unpack_def(v, UPB_DEFTYPE_FILE)
6664 : NULL;
6665 }
6666
upb_DefPool_FindExtensionByNameWithSize(const upb_DefPool * s,const char * name,size_t size)6667 const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize(
6668 const upb_DefPool* s, const char* name, size_t size) {
6669 upb_value v;
6670 if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL;
6671
6672 switch (deftype(v)) {
6673 case UPB_DEFTYPE_FIELD:
6674 return unpack_def(v, UPB_DEFTYPE_FIELD);
6675 case UPB_DEFTYPE_MSG: {
6676 const upb_MessageDef* m = unpack_def(v, UPB_DEFTYPE_MSG);
6677 return m->in_message_set ? &m->nested_exts[0] : NULL;
6678 }
6679 default:
6680 break;
6681 }
6682
6683 return NULL;
6684 }
6685
upb_DefPool_FindExtensionByName(const upb_DefPool * s,const char * sym)6686 const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s,
6687 const char* sym) {
6688 return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym));
6689 }
6690
upb_DefPool_FindServiceByName(const upb_DefPool * s,const char * name)6691 const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s,
6692 const char* name) {
6693 return symtab_lookup(s, name, UPB_DEFTYPE_SERVICE);
6694 }
6695
upb_DefPool_FindServiceByNameWithSize(const upb_DefPool * s,const char * name,size_t size)6696 const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize(
6697 const upb_DefPool* s, const char* name, size_t size) {
6698 return symtab_lookup2(s, name, size, UPB_DEFTYPE_SERVICE);
6699 }
6700
upb_DefPool_FindFileContainingSymbol(const upb_DefPool * s,const char * name)6701 const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s,
6702 const char* name) {
6703 upb_value v;
6704 // TODO(haberman): non-extension fields and oneofs.
6705 if (upb_strtable_lookup(&s->syms, name, &v)) {
6706 switch (deftype(v)) {
6707 case UPB_DEFTYPE_EXT: {
6708 const upb_FieldDef* f = unpack_def(v, UPB_DEFTYPE_EXT);
6709 return upb_FieldDef_File(f);
6710 }
6711 case UPB_DEFTYPE_MSG: {
6712 const upb_MessageDef* m = unpack_def(v, UPB_DEFTYPE_MSG);
6713 return upb_MessageDef_File(m);
6714 }
6715 case UPB_DEFTYPE_ENUM: {
6716 const upb_EnumDef* e = unpack_def(v, UPB_DEFTYPE_ENUM);
6717 return upb_EnumDef_File(e);
6718 }
6719 case UPB_DEFTYPE_ENUMVAL: {
6720 const upb_EnumValueDef* ev = unpack_def(v, UPB_DEFTYPE_ENUMVAL);
6721 return upb_EnumDef_File(upb_EnumValueDef_Enum(ev));
6722 }
6723 case UPB_DEFTYPE_SERVICE: {
6724 const upb_ServiceDef* service = unpack_def(v, UPB_DEFTYPE_SERVICE);
6725 return upb_ServiceDef_File(service);
6726 }
6727 default:
6728 UPB_UNREACHABLE();
6729 }
6730 }
6731
6732 const char* last_dot = strrchr(name, '.');
6733 if (last_dot) {
6734 const upb_MessageDef* parent =
6735 upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name);
6736 if (parent) {
6737 const char* shortname = last_dot + 1;
6738 if (upb_MessageDef_FindByNameWithSize(parent, shortname,
6739 strlen(shortname), NULL, NULL)) {
6740 return upb_MessageDef_File(parent);
6741 }
6742 }
6743 }
6744
6745 return NULL;
6746 }
6747
6748 /* Code to build defs from descriptor protos. *********************************/
6749
6750 /* There is a question of how much validation to do here. It will be difficult
6751 * to perfectly match the amount of validation performed by proto2. But since
6752 * this code is used to directly build defs from Ruby (for example) we do need
6753 * to validate important constraints like uniqueness of names and numbers. */
6754
// Evaluates `x`; if the result is false/NULL, calls symtab_oomerr(ctx).
// A variable named `ctx` must be in scope wherever this macro is expanded.
// NOTE(review): this expands to a bare `if` statement (no do/while(0)
// wrapper), so avoid using it as the unbraced body of an if/else.
#define CHK_OOM(x)      \
  if (!(x)) {           \
    symtab_oomerr(ctx); \
  }
6759
symtab_oomerr(symtab_addctx * ctx)6760 UPB_NORETURN UPB_NOINLINE static void symtab_oomerr(symtab_addctx* ctx) {
6761 upb_Status_setoom(ctx->status);
6762 UPB_LONGJMP(ctx->err, 1);
6763 }
6764
// Allocates `bytes` bytes from ctx->arena. A zero-byte request returns NULL
// without allocating; an allocation failure is routed to symtab_oomerr().
void* symtab_alloc(symtab_addctx* ctx, size_t bytes) {
  void* mem = NULL;
  if (bytes != 0) {
    mem = upb_Arena_Malloc(ctx->arena, bytes);
    if (mem == NULL) {
      symtab_oomerr(ctx);
    }
  }
  return mem;
}
6771
// We want to copy the options verbatim into the destination options proto.
// We use serialize+parse as our deep copy.
//
// When `proto` has options, they are serialized into ctx->tmp_arena and then
// parsed back into ctx->arena, and the result is stored in `target`; a NULL
// result from either step goes through CHK_OOM. When `proto` has no options,
// `target` is set to `opt_default` cast to the options type.
// A variable named `ctx` must be in scope wherever this macro is expanded.
#define SET_OPTIONS(target, desc_type, options_type, proto)                   \
  if (google_protobuf_##desc_type##_has_options(proto)) {                     \
    size_t size;                                                              \
    char* pb = google_protobuf_##options_type##_serialize(                    \
        google_protobuf_##desc_type##_options(proto), ctx->tmp_arena, &size); \
    CHK_OOM(pb);                                                              \
    target = google_protobuf_##options_type##_parse(pb, size, ctx->arena);    \
    CHK_OOM(target);                                                          \
  } else {                                                                    \
    target = (const google_protobuf_##options_type*)opt_default;              \
  }
6785
// Validates `name` as an identifier and reports any violation through
// symtab_errf().
//
// Rules enforced:
//   - '.' is only accepted when `full` is true, and never at the start of a
//     component;
//   - every component must begin with a letter (upb_isletter);
//   - the remaining characters must be alphanumeric (upb_isalphanum);
//   - the name must not end at the start of a component, which also rejects
//     an empty name and a trailing '.'.
static void check_ident(symtab_addctx* ctx, upb_StringView name, bool full) {
  const char* const data = name.data;
  const size_t size = name.size;
  bool at_component_start = true;

  for (size_t pos = 0; pos < size; pos++) {
    const char ch = data[pos];

    if (ch == '.') {
      if (!full || at_component_start) {
        symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)size,
                    data);
      }
      at_component_start = true;
      continue;
    }

    if (!at_component_start) {
      if (!upb_isalphanum(ch)) {
        symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)",
                    (int)size, data);
      }
      continue;
    }

    if (!upb_isletter(ch)) {
      symtab_errf(
          ctx,
          "invalid name: path components must start with a letter (%.*s)",
          (int)size, data);
    }
    at_component_start = false;
  }

  if (at_component_start) {
    symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)size, data);
  }
}
6817
/* Returns ceil(n / d) for d != 0.
 *
 * Written as quotient plus a remainder test rather than (n + d - 1) / d so
 * the intermediate value cannot wrap around when n is close to SIZE_MAX. */
static size_t div_round_up(size_t n, size_t d) {
  return n / d + (n % d != 0);
}
6819
/* Returns the number of bytes a single value of C type |type| occupies in a
 * message: 1 for bool, 4 or 8 for the numeric types, pointer size for
 * sub-messages and sizeof(upb_StringView) for strings/bytes. */
static size_t upb_MessageValue_sizeof(upb_CType type) {
  switch (type) {
    /* One byte. */
    case kUpb_CType_Bool:
      return 1;

    /* Four bytes. */
    case kUpb_CType_Float:
    case kUpb_CType_Int32:
    case kUpb_CType_UInt32:
    case kUpb_CType_Enum:
      return 4;

    /* Eight bytes. */
    case kUpb_CType_Double:
    case kUpb_CType_Int64:
    case kUpb_CType_UInt64:
      return 8;

    /* Stored as a string view. */
    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      return sizeof(upb_StringView);

    /* Stored as a pointer. */
    case kUpb_CType_Message:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
6841
upb_msg_fielddefsize(const upb_FieldDef * f)6842 static uint8_t upb_msg_fielddefsize(const upb_FieldDef* f) {
6843 if (upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) {
6844 upb_MapEntry ent;
6845 UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
6846 return sizeof(ent.k);
6847 } else if (upb_FieldDef_IsRepeated(f)) {
6848 return sizeof(void*);
6849 } else {
6850 return upb_MessageValue_sizeof(upb_FieldDef_CType(f));
6851 }
6852 }
6853
/* Reserves |size| bytes at the end of layout |l|, aligned to |size|, and
 * returns the offset of the reserved region.  l->size is advanced past it.
 * Reports an error through symtab_errf() (naming message |m|) when the
 * layout would grow beyond UINT16_MAX bytes. */
static uint32_t upb_MiniTable_place(symtab_addctx* ctx, upb_MiniTable* l,
                                    size_t size, const upb_MessageDef* m) {
  const size_t start = UPB_ALIGN_UP(l->size, size);
  const size_t finish = start + size;

  if (finish > UINT16_MAX) {
    symtab_errf(ctx, "size of message %s exceeded max size of %zu bytes",
                upb_MessageDef_FullName(m), (size_t)UINT16_MAX);
  }

  l->size = finish;
  return start;
}
6867
field_number_cmp(const void * p1,const void * p2)6868 static int field_number_cmp(const void* p1, const void* p2) {
6869 const upb_MiniTable_Field* f1 = p1;
6870 const upb_MiniTable_Field* f2 = p2;
6871 return f1->number - f2->number;
6872 }
6873
/* Walks |fields| (expected to be sorted by field number by the caller) and
 * records each entry's position in the matching upb_FieldDef's
 * layout_index.  Also computes l->dense_below: it is set to i + 1 for the
 * last position i (below UINT8_MAX) where entry i holds number i + 1 and its
 * predecessor, if any, holds number i. */
static void assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l,
                                  upb_MiniTable_Field* fields) {
  const int count = upb_MessageDef_numfields(m);
  int dense = 0;

  for (int idx = 0; idx < count; idx++) {
    upb_FieldDef* def =
        (upb_FieldDef*)upb_MessageDef_FindFieldByNumber(m, fields[idx].number);
    UPB_ASSERT(def);
    def->layout_index = idx;

    /* The remaining checks only decide whether the dense prefix extends. */
    if (idx >= UINT8_MAX) continue;
    if (fields[idx].number != idx + 1) continue;
    if (idx != 0 && fields[idx - 1].number != idx) continue;
    dense = idx + 1;
  }

  l->dense_below = dense;
}
6891
map_descriptortype(const upb_FieldDef * f)6892 static uint8_t map_descriptortype(const upb_FieldDef* f) {
6893 uint8_t type = upb_FieldDef_Type(f);
6894 /* See TableDescriptorType() in upbc/generator.cc for details and
6895 * rationale of these exceptions. */
6896 if (type == kUpb_FieldType_String && f->file->syntax == kUpb_Syntax_Proto2) {
6897 return kUpb_FieldType_Bytes;
6898 } else if (type == kUpb_FieldType_Enum &&
6899 (f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3 ||
6900 UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 ||
6901 // TODO(https://github.com/protocolbuffers/upb/issues/541):
6902 // fix map enum values to check for unknown enum values and put
6903 // them in the unknown field set.
6904 upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f)))) {
6905 return kUpb_FieldType_Int32;
6906 }
6907 return type;
6908 }
6909
/* Fills in the number, descriptortype and mode of mini-table entry |field|
 * from field definition |f|.
 *
 * The mode combines a kUpb_FieldMode_* kind with a kUpb_FieldRep_*
 * representation shifted by kUpb_FieldRep_Shift; packed and extension
 * fields additionally get their kUpb_LabelFlags_* bit set.  Offset,
 * presence and submsg_index are not touched here. */
static void fill_fieldlayout(upb_MiniTable_Field* field,
                             const upb_FieldDef* f) {
  /* Maps descriptor type -> kUpb_FieldRep_* constant for scalar fields. */
  static const uint8_t scalar_rep[] = {
      -1,                       /* invalid descriptor type */
      kUpb_FieldRep_8Byte,      /* DOUBLE */
      kUpb_FieldRep_4Byte,      /* FLOAT */
      kUpb_FieldRep_8Byte,      /* INT64 */
      kUpb_FieldRep_8Byte,      /* UINT64 */
      kUpb_FieldRep_4Byte,      /* INT32 */
      kUpb_FieldRep_8Byte,      /* FIXED64 */
      kUpb_FieldRep_4Byte,      /* FIXED32 */
      kUpb_FieldRep_1Byte,      /* BOOL */
      kUpb_FieldRep_StringView, /* STRING */
      kUpb_FieldRep_Pointer,    /* GROUP */
      kUpb_FieldRep_Pointer,    /* MESSAGE */
      kUpb_FieldRep_StringView, /* BYTES */
      kUpb_FieldRep_4Byte,      /* UINT32 */
      kUpb_FieldRep_4Byte,      /* ENUM */
      kUpb_FieldRep_4Byte,      /* SFIXED32 */
      kUpb_FieldRep_8Byte,      /* SFIXED64 */
      kUpb_FieldRep_4Byte,      /* SINT32 */
      kUpb_FieldRep_8Byte,      /* SINT64 */
  };
  int kind;
  int rep;

  field->number = upb_FieldDef_Number(f);
  field->descriptortype = map_descriptortype(f);

  /* Maps and arrays are always held by pointer; scalars look up their
   * representation by descriptor type. */
  if (upb_FieldDef_IsMap(f)) {
    kind = kUpb_FieldMode_Map;
    rep = kUpb_FieldRep_Pointer;
  } else if (upb_FieldDef_IsRepeated(f)) {
    kind = kUpb_FieldMode_Array;
    rep = kUpb_FieldRep_Pointer;
  } else {
    kind = kUpb_FieldMode_Scalar;
    rep = scalar_rep[field->descriptortype];
  }
  field->mode = kind | (rep << kUpb_FieldRep_Shift);

  if (upb_FieldDef_IsPacked(f)) {
    field->mode |= kUpb_LabelFlags_IsPacked;
  }

  if (upb_FieldDef_IsExtension(f)) {
    field->mode |= kUpb_LabelFlags_IsExtension;
  }
}
6956
6957 /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
6958 * It computes a dynamic layout for all of the fields in |m|. */
make_layout(symtab_addctx * ctx,const upb_MessageDef * m)6959 static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
6960 upb_MiniTable* l = (upb_MiniTable*)m->layout;
6961 size_t field_count = upb_MessageDef_numfields(m);
6962 size_t sublayout_count = 0;
6963 upb_MiniTable_Sub* subs;
6964 upb_MiniTable_Field* fields;
6965
6966 memset(l, 0, sizeof(*l) + sizeof(_upb_FastTable_Entry));
6967
6968 /* Count sub-messages. */
6969 for (size_t i = 0; i < field_count; i++) {
6970 const upb_FieldDef* f = &m->fields[i];
6971 if (upb_FieldDef_IsSubMessage(f)) {
6972 sublayout_count++;
6973 }
6974 if (upb_FieldDef_CType(f) == kUpb_CType_Enum &&
6975 f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) {
6976 sublayout_count++;
6977 }
6978 }
6979
6980 fields = symtab_alloc(ctx, field_count * sizeof(*fields));
6981 subs = symtab_alloc(ctx, sublayout_count * sizeof(*subs));
6982
6983 l->field_count = upb_MessageDef_numfields(m);
6984 l->fields = fields;
6985 l->subs = subs;
6986 l->table_mask = 0;
6987 l->required_count = 0;
6988
6989 if (upb_MessageDef_ExtensionRangeCount(m) > 0) {
6990 if (google_protobuf_MessageOptions_message_set_wire_format(m->opts)) {
6991 l->ext = kUpb_ExtMode_IsMessageSet;
6992 } else {
6993 l->ext = kUpb_ExtMode_Extendable;
6994 }
6995 } else {
6996 l->ext = kUpb_ExtMode_NonExtendable;
6997 }
6998
6999 /* TODO(haberman): initialize fast tables so that reflection-based parsing
7000 * can get the same speeds as linked-in types. */
7001 l->fasttable[0].field_parser = &fastdecode_generic;
7002 l->fasttable[0].field_data = 0;
7003
7004 if (upb_MessageDef_IsMapEntry(m)) {
7005 /* TODO(haberman): refactor this method so this special case is more
7006 * elegant. */
7007 const upb_FieldDef* key = upb_MessageDef_FindFieldByNumber(m, 1);
7008 const upb_FieldDef* val = upb_MessageDef_FindFieldByNumber(m, 2);
7009 if (key == NULL || val == NULL) {
7010 symtab_errf(ctx, "Malformed map entry from message: %s",
7011 upb_MessageDef_FullName(m));
7012 }
7013 fields[0].number = 1;
7014 fields[1].number = 2;
7015 fields[0].mode = kUpb_FieldMode_Scalar;
7016 fields[1].mode = kUpb_FieldMode_Scalar;
7017 fields[0].presence = 0;
7018 fields[1].presence = 0;
7019 fields[0].descriptortype = map_descriptortype(key);
7020 fields[1].descriptortype = map_descriptortype(val);
7021 fields[0].offset = 0;
7022 fields[1].offset = sizeof(upb_StringView);
7023 fields[1].submsg_index = 0;
7024
7025 if (upb_FieldDef_CType(val) == kUpb_CType_Message) {
7026 subs[0].submsg = upb_FieldDef_MessageSubDef(val)->layout;
7027 }
7028
7029 upb_FieldDef* fielddefs = (upb_FieldDef*)&m->fields[0];
7030 UPB_ASSERT(fielddefs[0].number_ == 1);
7031 UPB_ASSERT(fielddefs[1].number_ == 2);
7032 fielddefs[0].layout_index = 0;
7033 fielddefs[1].layout_index = 1;
7034
7035 l->field_count = 2;
7036 l->size = 2 * sizeof(upb_StringView);
7037 l->size = UPB_ALIGN_UP(l->size, 8);
7038 l->dense_below = 2;
7039 return;
7040 }
7041
7042 /* Allocate data offsets in three stages:
7043 *
7044 * 1. hasbits.
7045 * 2. regular fields.
7046 * 3. oneof fields.
7047 *
7048 * OPT: There is a lot of room for optimization here to minimize the size.
7049 */
7050
7051 /* Assign hasbits for required fields first. */
7052 size_t hasbit = 0;
7053
7054 for (int i = 0; i < m->field_count; i++) {
7055 const upb_FieldDef* f = &m->fields[i];
7056 upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)];
7057 if (upb_FieldDef_Label(f) == kUpb_Label_Required) {
7058 field->presence = ++hasbit;
7059 if (hasbit >= 63) {
7060 symtab_errf(ctx, "Message with >=63 required fields: %s",
7061 upb_MessageDef_FullName(m));
7062 }
7063 l->required_count++;
7064 }
7065 }
7066
7067 /* Allocate hasbits and set basic field attributes. */
7068 sublayout_count = 0;
7069 for (int i = 0; i < m->field_count; i++) {
7070 const upb_FieldDef* f = &m->fields[i];
7071 upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)];
7072
7073 fill_fieldlayout(field, f);
7074
7075 if (field->descriptortype == kUpb_FieldType_Message ||
7076 field->descriptortype == kUpb_FieldType_Group) {
7077 field->submsg_index = sublayout_count++;
7078 subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout;
7079 } else if (field->descriptortype == kUpb_FieldType_Enum) {
7080 field->submsg_index = sublayout_count++;
7081 subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout;
7082 UPB_ASSERT(subs[field->submsg_index].subenum);
7083 }
7084
7085 if (upb_FieldDef_Label(f) == kUpb_Label_Required) {
7086 /* Hasbit was already assigned. */
7087 } else if (upb_FieldDef_HasPresence(f) &&
7088 !upb_FieldDef_RealContainingOneof(f)) {
7089 /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
7090 * table. This wastes one hasbit, but we don't worry about it for now. */
7091 field->presence = ++hasbit;
7092 } else {
7093 field->presence = 0;
7094 }
7095 }
7096
7097 /* Account for space used by hasbits. */
7098 l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0;
7099
7100 /* Allocate non-oneof fields. */
7101 for (int i = 0; i < m->field_count; i++) {
7102 const upb_FieldDef* f = &m->fields[i];
7103 size_t field_size = upb_msg_fielddefsize(f);
7104 size_t index = upb_FieldDef_Index(f);
7105
7106 if (upb_FieldDef_RealContainingOneof(f)) {
7107 /* Oneofs are handled separately below. */
7108 continue;
7109 }
7110
7111 fields[index].offset = upb_MiniTable_place(ctx, l, field_size, m);
7112 }
7113
7114 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case
7115 * and space for the actual data. */
7116 for (int i = 0; i < m->oneof_count; i++) {
7117 const upb_OneofDef* o = &m->oneofs[i];
7118 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
7119 size_t field_size = 0;
7120 uint32_t case_offset;
7121 uint32_t data_offset;
7122
7123 if (upb_OneofDef_IsSynthetic(o)) continue;
7124
7125 if (o->field_count == 0) {
7126 symtab_errf(ctx, "Oneof must have at least one field (%s)", o->full_name);
7127 }
7128
7129 /* Calculate field size: the max of all field sizes. */
7130 for (int j = 0; j < o->field_count; j++) {
7131 const upb_FieldDef* f = o->fields[j];
7132 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
7133 }
7134
7135 /* Align and allocate case offset. */
7136 case_offset = upb_MiniTable_place(ctx, l, case_size, m);
7137 data_offset = upb_MiniTable_place(ctx, l, field_size, m);
7138
7139 for (int i = 0; i < o->field_count; i++) {
7140 const upb_FieldDef* f = o->fields[i];
7141 fields[upb_FieldDef_Index(f)].offset = data_offset;
7142 fields[upb_FieldDef_Index(f)].presence = ~case_offset;
7143 }
7144 }
7145
7146 /* Size of the entire structure should be a multiple of its greatest
7147 * alignment. TODO: track overall alignment for real? */
7148 l->size = UPB_ALIGN_UP(l->size, 8);
7149
7150 /* Sort fields by number. */
7151 if (fields) {
7152 qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields),
7153 field_number_cmp);
7154 }
7155 assign_layout_indices(m, l, fields);
7156 }
7157
/* Duplicates |view| into ctx->arena via upb_strdup2() and returns the copy.
 * A failed duplication exits through symtab_oomerr(). */
static char* strviewdup(symtab_addctx* ctx, upb_StringView view) {
  char* copy = upb_strdup2(view.data, view.size, ctx->arena);
  if (!copy) {
    symtab_oomerr(ctx);
  }
  return copy;
}
7163
/* Returns true when the |n| bytes at |a| are exactly the NUL-terminated
 * string |b| (same length, same bytes).  |a| itself need not be
 * NUL-terminated. */
static bool streql2(const char* a, size_t n, const char* b) {
  if (strlen(b) != n) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
7167
/* Returns true when string view |view| holds exactly the NUL-terminated
 * string |b|: equal length and equal bytes. */
static bool streql_view(upb_StringView view, const char* b) {
  const char* data = view.data;
  const size_t size = view.size;
  return size == strlen(b) && memcmp(data, b, size) == 0;
}
7171
/* Builds a fully-qualified name in memory obtained through ctx.
 *
 * With a non-NULL |prefix| the result is prefix + '.' + name,
 * NUL-terminated.  With a NULL |prefix| the result is a plain copy of
 * |name| made by strviewdup(). */
static const char* makefullname(symtab_addctx* ctx, const char* prefix,
                                upb_StringView name) {
  if (!prefix) {
    return strviewdup(ctx, name);
  }

  /* Room for the prefix, the separator, the name and the terminator. */
  const size_t prefix_len = strlen(prefix);
  char* joined = symtab_alloc(ctx, prefix_len + name.size + 2);
  char* out = joined;

  memcpy(out, prefix, prefix_len);
  out += prefix_len;
  *out++ = '.';
  memcpy(out, name.data, name.size);
  out += name.size;
  *out = '\0';

  return joined;
}
7187
/* Completes the oneofs of message |m| once all of its fields are known.
 *
 * First pass: validates each oneof (a synthetic oneof must have exactly one
 * field, and no real oneof may follow a synthetic one), allocates its
 * fields array sized by the current field_count, and resets field_count to
 * zero.  Second pass: appends every field of |m| to the oneof it belongs
 * to, rebuilding field_count.  Finally records how many oneofs are real
 * (non-synthetic) in m->real_oneof_count. */
static void finalize_oneofs(symtab_addctx* ctx, upb_MessageDef* m) {
  upb_OneofDef* oneofs = (upb_OneofDef*)m->oneofs;
  int num_synthetic = 0;

  for (int idx = 0; idx < m->oneof_count; idx++) {
    upb_OneofDef* o = &oneofs[idx];

    if (o->synthetic) {
      if (o->field_count != 1) {
        symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s",
                    o->field_count, upb_OneofDef_Name(o));
      }
      num_synthetic++;
    } else if (num_synthetic != 0) {
      symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s",
                  upb_OneofDef_Name(o));
    }

    o->fields = symtab_alloc(ctx, sizeof(upb_FieldDef*) * o->field_count);
    o->field_count = 0;
  }

  for (int idx = 0; idx < m->field_count; idx++) {
    const upb_FieldDef* f = &m->fields[idx];
    upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f);
    if (!o) continue;
    o->fields[o->field_count] = f;
    o->field_count++;
  }

  m->real_oneof_count = m->oneof_count - num_synthetic;
}
7222
/* Writes the JSON (lowerCamelCase) form of |name| into |buf|.
 *
 * The transformation removes every underscore and upper-cases the character
 * that follows one.  At most |len| bytes are written; when the result does
 * not fit, the output is truncated and still NUL-terminated (for len > 0).
 *
 * Returns the number of bytes the complete result needs, including the
 * terminating NUL, so a call with (NULL, 0) can be used to size a buffer.
 * A NULL |name| writes an empty string (when len > 0) and returns 0. */
size_t getjsonname(const char* name, char* buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte if it fits; the last slot in |buf| is reserved for the
 * terminator.  |dst| keeps counting past |len| so the caller learns the
 * full size. */
#define WRITE(byte)          \
  do {                       \
    ++dst;                   \
    if (dst < len)           \
      buf[dst - 1] = (byte); \
    else if (dst == len)     \
      buf[dst - 1] = '\0';   \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() is only defined for EOF and unsigned char values, so a
       * plain char must be converted before the call. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
7262
/* Returns the JSON form of |name| (see getjsonname()) in a buffer obtained
 * from symtab_alloc().  getjsonname() is called twice: once with an empty
 * buffer to learn the required size, then again to fill the buffer. */
static char* makejsonname(symtab_addctx* ctx, const char* name) {
  const size_t needed = getjsonname(name, NULL, 0);
  char* out = symtab_alloc(ctx, needed);
  getjsonname(name, out, needed);
  return out;
}
7269
7270 /* Adds a symbol |v| to the symtab, which must be a def pointer previously
7271 * packed with pack_def(). The def's pointer to upb_FileDef* must be set before
7272 * adding, so we know which entries to remove if building this file fails. */
symtab_add(symtab_addctx * ctx,const char * name,upb_value v)7273 static void symtab_add(symtab_addctx* ctx, const char* name, upb_value v) {
7274 // TODO: table should support an operation "tryinsert" to avoid the double
7275 // lookup.
7276 if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) {
7277 symtab_errf(ctx, "duplicate symbol '%s'", name);
7278 }
7279 size_t len = strlen(name);
7280 CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v,
7281 ctx->symtab->arena));
7282 }
7283
/* Drops the last dot-separated component from the first *len bytes of
 * |base| by shrinking *len (the buffer itself is not modified).
 *
 * Scans backwards from index *len - 1 down to index 1 for a '.'; if one is
 * found, *len becomes its index.  Otherwise *len becomes 0.  Returns false
 * only when *len was already 0, i.e. nothing was left to remove. */
static bool remove_component(char* base, size_t* len) {
  size_t pos = *len;

  if (pos == 0) {
    return false;
  }

  /* Index 0 is never inspected. */
  while (--pos > 0) {
    if (base[pos] == '.') {
      *len = pos;
      return true;
    }
  }

  *len = 0;
  return true;
}
7297
7298 /* Given a symbol and the base symbol inside which it is defined, find the
7299 * symbol's definition in t. */
symtab_resolveany(symtab_addctx * ctx,const char * from_name_dbg,const char * base,upb_StringView sym,upb_deftype_t * type)7300 static const void* symtab_resolveany(symtab_addctx* ctx,
7301 const char* from_name_dbg,
7302 const char* base, upb_StringView sym,
7303 upb_deftype_t* type) {
7304 const upb_strtable* t = &ctx->symtab->syms;
7305 if (sym.size == 0) goto notfound;
7306 upb_value v;
7307 if (sym.data[0] == '.') {
7308 /* Symbols starting with '.' are absolute, so we do a single lookup.
7309 * Slice to omit the leading '.' */
7310 if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
7311 goto notfound;
7312 }
7313 } else {
7314 /* Remove components from base until we find an entry or run out. */
7315 size_t baselen = base ? strlen(base) : 0;
7316 char* tmp = malloc(sym.size + baselen + 1);
7317 while (1) {
7318 char* p = tmp;
7319 if (baselen) {
7320 memcpy(p, base, baselen);
7321 p[baselen] = '.';
7322 p += baselen + 1;
7323 }
7324 memcpy(p, sym.data, sym.size);
7325 p += sym.size;
7326 if (upb_strtable_lookup2(t, tmp, p - tmp, &v)) {
7327 break;
7328 }
7329 if (!remove_component(tmp, &baselen)) {
7330 free(tmp);
7331 goto notfound;
7332 }
7333 }
7334 free(tmp);
7335 }
7336
7337 *type = deftype(v);
7338 return unpack_def(v, *type);
7339
7340 notfound:
7341 symtab_errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
7342 UPB_STRINGVIEW_ARGS(sym));
7343 }
7344
/* Resolves |sym| relative to |base| like symtab_resolveany(), and
 * additionally requires the definition found to be of def type |type|.  A
 * non-NULL result of a different type is reported through symtab_errf(),
 * mentioning |from_name_dbg| as the place the reference came from. */
static const void* symtab_resolve(symtab_addctx* ctx, const char* from_name_dbg,
                                  const char* base, upb_StringView sym,
                                  upb_deftype_t type) {
  upb_deftype_t actual;
  const void* def = symtab_resolveany(ctx, from_name_dbg, base, sym, &actual);

  if (def != NULL && actual != type) {
    symtab_errf(ctx,
                "type mismatch when resolving %s: couldn't find "
                "name " UPB_STRINGVIEW_FORMAT " with type=%d",
                from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
  }

  return def;
}
7359
/* Initializes oneof def |_o| of message |m| from |oneof_proto|.
 *
 * Sets parent, full name and options, starts with zero fields and
 * synthetic == false, registers the oneof by name in m->ntof (an existing
 * entry with that name is reported as a duplicate through symtab_errf()),
 * and initializes the oneof's own itof/ntof tables.  Failed table
 * operations exit through symtab_oomerr(). */
static void create_oneofdef(
    symtab_addctx* ctx, upb_MessageDef* m,
    const google_protobuf_OneofDescriptorProto* oneof_proto,
    const upb_OneofDef* _o) {
  upb_OneofDef* o = (upb_OneofDef*)_o;
  const upb_StringView name =
      google_protobuf_OneofDescriptorProto_name(oneof_proto);

  o->parent = m;
  o->full_name = makefullname(ctx, m->full_name, name);
  o->field_count = 0;
  o->synthetic = false;

  SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto);

  /* The name must not already be taken within the message. */
  upb_value prior;
  if (upb_strtable_lookup2(&m->ntof, name.data, name.size, &prior)) {
    symtab_errf(ctx, "duplicate oneof name (%s)", o->full_name);
  }

  upb_value packed = pack_def(o, UPB_DEFTYPE_ONEOF);
  if (!upb_strtable_insert(&m->ntof, name.data, name.size, packed,
                           ctx->arena)) {
    symtab_oomerr(ctx);
  }

  if (!upb_inttable_init(&o->itof, ctx->arena)) {
    symtab_oomerr(ctx);
  }
  if (!upb_strtable_init(&o->ntof, 4, ctx->arena)) {
    symtab_oomerr(ctx);
  }
}
7386
newstr(symtab_addctx * ctx,const char * data,size_t len)7387 static str_t* newstr(symtab_addctx* ctx, const char* data, size_t len) {
7388 str_t* ret = symtab_alloc(ctx, sizeof(*ret) + len);
7389 CHK_OOM(ret);
7390 ret->len = len;
7391 if (len) memcpy(ret->str, data, len);
7392 ret->str[len] = '\0';
7393 return ret;
7394 }
7395
/* Reads one character from the cursor *src.
 *
 * When the cursor has reached |end|, returns false and leaves both *src and
 * *ch untouched.  Otherwise stores the character in *ch, advances *src by
 * one and returns true. */
static bool upb_DefPool_TryGetChar(const char** src, const char* end,
                                   char* ch) {
  const char* cur = *src;

  if (cur == end) {
    return false;
  }

  *ch = *cur;
  *src = cur + 1;
  return true;
}
7403
/* Tries to consume one hexadecimal digit at the cursor *src.
 *
 * Returns the digit's value (0-15) and advances the cursor when the next
 * character is 0-9 or a-f in either case.  Returns -1 and leaves the cursor
 * where it was when the input is exhausted or the next character is not a
 * hex digit.  |ctx| and |f| are not used.
 *
 * NOTE: the return type is plain char, so on platforms where char is
 * unsigned the -1 sentinel does not compare as negative unless the caller
 * converts it to a signed type first. */
static char upb_DefPool_TryGetHexDigit(symtab_addctx* ctx,
                                       const upb_FieldDef* f, const char** src,
                                       const char* end) {
  char c;

  if (!upb_DefPool_TryGetChar(src, end, &c)) {
    return -1;
  }

  if (c >= '0' && c <= '9') {
    return c - '0';
  }

  c = upb_ascii_lower(c);
  if (c >= 'a' && c <= 'f') {
    return c - 'a' + 0xa;
  }

  *src -= 1;  // Char wasn't actually a hex digit.
  return -1;
}
7419
/* Parses the digits of a "\x" escape starting at the cursor *src and
 * returns the byte they encode.
 *
 * At least one hex digit is required; all consecutive hex digits are
 * consumed.  A missing digit, or a value larger than 0xff, is reported
 * through symtab_errf() naming field |f|. */
static char upb_DefPool_ParseHexEscape(symtab_addctx* ctx,
                                       const upb_FieldDef* f, const char** src,
                                       const char* end) {
  /* The helper returns its -1 sentinel as plain char.  Going through signed
   * char keeps the sentinel negative even where plain char is unsigned;
   * otherwise the ">= 0" loop below would never terminate at end of
   * input. */
  int hex_digit = (signed char)upb_DefPool_TryGetHexDigit(ctx, f, src, end);
  if (hex_digit < 0) {
    symtab_errf(ctx,
                "\\x cannot be followed by non-hex digit in field '%s' default",
                upb_FieldDef_FullName(f));
    return 0;
  }
  unsigned int ret = (unsigned int)hex_digit;
  while ((hex_digit = (signed char)upb_DefPool_TryGetHexDigit(ctx, f, src,
                                                              end)) >= 0) {
    /* Stop accumulating once the value is out of range, so that a long run
     * of digits cannot wrap around and slip back under 0xff. */
    if (ret <= 0xff) {
      ret = (ret << 4) | (unsigned int)hex_digit;
    }
  }
  if (ret > 0xff) {
    symtab_errf(ctx, "Value of hex escape in field %s exceeds 8 bits",
                upb_FieldDef_FullName(f));
    return 0;
  }
  return (char)ret;
}
7441
/* Tries to consume one octal digit at the cursor *src.
 *
 * Returns the digit's value (0-7) and advances the cursor when the next
 * character is '0'..'7'.  Returns -1 and leaves the cursor where it was
 * when the input is exhausted or the next character is not an octal digit.
 *
 * NOTE: the return type is plain char, so on platforms where char is
 * unsigned the -1 sentinel does not compare as negative unless the caller
 * converts it to a signed type first. */
char upb_DefPool_TryGetOctalDigit(const char** src, const char* end) {
  char c;

  if (!upb_DefPool_TryGetChar(src, end, &c)) {
    return -1;
  }

  if (c < '0' || c > '7') {
    *src -= 1;  // Char wasn't actually an octal digit.
    return -1;
  }

  return c - '0';
}
7451
/* Parses up to three octal digits at the cursor *src and returns the byte
 * they encode (values above 0xff are truncated to their low 8 bits by the
 * conversion to char).  Stops at the first character that is not an octal
 * digit; with no digits at all the result is 0.  |ctx| and |f| are not
 * used. */
static char upb_DefPool_ParseOctalEscape(symtab_addctx* ctx,
                                         const upb_FieldDef* f,
                                         const char** src, const char* end) {
  unsigned int value = 0;

  (void)ctx;
  (void)f;

  for (int i = 0; i < 3; i++) {
    /* The helper returns its -1 sentinel as plain char.  Going through
     * signed char keeps the sentinel negative even where plain char is
     * unsigned, so it is never folded into the value as a digit. */
    int digit = (signed char)upb_DefPool_TryGetOctalDigit(src, end);
    if (digit < 0) {
      /* The cursor was not advanced, so further attempts would fail too. */
      break;
    }
    value = (value << 3) | (unsigned int)digit;
  }

  return (char)value;
}
7464
/* Parses the remainder of a backslash escape; the cursor *src points just
 * past the backslash.  Returns the byte the escape stands for.
 *
 * Supported: the single-character escapes a b f n r t v \ ' " ?, hex
 * escapes introduced by x or X, and octal escapes starting with 0-7.  An
 * escape cut off by the end of input, or an unknown escape character, is
 * reported through symtab_errf(). */
static char upb_DefPool_ParseEscape(symtab_addctx* ctx, const upb_FieldDef* f,
                                    const char** src, const char* end) {
  char ch;
  if (!upb_DefPool_TryGetChar(src, end, &ch)) {
    symtab_errf(ctx, "unterminated escape sequence in field %s",
                upb_FieldDef_FullName(f));
    return 0;
  }

  /* Numeric escapes are delegated to their own parsers. */
  if (ch == 'x' || ch == 'X') {
    return upb_DefPool_ParseHexEscape(ctx, f, src, end);
  }
  if (ch >= '0' && ch <= '7') {
    /* Hand the first digit back so the octal parser sees all of them. */
    *src -= 1;
    return upb_DefPool_ParseOctalEscape(ctx, f, src, end);
  }

  /* Single-character escapes. */
  switch (ch) {
    case 'a':  return '\a';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    case 'v':  return '\v';
    case '\\': return '\\';
    case '\'': return '\'';
    case '\"': return '\"';
    case '?':  return '\?';
  }

  symtab_errf(ctx, "Unknown escape sequence: \\%c", ch);
}
7512
unescape(symtab_addctx * ctx,const upb_FieldDef * f,const char * data,size_t len)7513 static str_t* unescape(symtab_addctx* ctx, const upb_FieldDef* f,
7514 const char* data, size_t len) {
7515 // Size here is an upper bound; escape sequences could ultimately shrink it.
7516 str_t* ret = symtab_alloc(ctx, sizeof(*ret) + len);
7517 char* dst = &ret->str[0];
7518 const char* src = data;
7519 const char* end = data + len;
7520
7521 while (src < end) {
7522 if (*src == '\\') {
7523 src++;
7524 *dst++ = upb_DefPool_ParseEscape(ctx, f, &src, end);
7525 } else {
7526 *dst++ = *src++;
7527 }
7528 }
7529
7530 ret->len = dst - &ret->str[0];
7531 return ret;
7532 }
7533
/* Parses the textual default value |str| (|len| bytes, not necessarily
 * NUL-terminated) according to the C type of field |f| and stores the
 * result in f->defaultval.
 *
 *   - integer and floating-point types go through strtol()/strtod() and
 *     friends; trailing characters or a range error make the default
 *     invalid,
 *   - enums are looked up by value name in the field's enum def,
 *   - bools accept exactly "true" or "false",
 *   - strings are copied, bytes are unescaped,
 *   - message fields must not have a default.
 *
 * Every problem is reported through symtab_errf(). */
static void parse_default(symtab_addctx* ctx, const char* str, size_t len,
                          upb_FieldDef* f) {
  char* end;
  char nullz[64];
  errno = 0;

  const upb_CType ctype = upb_FieldDef_CType(f);
  const bool numeric =
      ctype == kUpb_CType_Int32 || ctype == kUpb_CType_Int64 ||
      ctype == kUpb_CType_UInt32 || ctype == kUpb_CType_UInt64 ||
      ctype == kUpb_CType_Double || ctype == kUpb_CType_Float;

  if (numeric) {
    /* Standard C number parsing functions expect null-terminated strings,
     * so parse from a bounded local copy. */
    if (len >= sizeof(nullz) - 1) {
      symtab_errf(ctx, "Default too long: %.*s", (int)len, str);
    }
    memcpy(nullz, str, len);
    nullz[len] = '\0';
    str = nullz;
  }

  switch (ctype) {
    case kUpb_CType_Int32: {
      const long parsed = strtol(str, &end, 0);
      const bool bad = parsed > INT32_MAX || parsed < INT32_MIN ||
                       errno == ERANGE || *end;
      if (bad) goto invalid;
      f->defaultval.sint = parsed;
      break;
    }
    case kUpb_CType_Int64: {
      const long long parsed = strtoll(str, &end, 0);
      const bool bad = parsed > INT64_MAX || parsed < INT64_MIN ||
                       errno == ERANGE || *end;
      if (bad) goto invalid;
      f->defaultval.sint = parsed;
      break;
    }
    case kUpb_CType_UInt32: {
      const unsigned long parsed = strtoul(str, &end, 0);
      const bool bad = parsed > UINT32_MAX || errno == ERANGE || *end;
      if (bad) goto invalid;
      f->defaultval.uint = parsed;
      break;
    }
    case kUpb_CType_UInt64: {
      const unsigned long long parsed = strtoull(str, &end, 0);
      const bool bad = parsed > UINT64_MAX || errno == ERANGE || *end;
      if (bad) goto invalid;
      f->defaultval.uint = parsed;
      break;
    }
    case kUpb_CType_Double: {
      const double parsed = strtod(str, &end);
      if (errno == ERANGE || *end) goto invalid;
      f->defaultval.dbl = parsed;
      break;
    }
    case kUpb_CType_Float: {
      const float parsed = strtof(str, &end);
      if (errno == ERANGE || *end) goto invalid;
      f->defaultval.flt = parsed;
      break;
    }
    case kUpb_CType_Enum: {
      /* The default names one of the enum's values. */
      const upb_EnumDef* e = f->sub.enumdef;
      const upb_EnumValueDef* ev =
          upb_EnumDef_FindValueByNameWithSize(e, str, len);
      if (!ev) goto invalid;
      f->defaultval.sint = ev->number;
      break;
    }
    case kUpb_CType_Bool: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        goto invalid;
      }
      break;
    }
    case kUpb_CType_String:
      f->defaultval.str = newstr(ctx, str, len);
      break;
    case kUpb_CType_Bytes:
      f->defaultval.str = unescape(ctx, f, str, len);
      break;
    case kUpb_CType_Message:
      /* Should not have a default value. */
      symtab_errf(ctx, "Message should not have a default (%s)",
                  upb_FieldDef_FullName(f));
  }

  return;

invalid:
  symtab_errf(ctx, "Invalid default '%.*s' for field %s of type %d", (int)len,
              str, upb_FieldDef_FullName(f), (int)upb_FieldDef_Type(f));
}
7646
/* Gives field |f| the implicit default for its C type when the descriptor
 * did not supply one: zero for numeric types, false for bool, an empty
 * string (from newstr()) for string/bytes, and the number of the enum's
 * first value for enums.  Message fields are left untouched. */
static void set_default_default(symtab_addctx* ctx, upb_FieldDef* f) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      f->defaultval.boolean = false;
      break;

    case kUpb_CType_Int32:
    case kUpb_CType_Int64:
      f->defaultval.sint = 0;
      break;

    case kUpb_CType_UInt32:
    case kUpb_CType_UInt64:
      f->defaultval.uint = 0;
      break;

    /* Both floating-point types are zeroed through the double member. */
    case kUpb_CType_Float:
    case kUpb_CType_Double:
      f->defaultval.dbl = 0;
      break;

    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      f->defaultval.str = newstr(ctx, NULL, 0);
      break;

    case kUpb_CType_Enum:
      f->defaultval.sint = f->sub.enumdef->values[0].number;
      break;

    case kUpb_CType_Message:
      break;
  }
}
7674
create_fielddef(symtab_addctx * ctx,const char * prefix,upb_MessageDef * m,const google_protobuf_FieldDescriptorProto * field_proto,const upb_FieldDef * _f,bool is_extension)7675 static void create_fielddef(
7676 symtab_addctx* ctx, const char* prefix, upb_MessageDef* m,
7677 const google_protobuf_FieldDescriptorProto* field_proto,
7678 const upb_FieldDef* _f, bool is_extension) {
7679 upb_FieldDef* f = (upb_FieldDef*)_f;
7680 upb_StringView name;
7681 const char* full_name;
7682 const char* json_name;
7683 const char* shortname;
7684 int32_t field_number;
7685
7686 f->file = ctx->file; /* Must happen prior to symtab_add(). */
7687
7688 if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
7689 symtab_errf(ctx, "field has no name");
7690 }
7691
7692 name = google_protobuf_FieldDescriptorProto_name(field_proto);
7693 check_ident(ctx, name, false);
7694 full_name = makefullname(ctx, prefix, name);
7695 shortname = shortdefname(full_name);
7696
7697 if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
7698 json_name = strviewdup(
7699 ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
7700 f->has_json_name_ = true;
7701 } else {
7702 json_name = makejsonname(ctx, shortname);
7703 f->has_json_name_ = false;
7704 }
7705
7706 field_number = google_protobuf_FieldDescriptorProto_number(field_proto);
7707
7708 f->full_name = full_name;
7709 f->json_name = json_name;
7710 f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
7711 f->number_ = field_number;
7712 f->scope.oneof = NULL;
7713 f->proto3_optional_ =
7714 google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);
7715
7716 bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto);
7717 bool has_type_name =
7718 google_protobuf_FieldDescriptorProto_has_type_name(field_proto);
7719
7720 f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
7721
7722 if (has_type) {
7723 switch (f->type_) {
7724 case kUpb_FieldType_Message:
7725 case kUpb_FieldType_Group:
7726 case kUpb_FieldType_Enum:
7727 if (!has_type_name) {
7728 symtab_errf(ctx, "field of type %d requires type name (%s)",
7729 (int)f->type_, full_name);
7730 }
7731 break;
7732 default:
7733 if (has_type_name) {
7734 symtab_errf(ctx, "invalid type for field with type_name set (%s, %d)",
7735 full_name, (int)f->type_);
7736 }
7737 }
7738 } else if (has_type_name) {
7739 f->type_ =
7740 FIELD_TYPE_UNSPECIFIED; // We'll fill this in in resolve_fielddef().
7741 }
7742
7743 if (!is_extension) {
7744 /* direct message field. */
7745 upb_value v, field_v, json_v, existing_v;
7746 size_t json_size;
7747
7748 if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) {
7749 symtab_errf(ctx, "invalid field number (%u)", field_number);
7750 }
7751
7752 f->index_ = f - m->fields;
7753 f->msgdef = m;
7754 f->is_extension_ = false;
7755
7756 field_v = pack_def(f, UPB_DEFTYPE_FIELD);
7757 json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
7758 v = upb_value_constptr(f);
7759 json_size = strlen(json_name);
7760
7761 if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) {
7762 symtab_errf(ctx, "duplicate field name (%s)", shortname);
7763 }
7764
7765 CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v,
7766 ctx->arena));
7767
7768 if (strcmp(shortname, json_name) != 0) {
7769 if (upb_strtable_lookup(&m->ntof, json_name, &v)) {
7770 symtab_errf(ctx, "duplicate json_name (%s)", json_name);
7771 } else {
7772 CHK_OOM(upb_strtable_insert(&m->ntof, json_name, json_size, json_v,
7773 ctx->arena));
7774 }
7775 }
7776
7777 if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
7778 symtab_errf(ctx, "duplicate field number (%u)", field_number);
7779 }
7780
7781 CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena));
7782
7783 if (ctx->layout) {
7784 const upb_MiniTable_Field* fields = m->layout->fields;
7785 int count = m->layout->field_count;
7786 bool found = false;
7787 for (int i = 0; i < count; i++) {
7788 if (fields[i].number == field_number) {
7789 f->layout_index = i;
7790 found = true;
7791 break;
7792 }
7793 }
7794 UPB_ASSERT(found);
7795 }
7796 } else {
7797 /* extension field. */
7798 f->is_extension_ = true;
7799 f->scope.extension_scope = m;
7800 symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_EXT));
7801 f->layout_index = ctx->ext_count++;
7802 if (ctx->layout) {
7803 UPB_ASSERT(ctx->file->ext_layouts[f->layout_index]->field.number ==
7804 field_number);
7805 }
7806 }
7807
7808 if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) {
7809 symtab_errf(ctx, "invalid type for field %s (%d)", f->full_name, f->type_);
7810 }
7811
7812 if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) {
7813 symtab_errf(ctx, "invalid label for field %s (%d)", f->full_name,
7814 f->label_);
7815 }
7816
7817 /* We can't resolve the subdef or (in the case of extensions) the containing
7818 * message yet, because it may not have been defined yet. We stash a pointer
7819 * to the field_proto until later when we can properly resolve it. */
7820 f->sub.unresolved = field_proto;
7821
7822 if (f->label_ == kUpb_Label_Required &&
7823 f->file->syntax == kUpb_Syntax_Proto3) {
7824 symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name);
7825 }
7826
7827 if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
7828 uint32_t oneof_index = google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
7829 upb_OneofDef* oneof;
7830 upb_value v = upb_value_constptr(f);
7831
7832 if (upb_FieldDef_Label(f) != kUpb_Label_Optional) {
7833 symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
7834 f->full_name);
7835 }
7836
7837 if (!m) {
7838 symtab_errf(ctx, "oneof_index provided for extension field (%s)",
7839 f->full_name);
7840 }
7841
7842 if (oneof_index >= m->oneof_count) {
7843 symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name);
7844 }
7845
7846 oneof = (upb_OneofDef*)&m->oneofs[oneof_index];
7847 f->scope.oneof = oneof;
7848
7849 oneof->field_count++;
7850 if (f->proto3_optional_) {
7851 oneof->synthetic = true;
7852 }
7853 CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena));
7854 CHK_OOM(
7855 upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena));
7856 } else {
7857 if (f->proto3_optional_ && !is_extension) {
7858 symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)",
7859 f->full_name);
7860 }
7861 }
7862
7863 SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto);
7864
7865 if (google_protobuf_FieldOptions_has_packed(f->opts)) {
7866 f->packed_ = google_protobuf_FieldOptions_packed(f->opts);
7867 } else {
7868 /* Repeated fields default to packed for proto3 only. */
7869 f->packed_ = upb_FieldDef_IsPrimitive(f) &&
7870 f->label_ == kUpb_Label_Repeated &&
7871 f->file->syntax == kUpb_Syntax_Proto3;
7872 }
7873 }
7874
/* Fills in the service def `_s` from `svc_proto`: registers the service's full
 * name in the symbol table, copies its options, and builds one method def per
 * MethodDescriptorProto, resolving each method's input and output message
 * types by name. */
static void create_service(
    symtab_addctx* ctx, const google_protobuf_ServiceDescriptorProto* svc_proto,
    const upb_ServiceDef* _s) {
  upb_ServiceDef* svc = (upb_ServiceDef*)_s;

  /* Must happen prior to symtab_add. */
  svc->file = ctx->file;

  upb_StringView svc_name =
      google_protobuf_ServiceDescriptorProto_name(svc_proto);
  check_ident(ctx, svc_name, false);
  svc->full_name = makefullname(ctx, ctx->file->package, svc_name);
  symtab_add(ctx, svc->full_name, pack_def(svc, UPB_DEFTYPE_SERVICE));

  size_t method_total;
  const google_protobuf_MethodDescriptorProto* const* method_protos =
      google_protobuf_ServiceDescriptorProto_method(svc_proto, &method_total);

  svc->method_count = method_total;
  svc->methods = symtab_alloc(ctx, sizeof(*svc->methods) * method_total);

  SET_OPTIONS(svc->opts, ServiceDescriptorProto, ServiceOptions, svc_proto);

  for (size_t idx = 0; idx < method_total; idx++) {
    const google_protobuf_MethodDescriptorProto* method_proto =
        method_protos[idx];
    upb_MethodDef* method = (upb_MethodDef*)&svc->methods[idx];
    upb_StringView method_name =
        google_protobuf_MethodDescriptorProto_name(method_proto);

    method->service = svc;
    method->full_name = makefullname(ctx, svc->full_name, method_name);
    method->index = idx;
    method->client_streaming =
        google_protobuf_MethodDescriptorProto_client_streaming(method_proto);
    method->server_streaming =
        google_protobuf_MethodDescriptorProto_server_streaming(method_proto);

    /* Both request and response types are looked up as message defs, using
     * the method's own full name as the resolution scope. */
    upb_StringView in_name =
        google_protobuf_MethodDescriptorProto_input_type(method_proto);
    method->input_type = symtab_resolve(ctx, method->full_name,
                                        method->full_name, in_name,
                                        UPB_DEFTYPE_MSG);

    upb_StringView out_name =
        google_protobuf_MethodDescriptorProto_output_type(method_proto);
    method->output_type = symtab_resolve(ctx, method->full_name,
                                         method->full_name, out_name,
                                         UPB_DEFTYPE_MSG);

    SET_OPTIONS(method->opts, MethodDescriptorProto, MethodOptions,
                method_proto);
  }
}
7922
/* Returns the number of 1 bits in `x`.  For assertions only, so speed does not
 * matter. */
static int count_bits_debug(uint64_t x) {
  int bits = 0;
  for (; x != 0; x >>= 1) {
    bits += (int)(x & 1);
  }
  return bits;
}
7932
/* qsort()-compatible comparator for int32_t elements.  Returns -1, 0 or 1 when
 * the value at `a_ptr` is respectively less than, equal to, or greater than
 * the value at `b_ptr`. */
static int compare_int32(const void* a_ptr, const void* b_ptr) {
  const int32_t lhs = *(const int32_t*)a_ptr;
  const int32_t rhs = *(const int32_t*)b_ptr;
  if (lhs < rhs) return -1;
  if (lhs > rhs) return 1;
  return 0;
}
7938
create_enumlayout(symtab_addctx * ctx,const upb_EnumDef * e)7939 upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
7940 const upb_EnumDef* e) {
7941 int n = 0;
7942 uint64_t mask = 0;
7943
7944 for (int i = 0; i < e->value_count; i++) {
7945 uint32_t val = (uint32_t)e->values[i].number;
7946 if (val < 64) {
7947 mask |= 1ULL << val;
7948 } else {
7949 n++;
7950 }
7951 }
7952
7953 int32_t* values = symtab_alloc(ctx, sizeof(*values) * n);
7954
7955 if (n) {
7956 int32_t* p = values;
7957
7958 // Add values outside the bitmask range to the list, as described in the
7959 // comments for upb_MiniTable_Enum.
7960 for (int i = 0; i < e->value_count; i++) {
7961 int32_t val = e->values[i].number;
7962 if ((uint32_t)val >= 64) {
7963 *p++ = val;
7964 }
7965 }
7966 UPB_ASSERT(p == values + n);
7967 }
7968
7969 // Enums can have duplicate values; we must sort+uniq them.
7970 if (values) qsort(values, n, sizeof(*values), &compare_int32);
7971
7972 int dst = 0;
7973 for (int i = 0; i < n; dst++) {
7974 int32_t val = values[i];
7975 while (i < n && values[i] == val) i++; // Skip duplicates.
7976 values[dst] = val;
7977 }
7978 n = dst;
7979
7980 UPB_ASSERT(upb_inttable_count(&e->iton) == n + count_bits_debug(mask));
7981
7982 upb_MiniTable_Enum* layout = symtab_alloc(ctx, sizeof(*layout));
7983 layout->value_count = n;
7984 layout->mask = mask;
7985 layout->values = values;
7986
7987 return layout;
7988 }
7989
/* Initializes the `i`-th value of enum `e` from `val_proto`.
 *
 * The value's full name is built from `prefix` and the value's own name and is
 * added to the symbol table.  The value is also indexed in the enum's
 * name->value table and, if its number has not been seen yet, in the
 * number->value table. */
static void create_enumvaldef(
    symtab_addctx* ctx, const char* prefix,
    const google_protobuf_EnumValueDescriptorProto* val_proto, upb_EnumDef* e,
    int i) {
  upb_EnumValueDef* entry = (upb_EnumValueDef*)&e->values[i];
  upb_value entry_ptr = upb_value_constptr(entry);
  upb_StringView short_name =
      google_protobuf_EnumValueDescriptorProto_name(val_proto);

  /* Must happen prior to symtab_add(). */
  entry->parent = e;
  entry->full_name = makefullname(ctx, prefix, short_name);
  entry->number = google_protobuf_EnumValueDescriptorProto_number(val_proto);
  symtab_add(ctx, entry->full_name, pack_def(entry, UPB_DEFTYPE_ENUMVAL));

  SET_OPTIONS(entry->opts, EnumValueDescriptorProto, EnumValueOptions,
              val_proto);

  const bool is_first_proto3_value =
      (i == 0) && (e->file->syntax == kUpb_Syntax_Proto3);
  if (is_first_proto3_value && entry->number != 0) {
    symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
                e->full_name);
  }

  CHK_OOM(upb_strtable_insert(&e->ntoi, short_name.data, short_name.size,
                              entry_ptr, ctx->arena));

  // Multiple enumerators can have the same number, first one wins.
  const bool number_taken = upb_inttable_lookup(&e->iton, entry->number, NULL);
  if (!number_taken) {
    CHK_OOM(upb_inttable_insert(&e->iton, entry->number, entry_ptr,
                                ctx->arena));
  }
}
8018
/* Copies the `n` reserved enum names in `protos` into ctx->arena.
 *
 * Returns an array of `n` string views, each pointing at a copy of the
 * corresponding name made with upb_strdup2().  An allocation failure for
 * either the array or any of the copies is reported through CHK_OOM rather
 * than being written through or stored as a NULL pointer. */
static upb_StringView* _upb_EnumReservedNames_New(
    symtab_addctx* ctx, int n, const upb_StringView* protos) {
  upb_StringView* sv =
      upb_Arena_Malloc(ctx->arena, sizeof(upb_StringView) * n);
  // A NULL result only signals failure when there was something to allocate.
  CHK_OOM(sv != NULL || n == 0);

  for (int i = 0; i < n; i++) {
    sv[i].data = upb_strdup2(protos[i].data, protos[i].size, ctx->arena);
    CHK_OOM(sv[i].data != NULL);
    sv[i].size = protos[i].size;
  }
  return sv;
}
8030
/* Initializes the enum def `_e` from `enum_proto`.
 *
 * The enum's full name is `prefix` joined with its own name and is added to
 * the symbol table.  Its values, reserved ranges, reserved names and options
 * are then populated.  For proto2 files the enum also gets a layout: taken
 * from ctx->layout when one was supplied, otherwise built by
 * create_enumlayout().  For other syntaxes the layout is NULL. */
static void create_enumdef(
    symtab_addctx* ctx, const char* prefix,
    const google_protobuf_EnumDescriptorProto* enum_proto,
    const upb_MessageDef* containing_type, const upb_EnumDef* _e) {
  upb_EnumDef* e = (upb_EnumDef*)_e;

  /* Must happen prior to symtab_add() */
  e->file = ctx->file;
  e->containing_type = containing_type;

  upb_StringView short_name =
      google_protobuf_EnumDescriptorProto_name(enum_proto);
  check_ident(ctx, short_name, false);

  e->full_name = makefullname(ctx, prefix, short_name);
  symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM));

  size_t value_total;
  const google_protobuf_EnumValueDescriptorProto* const* value_protos =
      google_protobuf_EnumDescriptorProto_value(enum_proto, &value_total);
  CHK_OOM(upb_strtable_init(&e->ntoi, value_total, ctx->arena));
  CHK_OOM(upb_inttable_init(&e->iton, ctx->arena));

  e->defaultval = 0;
  e->value_count = value_total;
  e->values = symtab_alloc(ctx, sizeof(*e->values) * value_total);

  if (value_total == 0) {
    symtab_errf(ctx, "enums must contain at least one value (%s)",
                e->full_name);
  }

  size_t range_total;
  const google_protobuf_EnumDescriptorProto_EnumReservedRange* const*
      range_protos = google_protobuf_EnumDescriptorProto_reserved_range(
          enum_proto, &range_total);
  e->res_range_count = range_total;
  e->res_ranges = _upb_EnumReservedRanges_New(ctx, range_total, range_protos, e);

  size_t reserved_name_total;
  const upb_StringView* reserved_names =
      google_protobuf_EnumDescriptorProto_reserved_name(enum_proto,
                                                        &reserved_name_total);
  e->res_name_count = reserved_name_total;
  e->res_names =
      _upb_EnumReservedNames_New(ctx, reserved_name_total, reserved_names);

  SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto);

  /* Values are named relative to `prefix`, not to the enum's own full name. */
  for (size_t idx = 0; idx < value_total; idx++) {
    create_enumvaldef(ctx, prefix, value_protos[idx], e, idx);
  }

  upb_inttable_compact(&e->iton, ctx->arena);

  if (e->file->syntax != kUpb_Syntax_Proto2) {
    e->layout = NULL;
    return;
  }

  if (!ctx->layout) {
    e->layout = create_enumlayout(ctx, e);
    return;
  }

  /* A file layout was supplied: consume its next enum layout and check that
   * it describes the same number of distinct values. */
  UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
  e->layout = ctx->layout->enums[ctx->enum_count++];
  UPB_ASSERT(upb_inttable_count(&e->iton) ==
             e->layout->value_count + count_bits_debug(e->layout->mask));
}
8095
8096 static void msgdef_create_nested(
8097 symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto,
8098 upb_MessageDef* m);
8099
/* Copies the `n` reserved message names in `protos` into ctx->arena.
 *
 * Returns an array of `n` string views, each pointing at a copy of the
 * corresponding name made with upb_strdup2().  An allocation failure for
 * either the array or any of the copies is reported through CHK_OOM rather
 * than being written through or stored as a NULL pointer. */
static upb_StringView* _upb_ReservedNames_New(symtab_addctx* ctx, int n,
                                              const upb_StringView* protos) {
  upb_StringView* sv = upb_Arena_Malloc(ctx->arena, sizeof(upb_StringView) * n);
  // A NULL result only signals failure when there was something to allocate.
  CHK_OOM(sv != NULL || n == 0);

  for (int i = 0; i < n; i++) {
    sv[i].data = upb_strdup2(protos[i].data, protos[i].size, ctx->arena);
    CHK_OOM(sv[i].data != NULL);
    sv[i].size = protos[i].size;
  }
  return sv;
}
8110
/* Initializes the message def `_m` from `msg_proto`.
 *
 * The message's full name is `prefix` joined with its own name and is added to
 * the symbol table.  The function then, in this order: initializes the
 * field lookup tables, selects or allocates the message layout, copies the
 * options, creates oneofs, fields and extension ranges, copies reserved ranges
 * and names, finalizes the oneofs, and finally creates nested enums,
 * extensions and messages via msgdef_create_nested().
 *
 * `containing_type` is stored as-is; callers in this file pass NULL for
 * top-level messages. */
static void create_msgdef(symtab_addctx* ctx, const char* prefix,
                          const google_protobuf_DescriptorProto* msg_proto,
                          const upb_MessageDef* containing_type,
                          const upb_MessageDef* _m) {
  upb_MessageDef* m = (upb_MessageDef*)_m;
  const google_protobuf_OneofDescriptorProto* const* oneofs;
  const google_protobuf_FieldDescriptorProto* const* fields;
  const google_protobuf_DescriptorProto_ExtensionRange* const* ext_ranges;

  const google_protobuf_DescriptorProto_ReservedRange* const* res_ranges;
  const upb_StringView* res_names;
  size_t i, n_oneof, n_field, n_ext_range;
  size_t n_res_range, n_res_name;
  upb_StringView name;

  m->file = ctx->file; /* Must happen prior to symtab_add(). */
  m->containing_type = containing_type;

  name = google_protobuf_DescriptorProto_name(msg_proto);
  check_ident(ctx, name, false);

  m->full_name = makefullname(ctx, prefix, name);
  symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG));

  /* Fetch every repeated sub-proto array up front, together with its length. */
  oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
  fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field);
  ext_ranges =
      google_protobuf_DescriptorProto_extension_range(msg_proto, &n_ext_range);
  res_ranges = google_protobuf_DescriptorProto_reserved_range(msg_proto, &n_res_range);
  res_names = google_protobuf_DescriptorProto_reserved_name(msg_proto, &n_res_name);

  /* The name table is sized for both oneof names and field names. */
  CHK_OOM(upb_inttable_init(&m->itof, ctx->arena));
  CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena));

  if (ctx->layout) {
    /* create_fielddef() below depends on this being set. */
    UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count);
    m->layout = ctx->layout->msgs[ctx->msg_count++];
    UPB_ASSERT(n_field == m->layout->field_count);
  } else {
    /* Allocate now (to allow cross-linking), populate later. */
    m->layout =
        symtab_alloc(ctx, sizeof(*m->layout) + sizeof(_upb_FastTable_Entry));
  }

  SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto);

  /* Oneofs are created before fields; the field loop below runs second. */
  m->oneof_count = n_oneof;
  m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof);
  for (i = 0; i < n_oneof; i++) {
    create_oneofdef(ctx, m, oneofs[i], &m->oneofs[i]);
  }

  m->field_count = n_field;
  m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field);
  for (i = 0; i < n_field; i++) {
    create_fielddef(ctx, m->full_name, m, fields[i], &m->fields[i],
                    /* is_extension= */ false);
  }

  m->ext_range_count = n_ext_range;
  m->ext_ranges = symtab_alloc(ctx, sizeof(*m->ext_ranges) * n_ext_range);
  for (i = 0; i < n_ext_range; i++) {
    const google_protobuf_DescriptorProto_ExtensionRange* r = ext_ranges[i];
    upb_ExtensionRange* r_def = (upb_ExtensionRange*)&m->ext_ranges[i];
    int32_t start = google_protobuf_DescriptorProto_ExtensionRange_start(r);
    int32_t end = google_protobuf_DescriptorProto_ExtensionRange_end(r);
    /* Upper bound for `end`: messages with message_set_wire_format set may use
     * the whole int32 range, all others stop at kUpb_MaxFieldNumber + 1. */
    int32_t max =
        google_protobuf_MessageOptions_message_set_wire_format(m->opts)
            ? INT32_MAX
            : kUpb_MaxFieldNumber + 1;

    // A full validation would also check that each range is disjoint, and that
    // none of the fields overlap with the extension ranges, but we are just
    // sanity checking here.
    if (start < 1 || end <= start || end > max) {
      symtab_errf(ctx, "Extension range (%d, %d) is invalid, message=%s\n",
                  (int)start, (int)end, m->full_name);
    }

    r_def->start = start;
    r_def->end = end;
    SET_OPTIONS(r_def->opts, DescriptorProto_ExtensionRange,
                ExtensionRangeOptions, r);
  }

  m->res_range_count = n_res_range;
  m->res_ranges =
      _upb_MessageReservedRanges_New(ctx, n_res_range, res_ranges, m);

  m->res_name_count = n_res_name;
  m->res_names = _upb_ReservedNames_New(ctx, n_res_name, res_names);

  /* Nested definitions are created last, after this message's own tables are
   * finalized and compacted. */
  finalize_oneofs(ctx, m);
  assign_msg_wellknowntype(m);
  upb_inttable_compact(&m->itof, ctx->arena);
  msgdef_create_nested(ctx, msg_proto, m);
}
8209
/* Creates the definitions nested inside message `m`, in this order: nested
 * enums, nested extensions, then nested messages.  Each kind gets its count
 * stored on `m` and its array allocated before the individual defs are
 * built. */
static void msgdef_create_nested(
    symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto,
    upb_MessageDef* m) {
  /* Nested enums. */
  size_t enum_total;
  const google_protobuf_EnumDescriptorProto* const* enum_protos =
      google_protobuf_DescriptorProto_enum_type(msg_proto, &enum_total);
  m->nested_enum_count = enum_total;
  m->nested_enums = symtab_alloc(ctx, sizeof(*m->nested_enums) * enum_total);
  for (size_t e = 0; e < enum_total; e++) {
    /* The stored count is rewritten to e + 1 before each enum is created. */
    m->nested_enum_count = e + 1;
    create_enumdef(ctx, m->full_name, enum_protos[e], m, &m->nested_enums[e]);
  }

  /* Nested extensions; each records its position in the nested_exts array. */
  size_t ext_total;
  const google_protobuf_FieldDescriptorProto* const* ext_protos =
      google_protobuf_DescriptorProto_extension(msg_proto, &ext_total);
  m->nested_ext_count = ext_total;
  m->nested_exts = symtab_alloc(ctx, sizeof(*m->nested_exts) * ext_total);
  for (size_t x = 0; x < ext_total; x++) {
    upb_FieldDef* ext = (upb_FieldDef*)&m->nested_exts[x];
    create_fielddef(ctx, m->full_name, m, ext_protos[x], &m->nested_exts[x],
                    /* is_extension= */ true);
    ext->index_ = x;
  }

  /* Nested messages. */
  size_t msg_total;
  const google_protobuf_DescriptorProto* const* nested_protos =
      google_protobuf_DescriptorProto_nested_type(msg_proto, &msg_total);
  m->nested_msg_count = msg_total;
  m->nested_msgs = symtab_alloc(ctx, sizeof(*m->nested_msgs) * msg_total);
  for (size_t k = 0; k < msg_total; k++) {
    create_msgdef(ctx, m->full_name, nested_protos[k], m, &m->nested_msgs[k]);
  }
}
8242
/* Resolves the sub-definition (message or enum) that field `f` refers to.
 *
 * On entry f->sub.unresolved holds the field's FieldDescriptorProto.  For
 * message, group and enum fields the proto's type_name is looked up relative
 * to `prefix` and the result is stored in f->sub.msgdef or f->sub.enumdef.
 * When the field's type is FIELD_TYPE_UNSPECIFIED, the type is inferred from
 * whatever the name resolves to and f->type_ is updated accordingly.  Fields
 * of any other type are left untouched. */
static void resolve_subdef(symtab_addctx* ctx, const char* prefix,
                           upb_FieldDef* f) {
  const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved;
  upb_StringView name =
      google_protobuf_FieldDescriptorProto_type_name(field_proto);
  bool has_name =
      google_protobuf_FieldDescriptorProto_has_type_name(field_proto);
  switch ((int)f->type_) {
    case FIELD_TYPE_UNSPECIFIED: {
      // Type was not specified and must be inferred.
      UPB_ASSERT(has_name);
      upb_deftype_t type;
      const void* def =
          symtab_resolveany(ctx, f->full_name, prefix, name, &type);
      switch (type) {
        case UPB_DEFTYPE_ENUM:
          f->sub.enumdef = def;
          f->type_ = kUpb_FieldType_Enum;
          break;
        case UPB_DEFTYPE_MSG:
          f->sub.msgdef = def;
          f->type_ = kUpb_FieldType_Message;  // It appears there is no way of
                                              // this being a group.
          break;
        default:
          symtab_errf(ctx, "Couldn't resolve type name for field %s",
                      f->full_name);
      }
      // The sub-def is already stored above.  Without this break, control
      // would fall into the message case below and an inferred enum would be
      // looked up again as a message.
      break;
    }
    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
      UPB_ASSERT(has_name);
      f->sub.msgdef =
          symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
      break;
    case kUpb_FieldType_Enum:
      UPB_ASSERT(has_name);
      f->sub.enumdef =
          symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_ENUM);
      break;
    default:
      // No resolution necessary.
      break;
  }
}
8288
/* Completes an extension field `f` once all names are available.
 *
 * Resolves the extendee message named in `field_proto`, checks that the
 * field's number falls inside one of the extendee's extension ranges, fills in
 * the extension's mini-table entry (unless a layout was supplied, in which
 * case the entry is only checked), and records the extension in the pool's
 * `exts` table keyed by the mini-table entry's address. */
static void resolve_extension(
    symtab_addctx* ctx, const char* prefix, upb_FieldDef* f,
    const google_protobuf_FieldDescriptorProto* field_proto) {
  if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
    symtab_errf(ctx, "extension for field '%s' had no extendee", f->full_name);
  }

  upb_StringView extendee_name =
      google_protobuf_FieldDescriptorProto_extendee(field_proto);
  const upb_MessageDef* extendee = symtab_resolve(ctx, f->full_name, prefix,
                                                  extendee_name,
                                                  UPB_DEFTYPE_MSG);
  f->msgdef = extendee;

  /* Extension ranges are half-open: [start, end). */
  bool in_range = false;
  for (int i = 0; i < extendee->ext_range_count && !in_range; i++) {
    const upb_ExtensionRange* range = &extendee->ext_ranges[i];
    in_range = range->start <= f->number_ && f->number_ < range->end;
  }

  if (!in_range) {
    symtab_errf(ctx,
                "field number %u in extension %s has no extension range in "
                "message %s",
                (unsigned)f->number_, f->full_name, f->msgdef->full_name);
  }

  const upb_MiniTable_Extension* ext = ctx->file->ext_layouts[f->layout_index];
  if (!ctx->layout) {
    /* No layout was supplied, so the entry is ours to populate. */
    upb_MiniTable_Extension* mut_ext = (upb_MiniTable_Extension*)ext;
    fill_fieldlayout(&mut_ext->field, f);
    mut_ext->field.presence = 0;
    mut_ext->field.offset = 0;
    mut_ext->field.submsg_index = 0;
    mut_ext->extendee = f->msgdef->layout;
    /* NOTE(review): f->sub is read as a message def here regardless of the
     * field's type -- confirm this is intended for enum/scalar extensions. */
    mut_ext->sub.submsg = f->sub.msgdef->layout;
  } else {
    UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number);
  }

  CHK_OOM(upb_inttable_insert(&ctx->symtab->exts, (uintptr_t)ext,
                              upb_value_constptr(f), ctx->arena));
}
8335
/* Sets the default value of field `f`.
 *
 * If `field_proto` carries an explicit default_value it is parsed with
 * parse_default() and f->has_default is set; explicit defaults are rejected
 * for proto3 files and for sub-message fields.  Otherwise
 * set_default_default() is used and f->has_default is cleared. */
static void resolve_default(
    symtab_addctx* ctx, upb_FieldDef* f,
    const google_protobuf_FieldDescriptorProto* field_proto) {
  // Have to delay resolving of the default value until now because of the enum
  // case, since enum defaults are specified with a label.
  if (!google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
    set_default_default(ctx, f);
    f->has_default = false;
    return;
  }

  upb_StringView text =
      google_protobuf_FieldDescriptorProto_default_value(field_proto);

  if (f->file->syntax == kUpb_Syntax_Proto3) {
    symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)",
                f->full_name);
  }

  if (upb_FieldDef_IsSubMessage(f)) {
    symtab_errf(ctx, "message fields cannot have explicit defaults (%s)",
                f->full_name);
  }

  parse_default(ctx, text.data, text.size, f);
  f->has_default = true;
}
8362
/* Runs the deferred resolution steps for field `f`: its sub-definition, its
 * default value and, for extensions only, its extendee. */
static void resolve_fielddef(symtab_addctx* ctx, const char* prefix,
                             upb_FieldDef* f) {
  // Grab the proto pointer first: resolve_subdef() may overwrite f->sub, and
  // the later steps still need the original descriptor.
  const google_protobuf_FieldDescriptorProto* const saved_proto =
      f->sub.unresolved;

  resolve_subdef(ctx, prefix, f);
  resolve_default(ctx, f, saved_proto);

  if (!f->is_extension_) return;
  resolve_extension(ctx, prefix, f, saved_proto);
}
8375
/* Resolves all fields and nested extensions of message `m`, builds its layout
 * when no layout was supplied, and then recurses into nested messages.
 *
 * m->in_message_set is set when some nested extension is an optional message
 * field whose type is `m` itself and whose extendee has
 * message_set_wire_format enabled. */
static void resolve_msgdef(symtab_addctx* ctx, upb_MessageDef* m) {
  for (int field_idx = 0; field_idx < m->field_count; field_idx++) {
    resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->fields[field_idx]);
  }

  m->in_message_set = false;
  for (int ext_idx = 0; ext_idx < m->nested_ext_count; ext_idx++) {
    upb_FieldDef* ext = (upb_FieldDef*)&m->nested_exts[ext_idx];
    resolve_fielddef(ctx, m->full_name, ext);

    if (ext->type_ != kUpb_FieldType_Message) continue;
    if (ext->label_ != kUpb_Label_Optional) continue;
    if (ext->sub.msgdef != m) continue;

    if (google_protobuf_MessageOptions_message_set_wire_format(
            ext->msgdef->opts)) {
      m->in_message_set = true;
    }
  }

  if (!ctx->layout) {
    make_layout(ctx, m);
  }

  for (int msg_idx = 0; msg_idx < m->nested_msg_count; msg_idx++) {
    resolve_msgdef(ctx, (upb_MessageDef*)&m->nested_msgs[msg_idx]);
  }
}
8399
/* Returns the number of extensions declared in `msg_proto`, including those
 * declared in any of its nested messages (recursively). */
static int count_exts_in_msg(const google_protobuf_DescriptorProto* msg_proto) {
  size_t own_exts;
  google_protobuf_DescriptorProto_extension(msg_proto, &own_exts);
  int total = own_exts;

  size_t nested_total;
  const google_protobuf_DescriptorProto* const* nested_protos =
      google_protobuf_DescriptorProto_nested_type(msg_proto, &nested_total);
  for (size_t k = 0; k < nested_total; k++) {
    total += count_exts_in_msg(nested_protos[k]);
  }

  return total;
}
8413
/* Populates `file` from `file_proto`.
 *
 * Steps, in order:
 *   1. Count every extension in the file (top-level and nested) and set up the
 *      flat array of extension layouts, either from ctx->layout or freshly
 *      allocated.
 *   2. Read the file name, package, syntax and options.
 *   3. Look up each dependency by name and validate the public/weak
 *      dependency indexes against the dependency list.
 *   4. Create enums, extensions, messages and services.
 *   5. Resolve cross-references for top-level extensions and messages.
 *   6. Register the extension layouts with the pool's extension registry.
 *
 * Errors are reported through symtab_errf() / CHK_OOM. */
static void build_filedef(
    symtab_addctx* ctx, upb_FileDef* file,
    const google_protobuf_FileDescriptorProto* file_proto) {
  const google_protobuf_DescriptorProto* const* msgs;
  const google_protobuf_EnumDescriptorProto* const* enums;
  const google_protobuf_FieldDescriptorProto* const* exts;
  const google_protobuf_ServiceDescriptorProto* const* services;
  const upb_StringView* strs;
  const int32_t* public_deps;
  const int32_t* weak_deps;
  size_t i, n;

  file->symtab = ctx->symtab;

  /* Count all extensions in the file, to build a flat array of layouts. */
  google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  int ext_count = n;
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    ext_count += count_exts_in_msg(msgs[i]);
  }
  file->ext_count = ext_count;

  if (ctx->layout) {
    /* We are using the ext layouts that were passed in. */
    file->ext_layouts = ctx->layout->exts;
    if (ctx->layout->ext_count != file->ext_count) {
      symtab_errf(ctx, "Extension count did not match layout (%d vs %d)",
                  ctx->layout->ext_count, file->ext_count);
    }
  } else {
    /* We are building ext layouts from scratch. */
    file->ext_layouts =
        symtab_alloc(ctx, sizeof(*file->ext_layouts) * file->ext_count);
    upb_MiniTable_Extension* ext =
        symtab_alloc(ctx, sizeof(*ext) * file->ext_count);
    for (int k = 0; k < file->ext_count; k++) {
      file->ext_layouts[k] = &ext[k];
    }
  }

  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
    symtab_errf(ctx, "File has no name");
  }

  file->name =
      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));

  /* An empty package is stored as NULL rather than as an empty string. */
  upb_StringView package = google_protobuf_FileDescriptorProto_package(file_proto);
  if (package.size) {
    check_ident(ctx, package, true);
    file->package = strviewdup(ctx, package);
  } else {
    file->package = NULL;
  }

  /* A missing syntax field means proto2. */
  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
    upb_StringView syntax =
        google_protobuf_FileDescriptorProto_syntax(file_proto);

    if (streql_view(syntax, "proto2")) {
      file->syntax = kUpb_Syntax_Proto2;
    } else if (streql_view(syntax, "proto3")) {
      file->syntax = kUpb_Syntax_Proto3;
    } else {
      symtab_errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'",
                  UPB_STRINGVIEW_ARGS(syntax));
    }
  } else {
    file->syntax = kUpb_Syntax_Proto2;
  }

  /* Read options. */
  SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto);

  /* Verify dependencies. */
  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  file->dep_count = n;
  file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n);

  for (i = 0; i < n; i++) {
    upb_StringView str = strs[i];
    file->deps[i] =
        upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size);
    if (!file->deps[i]) {
      symtab_errf(ctx,
                  "Depends on file '" UPB_STRINGVIEW_FORMAT
                  "', but it has not been loaded",
                  UPB_STRINGVIEW_ARGS(str));
    }
  }

  /* Public and weak dependencies are indexes into the dependency list above.
   * They come straight from the descriptor, so both bounds are checked: a
   * negative index is just as invalid as one past the end. */
  public_deps =
      google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n);
  file->public_dep_count = n;
  file->public_deps = symtab_alloc(ctx, sizeof(*file->public_deps) * n);
  int32_t* mutable_public_deps = (int32_t*)file->public_deps;
  for (i = 0; i < n; i++) {
    if (public_deps[i] < 0 || public_deps[i] >= file->dep_count) {
      symtab_errf(ctx, "public_dep %d is out of range", (int)public_deps[i]);
    }
    mutable_public_deps[i] = public_deps[i];
  }

  weak_deps =
      google_protobuf_FileDescriptorProto_weak_dependency(file_proto, &n);
  file->weak_dep_count = n;
  file->weak_deps = symtab_alloc(ctx, sizeof(*file->weak_deps) * n);
  int32_t* mutable_weak_deps = (int32_t*)file->weak_deps;
  for (i = 0; i < n; i++) {
    if (weak_deps[i] < 0 || weak_deps[i] >= file->dep_count) {
      symtab_errf(ctx, "weak_dep %d is out of range", (int)weak_deps[i]);
    }
    mutable_weak_deps[i] = weak_deps[i];
  }

  /* Create enums. */
  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  file->top_lvl_enum_count = n;
  file->top_lvl_enums = symtab_alloc(ctx, sizeof(*file->top_lvl_enums) * n);
  for (i = 0; i < n; i++) {
    create_enumdef(ctx, file->package, enums[i], NULL, &file->top_lvl_enums[i]);
  }

  /* Create extensions. */
  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  file->top_lvl_ext_count = n;
  file->top_lvl_exts = symtab_alloc(ctx, sizeof(*file->top_lvl_exts) * n);
  for (i = 0; i < n; i++) {
    create_fielddef(ctx, file->package, NULL, exts[i], &file->top_lvl_exts[i],
                    /* is_extension= */ true);
    ((upb_FieldDef*)&file->top_lvl_exts[i])->index_ = i;
  }

  /* Create messages. */
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  file->top_lvl_msg_count = n;
  file->top_lvl_msgs = symtab_alloc(ctx, sizeof(*file->top_lvl_msgs) * n);
  for (i = 0; i < n; i++) {
    create_msgdef(ctx, file->package, msgs[i], NULL, &file->top_lvl_msgs[i]);
  }

  /* Create services. */
  services = google_protobuf_FileDescriptorProto_service(file_proto, &n);
  file->service_count = n;
  file->services = symtab_alloc(ctx, sizeof(*file->services) * n);
  for (i = 0; i < n; i++) {
    create_service(ctx, services[i], &file->services[i]);
    ((upb_ServiceDef*)&file->services[i])->index = i;
  }

  /* Now that all names are in the table, build layouts and resolve refs. */
  for (i = 0; i < (size_t)file->top_lvl_ext_count; i++) {
    resolve_fielddef(ctx, file->package, (upb_FieldDef*)&file->top_lvl_exts[i]);
  }

  for (i = 0; i < (size_t)file->top_lvl_msg_count; i++) {
    resolve_msgdef(ctx, (upb_MessageDef*)&file->top_lvl_msgs[i]);
  }

  if (file->ext_count) {
    CHK_OOM(_upb_extreg_add(ctx->symtab->extreg, file->ext_layouts,
                            file->ext_count));
  }
}
8579
/* Removes from s->syms every symbol whose owning file is `file`.
 *
 * The owning file is found from the def kind packed into each table value:
 * extensions, messages, enums and services report their file directly, and an
 * enum value reports the file of its enum.  Any other kind is treated as
 * unreachable. */
static void remove_filedef(upb_DefPool* s, upb_FileDef* file) {
  upb_StringView sym_name;
  upb_value sym;
  intptr_t pos = UPB_INTTABLE_BEGIN;

  while (upb_strtable_next2(&s->syms, &sym_name, &sym, &pos)) {
    const upb_FileDef* owner;

    switch (deftype(sym)) {
      case UPB_DEFTYPE_EXT:
        owner = upb_FieldDef_File(unpack_def(sym, UPB_DEFTYPE_EXT));
        break;
      case UPB_DEFTYPE_MSG:
        owner = upb_MessageDef_File(unpack_def(sym, UPB_DEFTYPE_MSG));
        break;
      case UPB_DEFTYPE_ENUM:
        owner = upb_EnumDef_File(unpack_def(sym, UPB_DEFTYPE_ENUM));
        break;
      case UPB_DEFTYPE_ENUMVAL:
        owner = upb_EnumDef_File(
            upb_EnumValueDef_Enum(unpack_def(sym, UPB_DEFTYPE_ENUMVAL)));
        break;
      case UPB_DEFTYPE_SERVICE:
        owner = upb_ServiceDef_File(unpack_def(sym, UPB_DEFTYPE_SERVICE));
        break;
      default:
        UPB_UNREACHABLE();
    }

    if (owner != file) continue;
    upb_strtable_removeiter(&s->syms, &pos);
  }
}
8610
/* Builds a upb_FileDef from `file_proto` and adds it to pool `s`.
 *
 * `layout` may be NULL.  If the pool already has a layout registered under
 * this file name, that layout is used, and passing a different non-NULL
 * `layout` is an error.
 *
 * Returns the new file def on success.  On failure returns NULL with `status`
 * describing the error; symbols already added for the partially built file
 * are removed from the pool again. */
static const upb_FileDef* _upb_DefPool_AddFile(
    upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto,
    const upb_MiniTable_File* layout, upb_Status* status) {
  symtab_addctx ctx;
  upb_StringView name = google_protobuf_FileDescriptorProto_name(file_proto);
  upb_value v;

  /* s->files can hold two kinds of entry per name: a file def (the file was
   * already added) or a registered layout (to be used when building it). */
  if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) {
    if (unpack_def(v, UPB_DEFTYPE_FILE)) {
      upb_Status_SetErrorFormat(status, "duplicate file name (%.*s)",
                                UPB_STRINGVIEW_ARGS(name));
      return NULL;
    }
    const upb_MiniTable_File* registered = unpack_def(v, UPB_DEFTYPE_LAYOUT);
    UPB_ASSERT(registered);
    if (layout && layout != registered) {
      upb_Status_SetErrorFormat(
          status, "tried to build with a different layout (filename=%.*s)",
          UPB_STRINGVIEW_ARGS(name));
      return NULL;
    }
    layout = registered;
  }

  ctx.symtab = s;
  ctx.layout = layout;
  ctx.msg_count = 0;
  ctx.enum_count = 0;
  ctx.ext_count = 0;
  ctx.status = status;
  ctx.file = NULL;
  ctx.arena = upb_Arena_New();
  ctx.tmp_arena = upb_Arena_New();

  /* Either arena may have failed to allocate; release whichever succeeded. */
  if (!ctx.arena || !ctx.tmp_arena) {
    if (ctx.arena) upb_Arena_Free(ctx.arena);
    if (ctx.tmp_arena) upb_Arena_Free(ctx.tmp_arena);
    upb_Status_setoom(status);
    return NULL;
  }

  if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) {
    /* Error path: reached by a jump back to ctx.err during the build below.
     * Undo any symbols registered for the partially built file and make the
     * function return NULL. */
    UPB_ASSERT(!upb_Status_IsOk(status));
    if (ctx.file) {
      remove_filedef(s, ctx.file);
      ctx.file = NULL;
    }
  } else {
    ctx.file = symtab_alloc(&ctx, sizeof(*ctx.file));
    build_filedef(&ctx, ctx.file, file_proto);
    /* NOTE(review): the result of this insert is not checked -- confirm
     * whether an allocation failure here needs handling. */
    upb_strtable_insert(&s->files, name.data, name.size,
                        pack_def(ctx.file, UPB_DEFTYPE_FILE), ctx.arena);
    UPB_ASSERT(upb_Status_IsOk(status));
    /* Presumably ties the lifetime of the new defs to the pool's arena so the
     * upb_Arena_Free() below does not release them -- TODO confirm. */
    upb_Arena_Fuse(s->arena, ctx.arena);
  }

  upb_Arena_Free(ctx.arena);
  upb_Arena_Free(ctx.tmp_arena);
  return ctx.file;
}
8671
upb_DefPool_AddFile(upb_DefPool * s,const google_protobuf_FileDescriptorProto * file_proto,upb_Status * status)8672 const upb_FileDef* upb_DefPool_AddFile(
8673 upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto,
8674 upb_Status* status) {
8675 return _upb_DefPool_AddFile(s, file_proto, NULL, status);
8676 }
8677
8678 /* Include here since we want most of this file to be stdio-free. */
8679 #include <stdio.h>
8680
/* Loads the compiled-in descriptor described by `init` into pool `s`, first
 * loading each of its dependencies (the NULL-terminated init->deps list).
 *
 * If a file named init->filename is already in the pool this is a no-op that
 * returns true.  When `rebuild_minitable` is true no layout is passed to
 * _upb_DefPool_AddFile(); otherwise init->layout is used.
 *
 * Returns true on success.  On failure an error is printed to stderr and
 * false is returned. */
bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init,
                                bool rebuild_minitable) {
  /* Since this function should never fail (it would indicate a bug in upb) we
   * print errors to stderr instead of returning error status to the user. */
  _upb_DefPool_Init** deps = init->deps;
  google_protobuf_FileDescriptorProto* file;
  const upb_MiniTable_File* mt;
  upb_Arena* arena;
  upb_Status status;

  upb_Status_Clear(&status);

  if (upb_DefPool_FindFileByName(s, init->filename)) {
    return true;
  }

  arena = upb_Arena_New();
  if (!arena) {
    /* Without an arena the descriptor cannot be parsed. */
    upb_Status_setoom(&status);
    goto err;
  }

  for (; *deps; deps++) {
    if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) {
      /* The nested call already printed its own error; record which
       * dependency failed so the message below is not empty. */
      upb_Status_SetErrorFormat(&status, "failed to load dependency '%s'",
                                (*deps)->filename);
      goto err;
    }
  }

  file = google_protobuf_FileDescriptorProto_parse_ex(
      init->descriptor.data, init->descriptor.size, NULL,
      kUpb_DecodeOption_AliasString, arena);
  s->bytes_loaded += init->descriptor.size;

  if (!file) {
    upb_Status_SetErrorFormat(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto err;
  }

  mt = rebuild_minitable ? NULL : init->layout;
  if (!_upb_DefPool_AddFile(s, file, mt, &status)) {
    goto err;
  }

  upb_Arena_Free(arena);
  return true;

err:
  fprintf(stderr,
          "Error loading compiled-in descriptor for file '%s' (this should "
          "never happen): %s\n",
          init->filename, upb_Status_ErrorMessage(&status));
  if (arena) upb_Arena_Free(arena);
  return false;
}
8732
_upb_DefPool_BytesLoaded(const upb_DefPool * s)8733 size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) {
8734 return s->bytes_loaded;
8735 }
8736
_upb_DefPool_Arena(const upb_DefPool * s)8737 upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; }
8738
_upb_DefPool_FindExtensionByMiniTable(const upb_DefPool * s,const upb_MiniTable_Extension * ext)8739 const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable(
8740 const upb_DefPool* s, const upb_MiniTable_Extension* ext) {
8741 upb_value v;
8742 bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v);
8743 UPB_ASSERT(ok);
8744 return upb_value_getconstptr(v);
8745 }
8746
upb_DefPool_FindExtensionByNumber(const upb_DefPool * s,const upb_MessageDef * m,int32_t fieldnum)8747 const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s,
8748 const upb_MessageDef* m,
8749 int32_t fieldnum) {
8750 const upb_MiniTable* l = upb_MessageDef_MiniTable(m);
8751 const upb_MiniTable_Extension* ext = _upb_extreg_get(s->extreg, l, fieldnum);
8752 return ext ? _upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL;
8753 }
8754
/* Records `file` as the layout for `filename` by inserting a LAYOUT entry
 * into s->files.
 *
 * Returns false if upb_DefPool_FindFileByName() already finds `filename`,
 * otherwise returns the result of the table insert. */
bool _upb_DefPool_registerlayout(upb_DefPool* s, const char* filename,
                                 const upb_MiniTable_File* file) {
  if (upb_DefPool_FindFileByName(s, filename)) {
    return false;
  }

  upb_value packed = pack_def(file, UPB_DEFTYPE_LAYOUT);
  return upb_strtable_insert(&s->files, filename, strlen(filename), packed,
                             s->arena);
}
8762
upb_DefPool_ExtensionRegistry(const upb_DefPool * s)8763 const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry(
8764 const upb_DefPool* s) {
8765 return s->extreg;
8766 }
8767
upb_DefPool_GetAllExtensions(const upb_DefPool * s,const upb_MessageDef * m,size_t * count)8768 const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s,
8769 const upb_MessageDef* m,
8770 size_t* count) {
8771 size_t n = 0;
8772 intptr_t iter = UPB_INTTABLE_BEGIN;
8773 uintptr_t key;
8774 upb_value val;
8775 // This is O(all exts) instead of O(exts for m). If we need this to be
8776 // efficient we may need to make extreg into a two-level table, or have a
8777 // second per-message index.
8778 while (upb_inttable_next2(&s->exts, &key, &val, &iter)) {
8779 const upb_FieldDef* f = upb_value_getconstptr(val);
8780 if (upb_FieldDef_ContainingType(f) == m) n++;
8781 }
8782 const upb_FieldDef** exts = malloc(n * sizeof(*exts));
8783 iter = UPB_INTTABLE_BEGIN;
8784 size_t i = 0;
8785 while (upb_inttable_next2(&s->exts, &key, &val, &iter)) {
8786 const upb_FieldDef* f = upb_value_getconstptr(val);
8787 if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f;
8788 }
8789 *count = n;
8790 return exts;
8791 }
8792
8793 #undef CHK_OOM
8794
8795 /** upb/reflection.c ************************************************************/
8796
8797 #include <string.h>
8798
8799
get_field_size(const upb_MiniTable_Field * f)8800 static size_t get_field_size(const upb_MiniTable_Field* f) {
8801 static unsigned char sizes[] = {
8802 0, /* 0 */
8803 8, /* kUpb_FieldType_Double */
8804 4, /* kUpb_FieldType_Float */
8805 8, /* kUpb_FieldType_Int64 */
8806 8, /* kUpb_FieldType_UInt64 */
8807 4, /* kUpb_FieldType_Int32 */
8808 8, /* kUpb_FieldType_Fixed64 */
8809 4, /* kUpb_FieldType_Fixed32 */
8810 1, /* kUpb_FieldType_Bool */
8811 sizeof(upb_StringView), /* kUpb_FieldType_String */
8812 sizeof(void*), /* kUpb_FieldType_Group */
8813 sizeof(void*), /* kUpb_FieldType_Message */
8814 sizeof(upb_StringView), /* kUpb_FieldType_Bytes */
8815 4, /* kUpb_FieldType_UInt32 */
8816 4, /* kUpb_FieldType_Enum */
8817 4, /* kUpb_FieldType_SFixed32 */
8818 8, /* kUpb_FieldType_SFixed64 */
8819 4, /* kUpb_FieldType_SInt32 */
8820 8, /* kUpb_FieldType_SInt64 */
8821 };
8822 return upb_IsRepeatedOrMap(f) ? sizeof(void*) : sizes[f->descriptortype];
8823 }
8824
8825 /** upb_Message
8826 * *******************************************************************/
8827
upb_Message_New(const upb_MessageDef * m,upb_Arena * a)8828 upb_Message* upb_Message_New(const upb_MessageDef* m, upb_Arena* a) {
8829 return _upb_Message_New(upb_MessageDef_MiniTable(m), a);
8830 }
8831
in_oneof(const upb_MiniTable_Field * field)8832 static bool in_oneof(const upb_MiniTable_Field* field) {
8833 return field->presence < 0;
8834 }
8835
_upb_Message_Getraw(const upb_Message * msg,const upb_FieldDef * f)8836 static upb_MessageValue _upb_Message_Getraw(const upb_Message* msg,
8837 const upb_FieldDef* f) {
8838 const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8839 const char* mem = UPB_PTR_AT(msg, field->offset, char);
8840 upb_MessageValue val = {0};
8841 memcpy(&val, mem, get_field_size(field));
8842 return val;
8843 }
8844
upb_Message_Has(const upb_Message * msg,const upb_FieldDef * f)8845 bool upb_Message_Has(const upb_Message* msg, const upb_FieldDef* f) {
8846 assert(upb_FieldDef_HasPresence(f));
8847 if (upb_FieldDef_IsExtension(f)) {
8848 const upb_MiniTable_Extension* ext = _upb_FieldDef_ExtensionMiniTable(f);
8849 return _upb_Message_Getext(msg, ext) != NULL;
8850 } else {
8851 const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8852 if (in_oneof(field)) {
8853 return _upb_getoneofcase_field(msg, field) == field->number;
8854 } else if (field->presence > 0) {
8855 return _upb_hasbit_field(msg, field);
8856 } else {
8857 UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message ||
8858 field->descriptortype == kUpb_FieldType_Group);
8859 return _upb_Message_Getraw(msg, f).msg_val != NULL;
8860 }
8861 }
8862 }
8863
upb_Message_WhichOneof(const upb_Message * msg,const upb_OneofDef * o)8864 const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg,
8865 const upb_OneofDef* o) {
8866 const upb_FieldDef* f = upb_OneofDef_Field(o, 0);
8867 if (upb_OneofDef_IsSynthetic(o)) {
8868 UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1);
8869 return upb_Message_Has(msg, f) ? f : NULL;
8870 } else {
8871 const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8872 uint32_t oneof_case = _upb_getoneofcase_field(msg, field);
8873 f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL;
8874 UPB_ASSERT((f != NULL) == (oneof_case != 0));
8875 return f;
8876 }
8877 }
8878
upb_Message_Get(const upb_Message * msg,const upb_FieldDef * f)8879 upb_MessageValue upb_Message_Get(const upb_Message* msg,
8880 const upb_FieldDef* f) {
8881 if (upb_FieldDef_IsExtension(f)) {
8882 const upb_Message_Extension* ext =
8883 _upb_Message_Getext(msg, _upb_FieldDef_ExtensionMiniTable(f));
8884 if (ext) {
8885 upb_MessageValue val;
8886 memcpy(&val, &ext->data, sizeof(val));
8887 return val;
8888 } else if (upb_FieldDef_IsRepeated(f)) {
8889 return (upb_MessageValue){.array_val = NULL};
8890 }
8891 } else if (!upb_FieldDef_HasPresence(f) || upb_Message_Has(msg, f)) {
8892 return _upb_Message_Getraw(msg, f);
8893 }
8894 return upb_FieldDef_Default(f);
8895 }
8896
upb_Message_Mutable(upb_Message * msg,const upb_FieldDef * f,upb_Arena * a)8897 upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg,
8898 const upb_FieldDef* f,
8899 upb_Arena* a) {
8900 UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f));
8901 if (upb_FieldDef_HasPresence(f) && !upb_Message_Has(msg, f)) {
8902 // We need to skip the upb_Message_Get() call in this case.
8903 goto make;
8904 }
8905
8906 upb_MessageValue val = upb_Message_Get(msg, f);
8907 if (val.array_val) {
8908 return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val};
8909 }
8910
8911 upb_MutableMessageValue ret;
8912 make:
8913 if (!a) return (upb_MutableMessageValue){.array = NULL};
8914 if (upb_FieldDef_IsMap(f)) {
8915 const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
8916 const upb_FieldDef* key =
8917 upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
8918 const upb_FieldDef* value =
8919 upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
8920 ret.map =
8921 upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value));
8922 } else if (upb_FieldDef_IsRepeated(f)) {
8923 ret.array = upb_Array_New(a, upb_FieldDef_CType(f));
8924 } else {
8925 UPB_ASSERT(upb_FieldDef_IsSubMessage(f));
8926 ret.msg = upb_Message_New(upb_FieldDef_MessageSubDef(f), a);
8927 }
8928
8929 val.array_val = ret.array;
8930 upb_Message_Set(msg, f, val, a);
8931
8932 return ret;
8933 }
8934
/* Stores `val` into field `f` of `msg`.
 *
 * Extensions are stored in an extension entry that is fetched or created
 * using arena `a`; false is returned if that entry cannot be obtained.
 * Regular fields are written in place at the field's offset, after which the
 * hasbit is set (positive `presence`) or the oneof case is set to the field
 * number (oneof member).  Returns true on success. */
bool upb_Message_Set(upb_Message* msg, const upb_FieldDef* f,
                     upb_MessageValue val, upb_Arena* a) {
  if (upb_FieldDef_IsExtension(f)) {
    upb_Message_Extension* slot = _upb_Message_GetOrCreateExtension(
        msg, _upb_FieldDef_ExtensionMiniTable(f), a);
    if (slot == NULL) {
      return false;
    }
    memcpy(&slot->data, &val, sizeof(val));
    return true;
  }

  const upb_MiniTable_Field* mt_field = upb_FieldDef_MiniTable(f);
  char* dst = UPB_PTR_AT(msg, mt_field->offset, char);
  memcpy(dst, &val, get_field_size(mt_field));

  if (mt_field->presence > 0) {
    _upb_sethas_field(msg, mt_field);
  } else if (in_oneof(mt_field)) {
    *_upb_oneofcase_field(msg, mt_field) = mt_field->number;
  }
  return true;
}
8954
/* Clears field `f` in `msg`.
 *
 * Extensions are removed via _upb_Message_Clearext().  For regular fields the
 * hasbit is cleared (positive `presence`) or the oneof case is reset to 0,
 * and the field's storage is zeroed.  A oneof member that is not the
 * currently selected case is left completely untouched. */
void upb_Message_ClearField(upb_Message* msg, const upb_FieldDef* f) {
  if (upb_FieldDef_IsExtension(f)) {
    _upb_Message_Clearext(msg, _upb_FieldDef_ExtensionMiniTable(f));
    return;
  }

  const upb_MiniTable_Field* mt_field = upb_FieldDef_MiniTable(f);
  char* storage = UPB_PTR_AT(msg, mt_field->offset, char);

  if (mt_field->presence > 0) {
    _upb_clearhas_field(msg, mt_field);
  } else if (in_oneof(mt_field)) {
    uint32_t* case_ptr = _upb_oneofcase_field(msg, mt_field);
    /* Another member of the oneof is set; nothing of ours to clear. */
    if (*case_ptr != mt_field->number) return;
    *case_ptr = 0;
  }

  memset(storage, 0, get_field_size(mt_field));
}
8973
/* Clears `msg` by forwarding the mini-table of `m` to _upb_Message_Clear(). */
void upb_Message_Clear(upb_Message* msg, const upb_MessageDef* m) {
  const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
  _upb_Message_Clear(msg, layout);
}
8977
/* Advances an iteration over the set fields of `msg`.
 *
 * `*iter` holds the position of the previously returned entry (callers in
 * this file start from kUpb_Message_Begin).  On success the next set field
 * and its value are stored in *out_f / *out_val, *iter is updated and true is
 * returned.  When nothing is left, *iter is still updated and false is
 * returned.
 *
 * Regular fields come first.  A field with presence counts as set when
 * upb_Message_Has() says so; a field without presence counts as set when its
 * value is non-zero (for non-repeated strings only the size is considered)
 * and, for arrays and maps, non-empty.  If `ext_pool` is non-NULL the
 * message's extensions are returned after the regular fields, last stored
 * extension first. */
bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m,
                      const upb_DefPool* ext_pool, const upb_FieldDef** out_f,
                      upb_MessageValue* out_val, size_t* iter) {
  const size_t field_count = upb_MessageDef_FieldCount(m);
  const upb_MessageValue zero = {0};
  size_t i = *iter;
  UPB_UNUSED(ext_pool);

  /* Scan the regular fields that follow the previous position. */
  for (++i; i < field_count; ++i) {
    const upb_FieldDef* f = upb_MessageDef_Field(m, i);
    upb_MessageValue val = _upb_Message_Getraw(msg, f);

    if (upb_FieldDef_HasPresence(f)) {
      if (!upb_Message_Has(msg, f)) continue;
    } else {
      upb_MessageValue probe = val;
      if (upb_FieldDef_IsString(f) && !upb_FieldDef_IsRepeated(f)) {
        /* Only the size decides emptiness; the pointer may be non-NULL. */
        probe.str_val.data = NULL;
      }
      /* NULL or 0 means unset. */
      if (memcmp(&probe, &zero, sizeof(probe)) == 0) continue;

      /* An allocated but empty array or map also counts as unset. */
      if (upb_FieldDef_IsMap(f)) {
        if (upb_Map_Size(probe.map_val) == 0) continue;
      } else if (upb_FieldDef_IsRepeated(f)) {
        if (upb_Array_Size(probe.array_val) == 0) continue;
      }
    }

    *out_val = val;
    *out_f = f;
    *iter = i;
    return true;
  }

  if (ext_pool) {
    /* Positions past the regular fields index the extensions. */
    size_t ext_count;
    const upb_Message_Extension* exts = _upb_Message_Getexts(msg, &ext_count);
    const size_t ext_pos = i - field_count;
    if (ext_pos < ext_count) {
      const upb_Message_Extension* ext = exts + (ext_count - 1 - ext_pos);
      memcpy(out_val, &ext->data, sizeof(*out_val));
      *out_f = _upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext);
      *iter = i;
      return true;
    }
  }

  *iter = i;
  return false;
}
9033
/* Recursively discards unknown fields from `msg` and from every sub-message
 * reachable through its set fields (singular, repeated, and map values).
 *
 * `depth` is decremented on entry; when it reaches zero the function returns
 * false without touching `msg`.  Returns false if that limit was hit for any
 * message in the tree, true otherwise.  Extensions are not visited because
 * the field iteration is run without an extension pool. */
bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
                                 int depth) {
  size_t iter = kUpb_Message_Begin;
  const upb_FieldDef* f;
  upb_MessageValue val;
  bool ret = true;

  if (--depth == 0) return false;

  _upb_Message_DiscardUnknown_shallow(msg);

  while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) {
    const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
    if (!subm) continue; /* Not a message-typed field. */

    if (upb_FieldDef_IsMap(f)) {
      /* For maps `subm` is the map entry type; recurse into the values only
       * if the entry's value field is itself a message. */
      const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(
          subm, kUpb_MapEntry_ValueFieldNumber);
      const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f);
      upb_Map* map = (upb_Map*)val.map_val;
      size_t map_iter = kUpb_Map_Begin;

      if (!val_m) continue;

      while (upb_MapIterator_Next(map, &map_iter)) {
        upb_MessageValue map_val = upb_MapIterator_Value(map, map_iter);
        if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m,
                                         depth)) {
          ret = false;
        }
      }
    } else if (upb_FieldDef_IsRepeated(f)) {
      const upb_Array* arr = val.array_val;
      size_t i, n = upb_Array_Size(arr);
      for (i = 0; i < n; i++) {
        upb_MessageValue elem = upb_Array_Get(arr, i);
        if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm,
                                         depth)) {
          ret = false;
        }
      }
    } else {
      if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm,
                                       depth)) {
        ret = false;
      }
    }
  }

  return ret;
}
9083
/* Public entry point: discards unknown fields from `msg` and its
 * sub-messages, passing `maxdepth` as the recursion budget of
 * _upb_Message_DiscardUnknown() and returning its result. */
bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
                                int maxdepth) {
  const bool complete = _upb_Message_DiscardUnknown(msg, m, maxdepth);
  return complete;
}
9088
9089 /** upb/decode.c ************************************************************/
9090
9091 #include <setjmp.h>
9092 #include <string.h>
9093
9094
9095 /* Must be last. */
9096
/* Maps descriptor type -> elem_size_lg2 (log2 of the element size in bytes).
 *
 * Indexed directly by the numeric descriptor type.  Index 0 is not a valid
 * type; its -1 initializer is stored in a uint8_t and therefore reads back
 * as 255.
 * NOTE(review): UPB_SIZE(a, b) presumably selects between a 32-bit and a
 * 64-bit value; confirm against its definition. */
static const uint8_t desctype_to_elem_size_lg2[] = {
    -1,             /* invalid descriptor type */
    3,              /* DOUBLE */
    2,              /* FLOAT */
    3,              /* INT64 */
    3,              /* UINT64 */
    2,              /* INT32 */
    3,              /* FIXED64 */
    2,              /* FIXED32 */
    0,              /* BOOL */
    UPB_SIZE(3, 4), /* STRING */
    UPB_SIZE(2, 3), /* GROUP */
    UPB_SIZE(2, 3), /* MESSAGE */
    UPB_SIZE(3, 4), /* BYTES */
    2,              /* UINT32 */
    2,              /* ENUM */
    2,              /* SFIXED32 */
    3,              /* SFIXED64 */
    2,              /* SINT32 */
    3,              /* SINT64 */
};
9119
/* Maps descriptor type -> upb map size.
 *
 * Indexed directly by the numeric descriptor type.  Fixed-width types map to
 * their size in bytes, message/group to the size of a pointer, and
 * string/bytes to the UPB_MAPTYPE_STRING marker.  Index 0 is not a valid
 * type; its -1 initializer is stored in a uint8_t and therefore reads back
 * as 255. */
static const uint8_t desctype_to_mapsize[] = {
    -1,                 /* invalid descriptor type */
    8,                  /* DOUBLE */
    4,                  /* FLOAT */
    8,                  /* INT64 */
    8,                  /* UINT64 */
    4,                  /* INT32 */
    8,                  /* FIXED64 */
    4,                  /* FIXED32 */
    1,                  /* BOOL */
    UPB_MAPTYPE_STRING, /* STRING */
    sizeof(void*),      /* GROUP */
    sizeof(void*),      /* MESSAGE */
    UPB_MAPTYPE_STRING, /* BYTES */
    4,                  /* UINT32 */
    4,                  /* ENUM */
    4,                  /* SFIXED32 */
    8,                  /* SFIXED64 */
    4,                  /* SINT32 */
    8,                  /* SINT64 */
};
9142
9143 static const unsigned FIXED32_OK_MASK = (1 << kUpb_FieldType_Float) |
9144 (1 << kUpb_FieldType_Fixed32) |
9145 (1 << kUpb_FieldType_SFixed32);
9146
9147 static const unsigned FIXED64_OK_MASK = (1 << kUpb_FieldType_Double) |
9148 (1 << kUpb_FieldType_Fixed64) |
9149 (1 << kUpb_FieldType_SFixed64);
9150
/* Fake field type for MessageSet items, numbered directly after the 18 real
 * descriptor types.  NOTE(review): in delim_ops the repeated-field ops start
 * TYPE_COUNT entries after their non-repeated counterparts (REPEATED DOUBLE
 * sits at index 1 + 19); confirm that this is how TYPE_COUNT is used. */
#define TYPE_MSGSET_ITEM 19
#define TYPE_COUNT 19

/* Op: an action to be performed for a wire-type/field-type combination.
 * Negative expansions and macro arguments are parenthesized so the macros
 * expand safely inside larger expressions. */
#define OP_UNKNOWN (-1) /* Unknown field. */
#define OP_MSGSET_ITEM (-2)
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_ENUM 1
#define OP_STRING 4
#define OP_BYTES 5
#define OP_SUBMSG 6
/* Scalar fields use only ops above. Repeated fields can use any op.  */
#define OP_FIXPCK_LG2(n) ((n) + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) ((n) + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
#define OP_PACKED_ENUM 13
9167
/* Op to perform for a varint wire value, indexed by descriptor type (index 0
 * is "field not found", index 19 the fake MSGSET_ITEM type).  Types that
 * cannot be filled from a varint map to OP_UNKNOWN. */
static const int8_t varint_ops[] = {
    OP_UNKNOWN,       /* field not found */
    OP_UNKNOWN,       /* DOUBLE */
    OP_UNKNOWN,       /* FLOAT */
    OP_SCALAR_LG2(3), /* INT64 */
    OP_SCALAR_LG2(3), /* UINT64 */
    OP_SCALAR_LG2(2), /* INT32 */
    OP_UNKNOWN,       /* FIXED64 */
    OP_UNKNOWN,       /* FIXED32 */
    OP_SCALAR_LG2(0), /* BOOL */
    OP_UNKNOWN,       /* STRING */
    OP_UNKNOWN,       /* GROUP */
    OP_UNKNOWN,       /* MESSAGE */
    OP_UNKNOWN,       /* BYTES */
    OP_SCALAR_LG2(2), /* UINT32 */
    OP_ENUM,          /* ENUM */
    OP_UNKNOWN,       /* SFIXED32 */
    OP_UNKNOWN,       /* SFIXED64 */
    OP_SCALAR_LG2(2), /* SINT32 */
    OP_SCALAR_LG2(3), /* SINT64 */
    OP_UNKNOWN,       /* MSGSET_ITEM */
};
9190
/* Op to perform for a length-delimited wire value.
 *
 * The first 20 entries (indices 0..19) are indexed by descriptor type for
 * non-repeated fields.  The entries for repeated fields follow, laid out in
 * the same descriptor-type order starting with REPEATED DOUBLE at index 20. */
static const int8_t delim_ops[] = {
    /* For non-repeated field type. */
    OP_UNKNOWN, /* field not found */
    OP_UNKNOWN, /* DOUBLE */
    OP_UNKNOWN, /* FLOAT */
    OP_UNKNOWN, /* INT64 */
    OP_UNKNOWN, /* UINT64 */
    OP_UNKNOWN, /* INT32 */
    OP_UNKNOWN, /* FIXED64 */
    OP_UNKNOWN, /* FIXED32 */
    OP_UNKNOWN, /* BOOL */
    OP_STRING,  /* STRING */
    OP_UNKNOWN, /* GROUP */
    OP_SUBMSG,  /* MESSAGE */
    OP_BYTES,   /* BYTES */
    OP_UNKNOWN, /* UINT32 */
    OP_UNKNOWN, /* ENUM */
    OP_UNKNOWN, /* SFIXED32 */
    OP_UNKNOWN, /* SFIXED64 */
    OP_UNKNOWN, /* SINT32 */
    OP_UNKNOWN, /* SINT64 */
    OP_UNKNOWN, /* MSGSET_ITEM */
    /* For repeated field type. */
    OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
    OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
    OP_VARPCK_LG2(3), /* REPEATED INT64 */
    OP_VARPCK_LG2(3), /* REPEATED UINT64 */
    OP_VARPCK_LG2(2), /* REPEATED INT32 */
    OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
    OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
    OP_VARPCK_LG2(0), /* REPEATED BOOL */
    OP_STRING,        /* REPEATED STRING */
    OP_SUBMSG,        /* REPEATED GROUP */
    OP_SUBMSG,        /* REPEATED MESSAGE */
    OP_BYTES,         /* REPEATED BYTES */
    OP_VARPCK_LG2(2), /* REPEATED UINT32 */
    OP_PACKED_ENUM,   /* REPEATED ENUM */
    OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
    OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
    OP_VARPCK_LG2(2), /* REPEATED SINT32 */
    OP_VARPCK_LG2(3), /* REPEATED SINT64 */
    /* Omitting MSGSET_*, because we never emit a repeated msgset type */
};
9234
/* A value as read off the wire, before it is stored into a message.  The
 * members overlay the same storage; the munge helpers below read uint64_val
 * and rewrite the narrower member that the destination field expects. */
typedef union {
  bool bool_val;
  uint32_t uint32_val;
  uint64_t uint64_val;
  uint32_t size; /* Byte length, e.g. of a packed payload. */
} wireval;
9241
/* Forward declaration: the sub-message and group helpers below call
 * decode_msg() before its definition appears. */
static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
                              const upb_MiniTable* layout);
9244
decode_err(upb_Decoder * d,upb_DecodeStatus status)9245 UPB_NORETURN static void* decode_err(upb_Decoder* d, upb_DecodeStatus status) {
9246 assert(status != kUpb_DecodeStatus_Ok);
9247 UPB_LONGJMP(d->err, status);
9248 }
9249
/* Non-static counterpart of decode_err() taking the status as a plain int.
 * Jumps to d->err with `status`, which must not be kUpb_DecodeStatus_Ok
 * (asserted).  The trailing return only satisfies the signature. */
const char* fastdecode_err(upb_Decoder* d, int status) {
  const bool is_failure = status != kUpb_DecodeStatus_Ok;
  assert(is_failure);
  UPB_LONGJMP(d->err, status);
  return NULL;
}
/* Fails the decode with kUpb_DecodeStatus_BadUtf8 unless
 * decode_verifyutf8_inl() accepts the `len` bytes at `buf`. */
static void decode_verifyutf8(upb_Decoder* d, const char* buf, int len) {
  if (decode_verifyutf8_inl(buf, len)) {
    return;
  }
  decode_err(d, kUpb_DecodeStatus_BadUtf8);
}
9259
/* Makes sure `arr` has room for `elem` more elements.
 *
 * If the spare capacity (size - len) is too small the array is reallocated
 * from the decoder's arena to hold len + elem elements; failure aborts the
 * decode with kUpb_DecodeStatus_OutOfMemory.  Returns true if a reallocation
 * was needed, false if the existing capacity was already sufficient. */
static bool decode_reserve(upb_Decoder* d, upb_Array* arr, size_t elem) {
  if (!(arr->size - arr->len < elem)) {
    return false;
  }
  if (!_upb_array_realloc(arr, arr->len + elem, &d->arena)) {
    decode_err(d, kUpb_DecodeStatus_OutOfMemory);
  }
  return true;
}
9267
/* Result of decode_longvarint64(): the position just past the varint and its
 * decoded value.  `ptr` is NULL when no terminating byte was found. */
typedef struct {
  const char* ptr;
  uint64_t val;
} decode_vret;
9272
9273 UPB_NOINLINE
decode_longvarint64(const char * ptr,uint64_t val)9274 static decode_vret decode_longvarint64(const char* ptr, uint64_t val) {
9275 decode_vret ret = {NULL, 0};
9276 uint64_t byte;
9277 int i;
9278 for (i = 1; i < 10; i++) {
9279 byte = (uint8_t)ptr[i];
9280 val += (byte - 1) << (i * 7);
9281 if (!(byte & 0x80)) {
9282 ret.ptr = ptr + i + 1;
9283 ret.val = val;
9284 return ret;
9285 }
9286 }
9287 return ret;
9288 }
9289
9290 UPB_FORCEINLINE
decode_varint64(upb_Decoder * d,const char * ptr,uint64_t * val)9291 static const char* decode_varint64(upb_Decoder* d, const char* ptr,
9292 uint64_t* val) {
9293 uint64_t byte = (uint8_t)*ptr;
9294 if (UPB_LIKELY((byte & 0x80) == 0)) {
9295 *val = byte;
9296 return ptr + 1;
9297 } else {
9298 decode_vret res = decode_longvarint64(ptr, byte);
9299 if (!res.ptr) return decode_err(d, kUpb_DecodeStatus_Malformed);
9300 *val = res.val;
9301 return res.ptr;
9302 }
9303 }
9304
9305 UPB_FORCEINLINE
decode_tag(upb_Decoder * d,const char * ptr,uint32_t * val)9306 static const char* decode_tag(upb_Decoder* d, const char* ptr, uint32_t* val) {
9307 uint64_t byte = (uint8_t)*ptr;
9308 if (UPB_LIKELY((byte & 0x80) == 0)) {
9309 *val = byte;
9310 return ptr + 1;
9311 } else {
9312 const char* start = ptr;
9313 decode_vret res = decode_longvarint64(ptr, byte);
9314 if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
9315 return decode_err(d, kUpb_DecodeStatus_Malformed);
9316 }
9317 *val = res.val;
9318 return res.ptr;
9319 }
9320 }
9321
9322 UPB_FORCEINLINE
upb_Decoder_DecodeSize(upb_Decoder * d,const char * ptr,uint32_t * size)9323 static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr,
9324 uint32_t* size) {
9325 uint64_t size64;
9326 ptr = decode_varint64(d, ptr, &size64);
9327 if (size64 >= INT32_MAX || ptr - d->end + (int)size64 > d->limit) {
9328 decode_err(d, kUpb_DecodeStatus_Malformed);
9329 }
9330 *size = size64;
9331 return ptr;
9332 }
9333
/* Prepares a 32-bit value for the memcpy(dst, &val, 4) done by the next
 * stage.  On little-endian hosts nothing needs to change; otherwise the
 * 64-bit value is narrowed into the uint32_val member. */
static void decode_munge_int32(wireval* val) {
  if (_upb_IsLittleEndian()) {
    return;
  }
  val->uint32_val = val->uint64_val;
}
9340
/* Converts a decoded varint (held in val->uint64_val) into the in-memory
 * representation for field type `type`:
 *   - bool:               normalized to 0 or 1,
 *   - sint32 / sint64:    ZigZag-decoded,
 *   - int32/uint32/enum:  narrowed via decode_munge_int32().
 * All other types are left unchanged. */
static void decode_munge(int type, wireval* val) {
  switch (type) {
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_UInt32:
    case kUpb_FieldType_Enum:
      decode_munge_int32(val);
      break;
    case kUpb_FieldType_Bool:
      val->bool_val = val->uint64_val != 0;
      break;
    case kUpb_FieldType_SInt64: {
      uint64_t zz = val->uint64_val;
      val->uint64_val = (zz >> 1) ^ -(int64_t)(zz & 1);
      break;
    }
    case kUpb_FieldType_SInt32: {
      uint32_t zz = val->uint64_val;
      val->uint32_val = (zz >> 1) ^ -(int32_t)(zz & 1);
      break;
    }
  }
}
9363
decode_newsubmsg(upb_Decoder * d,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field)9364 static upb_Message* decode_newsubmsg(upb_Decoder* d,
9365 const upb_MiniTable_Sub* subs,
9366 const upb_MiniTable_Field* field) {
9367 const upb_MiniTable* subl = subs[field->submsg_index].submsg;
9368 upb_Message* msg = _upb_Message_New_inl(subl, &d->arena);
9369 if (!msg) decode_err(d, kUpb_DecodeStatus_OutOfMemory);
9370 return msg;
9371 }
9372
9373 UPB_NOINLINE
decode_isdonefallback(upb_Decoder * d,const char * ptr,int overrun)9374 const char* decode_isdonefallback(upb_Decoder* d, const char* ptr,
9375 int overrun) {
9376 int status;
9377 ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
9378 if (ptr == NULL) {
9379 return decode_err(d, status);
9380 }
9381 return ptr;
9382 }
9383
/* Reads `size` bytes at `ptr` into *str and returns the position after them.
 *
 * With kUpb_DecodeOption_AliasString the string view points directly at
 * `ptr`.  Otherwise the bytes are copied into memory from the decoder's
 * arena; allocation failure aborts the decode with
 * kUpb_DecodeStatus_OutOfMemory. */
static const char* decode_readstr(upb_Decoder* d, const char* ptr, int size,
                                  upb_StringView* str) {
  const bool alias = (d->options & kUpb_DecodeOption_AliasString) != 0;

  if (alias) {
    str->data = ptr;
  } else {
    char* copy = upb_Arena_Malloc(&d->arena, size);
    if (!copy) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    memcpy(copy, ptr, size);
    str->data = copy;
  }

  str->size = size;
  return ptr + size;
}
9397
9398 UPB_FORCEINLINE
decode_tosubmsg2(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable * subl,int size)9399 static const char* decode_tosubmsg2(upb_Decoder* d, const char* ptr,
9400 upb_Message* submsg,
9401 const upb_MiniTable* subl, int size) {
9402 int saved_delta = decode_pushlimit(d, ptr, size);
9403 if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
9404 ptr = decode_msg(d, ptr, submsg, subl);
9405 if (d->end_group != DECODE_NOGROUP)
9406 return decode_err(d, kUpb_DecodeStatus_Malformed);
9407 decode_poplimit(d, ptr, saved_delta);
9408 d->depth++;
9409 return ptr;
9410 }
9411
9412 UPB_FORCEINLINE
decode_tosubmsg(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field,int size)9413 static const char* decode_tosubmsg(upb_Decoder* d, const char* ptr,
9414 upb_Message* submsg,
9415 const upb_MiniTable_Sub* subs,
9416 const upb_MiniTable_Field* field, int size) {
9417 return decode_tosubmsg2(d, ptr, submsg, subs[field->submsg_index].submsg,
9418 size);
9419 }
9420
9421 UPB_FORCEINLINE
decode_group(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable * subl,uint32_t number)9422 static const char* decode_group(upb_Decoder* d, const char* ptr,
9423 upb_Message* submsg, const upb_MiniTable* subl,
9424 uint32_t number) {
9425 if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
9426 if (decode_isdone(d, &ptr)) {
9427 return decode_err(d, kUpb_DecodeStatus_Malformed);
9428 }
9429 ptr = decode_msg(d, ptr, submsg, subl);
9430 if (d->end_group != number) return decode_err(d, kUpb_DecodeStatus_Malformed);
9431 d->end_group = DECODE_NOGROUP;
9432 d->depth++;
9433 return ptr;
9434 }
9435
9436 UPB_FORCEINLINE
decode_togroup(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field)9437 static const char* decode_togroup(upb_Decoder* d, const char* ptr,
9438 upb_Message* submsg,
9439 const upb_MiniTable_Sub* subs,
9440 const upb_MiniTable_Field* field) {
9441 const upb_MiniTable* subl = subs[field->submsg_index].submsg;
9442 return decode_group(d, ptr, submsg, subl, field->number);
9443 }
9444
/* Writes `val` as a varint starting at `ptr` and returns the position just
 * past the last byte written.  Seven bits are emitted per byte, low bits
 * first, with 0x80 set on every byte except the last.  At least one byte is
 * always written; a 32-bit value needs at most five. */
static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) {
  for (;;) {
    uint8_t chunk = val & 0x7fU;
    val >>= 7;
    if (val == 0) {
      *ptr++ = chunk;
      return ptr;
    }
    chunk |= 0x80U;
    *ptr++ = chunk;
  }
}
9454
/* Appends two varint-encoded 32-bit values, `val1` followed by `val2`, to the
 * unknown-field data of `msg`.  Failure to append aborts the decode with
 * kUpb_DecodeStatus_OutOfMemory. */
static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
                                         uint32_t val1, uint32_t val2) {
  char scratch[20];
  char* cursor = upb_Decoder_EncodeVarint32(val1, scratch);
  cursor = upb_Decoder_EncodeVarint32(val2, cursor);

  if (_upb_Message_AddUnknown(msg, scratch, cursor - scratch, &d->arena)) {
    return;
  }
  decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
9466
9467 UPB_NOINLINE
decode_checkenum_slow(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable_Enum * e,const upb_MiniTable_Field * field,uint32_t v)9468 static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr,
9469 upb_Message* msg, const upb_MiniTable_Enum* e,
9470 const upb_MiniTable_Field* field,
9471 uint32_t v) {
9472 // OPT: binary search long lists?
9473 int n = e->value_count;
9474 for (int i = 0; i < n; i++) {
9475 if ((uint32_t)e->values[i] == v) return true;
9476 }
9477
9478 // Unrecognized enum goes into unknown fields.
9479 // For packed fields the tag could be arbitrarily far in the past, so we
9480 // just re-encode the tag and value here.
9481 uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
9482 upb_Message* unknown_msg =
9483 field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg;
9484 upb_Decode_AddUnknownVarints(d, unknown_msg, tag, v);
9485 return false;
9486 }
9487
9488 UPB_FORCEINLINE
decode_checkenum(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable_Enum * e,const upb_MiniTable_Field * field,wireval * val)9489 static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg,
9490 const upb_MiniTable_Enum* e,
9491 const upb_MiniTable_Field* field, wireval* val) {
9492 uint32_t v = val->uint32_val;
9493
9494 if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
9495
9496 return decode_checkenum_slow(d, ptr, msg, e, field, v);
9497 }
9498
9499 UPB_NOINLINE
decode_enum_toarray(upb_Decoder * d,const char * ptr,upb_Message * msg,upb_Array * arr,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field,wireval * val)9500 static const char* decode_enum_toarray(upb_Decoder* d, const char* ptr,
9501 upb_Message* msg, upb_Array* arr,
9502 const upb_MiniTable_Sub* subs,
9503 const upb_MiniTable_Field* field,
9504 wireval* val) {
9505 const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
9506 if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
9507 void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
9508 arr->len++;
9509 memcpy(mem, val, 4);
9510 return ptr;
9511 }
9512
9513 UPB_FORCEINLINE
decode_fixed_packed(upb_Decoder * d,const char * ptr,upb_Array * arr,wireval * val,const upb_MiniTable_Field * field,int lg2)9514 static const char* decode_fixed_packed(upb_Decoder* d, const char* ptr,
9515 upb_Array* arr, wireval* val,
9516 const upb_MiniTable_Field* field,
9517 int lg2) {
9518 int mask = (1 << lg2) - 1;
9519 size_t count = val->size >> lg2;
9520 if ((val->size & mask) != 0) {
9521 // Length isn't a round multiple of elem size.
9522 return decode_err(d, kUpb_DecodeStatus_Malformed);
9523 }
9524 decode_reserve(d, arr, count);
9525 void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
9526 arr->len += count;
9527 // Note: if/when the decoder supports multi-buffer input, we will need to
9528 // handle buffer seams here.
9529 if (_upb_IsLittleEndian()) {
9530 memcpy(mem, ptr, val->size);
9531 ptr += val->size;
9532 } else {
9533 const char* end = ptr + val->size;
9534 char* dst = mem;
9535 while (ptr < end) {
9536 if (lg2 == 2) {
9537 uint32_t val;
9538 memcpy(&val, ptr, sizeof(val));
9539 val = _upb_BigEndian_Swap32(val);
9540 memcpy(dst, &val, sizeof(val));
9541 } else {
9542 UPB_ASSERT(lg2 == 3);
9543 uint64_t val;
9544 memcpy(&val, ptr, sizeof(val));
9545 val = _upb_BigEndian_Swap64(val);
9546 memcpy(dst, &val, sizeof(val));
9547 }
9548 ptr += 1 << lg2;
9549 dst += 1 << lg2;
9550 }
9551 }
9552
9553 return ptr;
9554 }
9555
9556 UPB_FORCEINLINE
decode_varint_packed(upb_Decoder * d,const char * ptr,upb_Array * arr,wireval * val,const upb_MiniTable_Field * field,int lg2)9557 static const char* decode_varint_packed(upb_Decoder* d, const char* ptr,
9558 upb_Array* arr, wireval* val,
9559 const upb_MiniTable_Field* field,
9560 int lg2) {
9561 int scale = 1 << lg2;
9562 int saved_limit = decode_pushlimit(d, ptr, val->size);
9563 char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
9564 while (!decode_isdone(d, &ptr)) {
9565 wireval elem;
9566 ptr = decode_varint64(d, ptr, &elem.uint64_val);
9567 decode_munge(field->descriptortype, &elem);
9568 if (decode_reserve(d, arr, 1)) {
9569 out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
9570 }
9571 arr->len++;
9572 memcpy(out, &elem, scale);
9573 out += scale;
9574 }
9575 decode_poplimit(d, ptr, saved_limit);
9576 return ptr;
9577 }
9578
9579 UPB_NOINLINE
decode_enum_packed(upb_Decoder * d,const char * ptr,upb_Message * msg,upb_Array * arr,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field,wireval * val)9580 static const char* decode_enum_packed(upb_Decoder* d, const char* ptr,
9581 upb_Message* msg, upb_Array* arr,
9582 const upb_MiniTable_Sub* subs,
9583 const upb_MiniTable_Field* field,
9584 wireval* val) {
9585 const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
9586 int saved_limit = decode_pushlimit(d, ptr, val->size);
9587 char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
9588 while (!decode_isdone(d, &ptr)) {
9589 wireval elem;
9590 ptr = decode_varint64(d, ptr, &elem.uint64_val);
9591 decode_munge_int32(&elem);
9592 if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
9593 continue;
9594 }
9595 if (decode_reserve(d, arr, 1)) {
9596 out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
9597 }
9598 arr->len++;
9599 memcpy(out, &elem, 4);
9600 out += 4;
9601 }
9602 decode_poplimit(d, ptr, saved_limit);
9603 return ptr;
9604 }
9605
/* Stores a decoded value into the repeated field `field` of `msg`.
 *
 * The array pointer lives at msg + field->offset. It is created on first use
 * (initial capacity 4, element size taken from desctype_to_elem_size_lg2);
 * otherwise room for one more element is reserved. `op` selects how `val` is
 * appended: a single scalar, a string/bytes view, a sub-message or group, or
 * one of the packed encodings. Returns the updated input position. */
static const char* decode_toarray(upb_Decoder* d, const char* ptr,
                                  upb_Message* msg,
                                  const upb_MiniTable_Sub* subs,
                                  const upb_MiniTable_Field* field,
                                  wireval* val, int op) {
  upb_Array** slot = UPB_PTR_AT(msg, field->offset, void);
  upb_Array* arr = *slot;

  if (arr == NULL) {
    const size_t elem_lg2 = desctype_to_elem_size_lg2[field->descriptortype];
    arr = _upb_Array_New(&d->arena, 4, elem_lg2);
    if (arr == NULL) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    *slot = arr;
  } else {
    decode_reserve(d, arr, 1);
  }

  switch (op) {
    case OP_SCALAR_LG2(0):
    case OP_SCALAR_LG2(2):
    case OP_SCALAR_LG2(3): {
      /* Append scalar value. `op` itself serves as log2 of the element size
       * in the shift and size expressions below. */
      void* dst = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
      arr->len++;
      memcpy(dst, val, 1 << op);
      return ptr;
    }
    case OP_STRING:
      decode_verifyutf8(d, ptr, val->size);
      /* Fallthrough. */
    case OP_BYTES: {
      /* Append bytes. */
      upb_StringView* views = (upb_StringView*)_upb_array_ptr(arr);
      upb_StringView* str = views + arr->len;
      arr->len++;
      return decode_readstr(d, ptr, val->size, str);
    }
    case OP_SUBMSG: {
      /* Append submessage / group. */
      upb_Message* submsg = decode_newsubmsg(d, subs, field);
      upb_Message** elem = UPB_PTR_AT(_upb_array_ptr(arr),
                                      arr->len * sizeof(void*), upb_Message*);
      *elem = submsg;
      arr->len++;
      if (UPB_UNLIKELY(field->descriptortype == kUpb_FieldType_Group)) {
        return decode_togroup(d, ptr, submsg, subs, field);
      }
      return decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
    }
    case OP_FIXPCK_LG2(2):
    case OP_FIXPCK_LG2(3):
      return decode_fixed_packed(d, ptr, arr, val, field,
                                 op - OP_FIXPCK_LG2(0));
    case OP_VARPCK_LG2(0):
    case OP_VARPCK_LG2(2):
    case OP_VARPCK_LG2(3):
      return decode_varint_packed(d, ptr, arr, val, field,
                                  op - OP_VARPCK_LG2(0));
    case OP_ENUM:
      return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
    case OP_PACKED_ENUM:
      return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
    default:
      UPB_UNREACHABLE();
  }
}
9671
/* Decodes one map entry for the map field `field` of `msg`.
 *
 * The map object lives at msg + field->offset and is created lazily. The entry
 * is parsed into a temporary upb_MapEntry using the entry mini table from
 * subs[field->submsg_index]. If parsing left unknown fields on the entry, the
 * whole entry is re-emitted (tag, length, raw bytes) into msg's unknown
 * fields; otherwise the key/value pair is inserted into the map.
 *
 * Every allocation failure (map creation, proactive value message, unknown
 * field storage, map insert) raises kUpb_DecodeStatus_OutOfMemory through
 * decode_err(). Returns the position just past the entry. */
static const char* decode_tomap(upb_Decoder* d, const char* ptr,
                                upb_Message* msg, const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* field,
                                wireval* val) {
  upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*);
  upb_Map* map = *map_p;
  upb_MapEntry ent;
  const upb_MiniTable* entry = subs[field->submsg_index].submsg;

  if (!map) {
    /* Lazily create map. */
    const upb_MiniTable_Field* key_field = &entry->fields[0];
    const upb_MiniTable_Field* val_field = &entry->fields[1];
    char key_size = desctype_to_mapsize[key_field->descriptortype];
    char val_size = desctype_to_mapsize[val_field->descriptortype];
    UPB_ASSERT(key_field->offset == 0);
    UPB_ASSERT(val_field->offset == sizeof(upb_StringView));
    map = _upb_Map_New(&d->arena, key_size, val_size);
    /* Without this check an allocation failure would be dereferenced below
     * (map->key_size / map->val_size). */
    if (!map) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    *map_p = map;
  }

  /* Parse map entry. */
  memset(&ent, 0, sizeof(ent));

  if (entry->fields[1].descriptortype == kUpb_FieldType_Message ||
      entry->fields[1].descriptortype == kUpb_FieldType_Group) {
    /* Create proactively to handle the case where it doesn't appear. */
    void* val_msg = _upb_Message_New(entry->subs[0].submsg, &d->arena);
    /* A failed allocation must not end up in the map as a NULL message. */
    if (!val_msg) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    ent.v.val = upb_value_ptr(val_msg);
  }

  const char* start = ptr;
  ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size);
  // check if ent had any unknown fields
  size_t size;
  upb_Message_GetUnknown(&ent.k, &size);
  if (size != 0) {
    uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
    upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start));
    if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
      decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    }
  } else {
    if (_upb_Map_Insert(map, &ent.k, map->key_size, &ent.v, map->val_size,
                        &d->arena) == _kUpb_MapInsertStatus_OutOfMemory) {
      decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    }
  }
  return ptr;
}
9722
/* Stores a decoded value into the singular field `field` of `msg`.
 *
 * Steps, in order:
 *  1. For OP_ENUM, decode_checkenum() vets the value; if it is rejected the
 *     field is left untouched and `ptr` is returned.
 *  2. Presence is recorded: a hasbit when field->presence > 0, or the oneof
 *     case when field->presence < 0. When a sub-message is about to replace a
 *     different oneof member, the pointer slot is cleared first.
 *  3. The value is written at msg + field->offset according to `op`.
 * Returns the updated input position. */
static const char* decode_tomsg(upb_Decoder* d, const char* ptr,
                                upb_Message* msg, const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* field, wireval* val,
                                int op) {
  void* dst = UPB_PTR_AT(msg, field->offset, void);

  if (UPB_UNLIKELY(op == OP_ENUM) &&
      !decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
                        val)) {
    return ptr;
  }

  /* Set presence if necessary. */
  if (field->presence < 0) {
    /* Oneof case */
    uint32_t* oneof_case = _upb_oneofcase_field(msg, field);
    if (op == OP_SUBMSG && *oneof_case != field->number) {
      memset(dst, 0, sizeof(void*));
    }
    *oneof_case = field->number;
  } else if (field->presence > 0) {
    _upb_sethas_field(msg, field);
  }

  /* Store into message. */
  switch (op) {
    case OP_SUBMSG: {
      upb_Message** submsgp = dst;
      if (*submsgp == NULL) {
        *submsgp = decode_newsubmsg(d, subs, field);
      }
      upb_Message* submsg = *submsgp;
      if (UPB_UNLIKELY(field->descriptortype == kUpb_FieldType_Group)) {
        return decode_togroup(d, ptr, submsg, subs, field);
      }
      return decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
    }
    case OP_STRING:
      decode_verifyutf8(d, ptr, val->size);
      /* Fallthrough. */
    case OP_BYTES:
      return decode_readstr(d, ptr, val->size, dst);
    case OP_SCALAR_LG2(0):
      memcpy(dst, val, 1);
      return ptr;
    case OP_ENUM:
    case OP_SCALAR_LG2(2):
      memcpy(dst, val, 4);
      return ptr;
    case OP_SCALAR_LG2(3):
      memcpy(dst, val, 8);
      return ptr;
    default:
      UPB_UNREACHABLE();
  }

  return ptr;
}
9785
9786 UPB_NOINLINE
decode_checkrequired(upb_Decoder * d,const char * ptr,const upb_Message * msg,const upb_MiniTable * l)9787 const char* decode_checkrequired(upb_Decoder* d, const char* ptr,
9788 const upb_Message* msg,
9789 const upb_MiniTable* l) {
9790 assert(l->required_count);
9791 if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
9792 return ptr;
9793 }
9794 uint64_t msg_head;
9795 memcpy(&msg_head, msg, 8);
9796 msg_head = _upb_BigEndian_Swap64(msg_head);
9797 if (upb_MiniTable_requiredmask(l) & ~msg_head) {
9798 d->missing_required = true;
9799 }
9800 return ptr;
9801 }
9802
9803 UPB_FORCEINLINE
decode_tryfastdispatch(upb_Decoder * d,const char ** ptr,upb_Message * msg,const upb_MiniTable * layout)9804 static bool decode_tryfastdispatch(upb_Decoder* d, const char** ptr,
9805 upb_Message* msg,
9806 const upb_MiniTable* layout) {
9807 #if UPB_FASTTABLE
9808 if (layout && layout->table_mask != (unsigned char)-1) {
9809 uint16_t tag = fastdecode_loadtag(*ptr);
9810 intptr_t table = decode_totable(layout);
9811 *ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag);
9812 return true;
9813 }
9814 #endif
9815 return false;
9816 }
9817
/* Skips over the value of the field whose tag (already consumed) is `tag` and
 * returns the position just past it.
 *
 * Varints are decoded and discarded, fixed-width values advance by 4 or 8
 * bytes, delimited values advance by their decoded length, and groups are
 * skipped through decode_group() with no destination message. Any other wire
 * type raises kUpb_DecodeStatus_Malformed; that branch relies on decode_err()
 * not returning (NOTE(review): confirm it is declared noreturn). */
static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr,
                                         uint32_t tag) {
  const int field_number = tag >> 3;
  const int wire_type = tag & 7;

  switch (wire_type) {
    case kUpb_WireType_Varint: {
      uint64_t discarded;
      return decode_varint64(d, ptr, &discarded);
    }
    case kUpb_WireType_32Bit:
      return ptr + 4;
    case kUpb_WireType_64Bit:
      return ptr + 8;
    case kUpb_WireType_Delimited: {
      uint32_t len;
      ptr = upb_Decoder_DecodeSize(d, ptr, &len);
      return ptr + len;
    }
    case kUpb_WireType_StartGroup:
      return decode_group(d, ptr, NULL, NULL, field_number);
    default:
      decode_err(d, kUpb_DecodeStatus_Malformed);
  }
}
9842
9843 enum {
9844 kStartItemTag = ((1 << 3) | kUpb_WireType_StartGroup),
9845 kEndItemTag = ((1 << 3) | kUpb_WireType_EndGroup),
9846 kTypeIdTag = ((2 << 3) | kUpb_WireType_Varint),
9847 kMessageTag = ((3 << 3) | kUpb_WireType_Delimited),
9848 };
9849
/* Decodes a MessageSet payload whose type id resolved to the registered
 * extension `item_mt`, and stores the resulting sub-message in that extension
 * slot of `msg`.
 *
 * The payload is parsed with a nested upb_Decode() call that shares this
 * decoder's extension registry, options and arena. The sub-message pointer is
 * stored in the extension before the nested status is examined; a non-OK
 * status is then re-raised through decode_err(). */
static void upb_Decoder_AddKnownMessageSetItem(
    upb_Decoder* d, upb_Message* msg, const upb_MiniTable_Extension* item_mt,
    const char* data, uint32_t size) {
  upb_Message_Extension* ext =
      _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena);
  if (UPB_UNLIKELY(ext == NULL)) decode_err(d, kUpb_DecodeStatus_OutOfMemory);

  upb_Message* submsg = decode_newsubmsg(d, &ext->ext->sub, &ext->ext->field);
  const upb_DecodeStatus nested =
      upb_Decode(data, size, submsg, item_mt->sub.submsg, d->extreg,
                 d->options, &d->arena);
  memcpy(&ext->data, &submsg, sizeof(submsg));

  if (nested != kUpb_DecodeStatus_Ok) decode_err(d, nested);
}
9862
/* Preserves a MessageSet item with an unregistered type id by re-encoding it
 * into the unknown fields of `msg`.
 *
 * Three pieces are appended in order: a header (start-group tag, type-id tag
 * and value, message tag and length), the raw payload bytes, and the
 * end-group tag. Any failed append raises kUpb_DecodeStatus_OutOfMemory. */
static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d,
                                                 upb_Message* msg,
                                                 uint32_t type_id,
                                                 const char* message_data,
                                                 uint32_t message_size) {
  /* Six 32-bit varints at no more than 5 bytes each fit comfortably. */
  char buf[60];
  char* cur = buf;
  cur = upb_Decoder_EncodeVarint32(kStartItemTag, cur);
  cur = upb_Decoder_EncodeVarint32(kTypeIdTag, cur);
  cur = upb_Decoder_EncodeVarint32(type_id, cur);
  cur = upb_Decoder_EncodeVarint32(kMessageTag, cur);
  cur = upb_Decoder_EncodeVarint32(message_size, cur);

  /* Everything before `split` precedes the payload; the rest follows it. */
  char* const split = cur;
  char* const end = upb_Decoder_EncodeVarint32(kEndItemTag, cur);

  bool ok = _upb_Message_AddUnknown(msg, buf, split - buf, &d->arena);
  ok = ok &&
       _upb_Message_AddUnknown(msg, message_data, message_size, &d->arena);
  ok = ok && _upb_Message_AddUnknown(msg, split, end - split, &d->arena);

  if (!ok) decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
9886
/* Routes one complete MessageSet item (type id plus payload bytes).
 *
 * The type id is looked up in the decoder's extension registry for `layout`.
 * A hit is decoded as a known extension; a miss is preserved in the unknown
 * fields of `msg`. */
static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg,
                                          const upb_MiniTable* layout,
                                          uint32_t type_id, const char* data,
                                          uint32_t size) {
  const upb_MiniTable_Extension* item_mt =
      _upb_extreg_get(d->extreg, layout, type_id);

  if (item_mt == NULL) {
    upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size);
    return;
  }
  upb_Decoder_AddKnownMessageSetItem(d, msg, item_mt, data, size);
}
9899
/* Decodes the body of one MessageSet item group, up to its end-group tag.
 *
 * The type id and the payload may arrive in either order. Whichever comes
 * second triggers upb_Decoder_AddMessageSetItem(); a payload seen before its
 * type id is remembered (by pointer into the input) until the id shows up.
 * Repeated ids or payloads are ignored, and any other field inside the item
 * is skipped without being preserved.
 *
 * Returns the position just past the end-group tag. Running out of input
 * before that tag raises kUpb_DecodeStatus_Malformed. */
static const char* upb_Decoder_DecodeMessageSetItem(
    upb_Decoder* d, const char* ptr, upb_Message* msg,
    const upb_MiniTable* layout) {
  typedef enum {
    kUpb_HaveId = 1 << 0,
    kUpb_HavePayload = 1 << 1,
  } StateMask;

  uint32_t type_id = 0;
  upb_StringView preserved = {NULL, 0};
  StateMask state_mask = 0;

  while (!decode_isdone(d, &ptr)) {
    uint32_t tag;
    ptr = decode_tag(d, ptr, &tag);

    if (tag == kEndItemTag) return ptr;

    if (tag == kTypeIdTag) {
      uint64_t raw_id;
      ptr = decode_varint64(d, ptr, &raw_id);
      if (state_mask & kUpb_HaveId) continue;  // Ignore dup.
      state_mask |= kUpb_HaveId;
      type_id = raw_id;
      if (state_mask & kUpb_HavePayload) {
        upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data,
                                      preserved.size);
      }
    } else if (tag == kMessageTag) {
      uint32_t size;
      ptr = upb_Decoder_DecodeSize(d, ptr, &size);
      const char* data = ptr;
      ptr += size;
      if (state_mask & kUpb_HavePayload) continue;  // Ignore dup.
      state_mask |= kUpb_HavePayload;
      if (state_mask & kUpb_HaveId) {
        upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size);
      } else {
        // Out of order, we must preserve the payload.
        preserved.data = data;
        preserved.size = size;
      }
    } else {
      // We do not preserve unexpected fields inside a message set item.
      ptr = upb_Decoder_SkipField(d, ptr, tag);
    }
  }

  decode_err(d, kUpb_DecodeStatus_Malformed);
}
9952
decode_findfield(upb_Decoder * d,const upb_MiniTable * l,uint32_t field_number,int * last_field_index)9953 static const upb_MiniTable_Field* decode_findfield(upb_Decoder* d,
9954 const upb_MiniTable* l,
9955 uint32_t field_number,
9956 int* last_field_index) {
9957 static upb_MiniTable_Field none = {0, 0, 0, 0, 0, 0};
9958 if (l == NULL) return &none;
9959
9960 size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX
9961 if (idx < l->dense_below) {
9962 /* Fastest case: index into dense fields. */
9963 goto found;
9964 }
9965
9966 if (l->dense_below < l->field_count) {
9967 /* Linear search non-dense fields. Resume scanning from last_field_index
9968 * since fields are usually in order. */
9969 int last = *last_field_index;
9970 for (idx = last; idx < l->field_count; idx++) {
9971 if (l->fields[idx].number == field_number) {
9972 goto found;
9973 }
9974 }
9975
9976 for (idx = l->dense_below; idx < last; idx++) {
9977 if (l->fields[idx].number == field_number) {
9978 goto found;
9979 }
9980 }
9981 }
9982
9983 if (d->extreg) {
9984 switch (l->ext) {
9985 case kUpb_ExtMode_Extendable: {
9986 const upb_MiniTable_Extension* ext =
9987 _upb_extreg_get(d->extreg, l, field_number);
9988 if (ext) return &ext->field;
9989 break;
9990 }
9991 case kUpb_ExtMode_IsMessageSet:
9992 if (field_number == _UPB_MSGSET_ITEM) {
9993 static upb_MiniTable_Field item = {0, 0, 0, 0, TYPE_MSGSET_ITEM, 0};
9994 return &item;
9995 }
9996 break;
9997 }
9998 }
9999
10000 return &none; /* Unknown field. */
10001
10002 found:
10003 UPB_ASSERT(l->fields[idx].number == field_number);
10004 *last_field_index = idx;
10005 return &l->fields[idx];
10006 }
10007
10008 UPB_FORCEINLINE
decode_wireval(upb_Decoder * d,const char * ptr,const upb_MiniTable_Field * field,int wire_type,wireval * val,int * op)10009 static const char* decode_wireval(upb_Decoder* d, const char* ptr,
10010 const upb_MiniTable_Field* field,
10011 int wire_type, wireval* val, int* op) {
10012 switch (wire_type) {
10013 case kUpb_WireType_Varint:
10014 ptr = decode_varint64(d, ptr, &val->uint64_val);
10015 *op = varint_ops[field->descriptortype];
10016 decode_munge(field->descriptortype, val);
10017 return ptr;
10018 case kUpb_WireType_32Bit:
10019 memcpy(&val->uint32_val, ptr, 4);
10020 val->uint32_val = _upb_BigEndian_Swap32(val->uint32_val);
10021 *op = OP_SCALAR_LG2(2);
10022 if (((1 << field->descriptortype) & FIXED32_OK_MASK) == 0) {
10023 *op = OP_UNKNOWN;
10024 }
10025 return ptr + 4;
10026 case kUpb_WireType_64Bit:
10027 memcpy(&val->uint64_val, ptr, 8);
10028 val->uint64_val = _upb_BigEndian_Swap64(val->uint64_val);
10029 *op = OP_SCALAR_LG2(3);
10030 if (((1 << field->descriptortype) & FIXED64_OK_MASK) == 0) {
10031 *op = OP_UNKNOWN;
10032 }
10033 return ptr + 8;
10034 case kUpb_WireType_Delimited: {
10035 int ndx = field->descriptortype;
10036 if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += TYPE_COUNT;
10037 ptr = upb_Decoder_DecodeSize(d, ptr, &val->size);
10038 *op = delim_ops[ndx];
10039 return ptr;
10040 }
10041 case kUpb_WireType_StartGroup:
10042 val->uint32_val = field->number;
10043 if (field->descriptortype == kUpb_FieldType_Group) {
10044 *op = OP_SUBMSG;
10045 } else if (field->descriptortype == TYPE_MSGSET_ITEM) {
10046 *op = OP_MSGSET_ITEM;
10047 } else {
10048 *op = OP_UNKNOWN;
10049 }
10050 return ptr;
10051 default:
10052 break;
10053 }
10054 return decode_err(d, kUpb_DecodeStatus_Malformed);
10055 }
10056
10057 UPB_FORCEINLINE
decode_known(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable * layout,const upb_MiniTable_Field * field,int op,wireval * val)10058 static const char* decode_known(upb_Decoder* d, const char* ptr,
10059 upb_Message* msg, const upb_MiniTable* layout,
10060 const upb_MiniTable_Field* field, int op,
10061 wireval* val) {
10062 const upb_MiniTable_Sub* subs = layout->subs;
10063 uint8_t mode = field->mode;
10064
10065 if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) {
10066 const upb_MiniTable_Extension* ext_layout =
10067 (const upb_MiniTable_Extension*)field;
10068 upb_Message_Extension* ext =
10069 _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena);
10070 if (UPB_UNLIKELY(!ext)) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
10071 d->unknown_msg = msg;
10072 msg = &ext->data;
10073 subs = &ext->ext->sub;
10074 }
10075
10076 switch (mode & kUpb_FieldMode_Mask) {
10077 case kUpb_FieldMode_Array:
10078 return decode_toarray(d, ptr, msg, subs, field, val, op);
10079 case kUpb_FieldMode_Map:
10080 return decode_tomap(d, ptr, msg, subs, field, val);
10081 case kUpb_FieldMode_Scalar:
10082 return decode_tomsg(d, ptr, msg, subs, field, val, op);
10083 default:
10084 UPB_UNREACHABLE();
10085 }
10086 }
10087
/* Walks backwards from `ptr` over the varint that encodes `val` and returns a
 * pointer to its first byte.
 *
 * Bytes are consumed from last to first, accumulating their 7-bit payloads
 * until the accumulated value equals `val`. The caller must guarantee that
 * such an encoding really ends immediately before `ptr`; otherwise the loop
 * runs past the start of the buffer. */
static const char* decode_reverse_skip_varint(const char* ptr, uint32_t val) {
  uint32_t acc = 0;
  for (;;) {
    --ptr;
    acc = (acc << 7) | (uint32_t)(*ptr & 0x7f);
    if (acc == val) return ptr;
  }
}
10097
/* Handles a field that the layout does not know.
 *
 * Field number 0 is rejected as malformed. With no destination message the
 * field is simply skipped. Otherwise its complete wire encoding (tag
 * included) is appended to the unknown fields of `msg`. On entry `ptr` points
 * just past the already-read value (or length prefix); the start of the field
 * is recovered by walking backwards through the input buffer.
 *
 * Returns the position just past the field. */
static const char* decode_unknown(upb_Decoder* d, const char* ptr,
                                  upb_Message* msg, int field_number,
                                  int wire_type, wireval val) {
  if (field_number == 0) return decode_err(d, kUpb_DecodeStatus_Malformed);

  // Since unknown fields are the uncommon case, we do a little extra work here
  // to walk backwards through the buffer to find the field start. This frees
  // up a register in the fast paths (when the field is known), which leads to
  // significant speedups in benchmarks.
  const char* start = ptr;

  if (wire_type == kUpb_WireType_Delimited) ptr += val.size;

  if (!msg) {
    /* Nothing to preserve; only groups still need their body consumed. */
    if (wire_type == kUpb_WireType_StartGroup) {
      ptr = decode_group(d, ptr, NULL, NULL, field_number);
    }
    return ptr;
  }

  /* Step back over the value (or length prefix) that was already read. */
  if (wire_type == kUpb_WireType_Varint ||
      wire_type == kUpb_WireType_Delimited) {
    start--;
    while (start[-1] & 0x80) start--;
  } else if (wire_type == kUpb_WireType_32Bit) {
    start -= 4;
  } else if (wire_type == kUpb_WireType_64Bit) {
    start -= 8;
  }

  assert(start == d->debug_valstart);
  const uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
  start = decode_reverse_skip_varint(start, tag);
  assert(start == d->debug_tagstart);

  if (wire_type == kUpb_WireType_StartGroup) {
    d->unknown = start;
    d->unknown_msg = msg;
    ptr = decode_group(d, ptr, NULL, NULL, field_number);
    start = d->unknown;
    d->unknown = NULL;
  }

  if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
    return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
  }
  return ptr;
}
10147
10148 UPB_NOINLINE
decode_msg(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable * layout)10149 static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
10150 const upb_MiniTable* layout) {
10151 int last_field_index = 0;
10152
10153 #if UPB_FASTTABLE
10154 // The first time we want to skip fast dispatch, because we may have just been
10155 // invoked by the fast parser to handle a case that it bailed on.
10156 if (!decode_isdone(d, &ptr)) goto nofast;
10157 #endif
10158
10159 while (!decode_isdone(d, &ptr)) {
10160 uint32_t tag;
10161 const upb_MiniTable_Field* field;
10162 int field_number;
10163 int wire_type;
10164 wireval val;
10165 int op;
10166
10167 if (decode_tryfastdispatch(d, &ptr, msg, layout)) break;
10168
10169 #if UPB_FASTTABLE
10170 nofast:
10171 #endif
10172
10173 #ifndef NDEBUG
10174 d->debug_tagstart = ptr;
10175 #endif
10176
10177 UPB_ASSERT(ptr < d->limit_ptr);
10178 ptr = decode_tag(d, ptr, &tag);
10179 field_number = tag >> 3;
10180 wire_type = tag & 7;
10181
10182 #ifndef NDEBUG
10183 d->debug_valstart = ptr;
10184 #endif
10185
10186 if (wire_type == kUpb_WireType_EndGroup) {
10187 d->end_group = field_number;
10188 return ptr;
10189 }
10190
10191 field = decode_findfield(d, layout, field_number, &last_field_index);
10192 ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);
10193
10194 if (op >= 0) {
10195 ptr = decode_known(d, ptr, msg, layout, field, op, &val);
10196 } else {
10197 switch (op) {
10198 case OP_UNKNOWN:
10199 ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val);
10200 break;
10201 case OP_MSGSET_ITEM:
10202 ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout);
10203 break;
10204 }
10205 }
10206 }
10207
10208 return UPB_UNLIKELY(layout && layout->required_count)
10209 ? decode_checkrequired(d, ptr, msg, layout)
10210 : ptr;
10211 }
10212
/* Fallback entry used by the fast-table parser: ORs the pending `hasbits`
 * into the first 32 bits of `msg` and then continues in the generic decoder
 * with the layout recovered from `table`. `data` is unused. */
const char* fastdecode_generic(struct upb_Decoder* d, const char* ptr,
                               upb_Message* msg, intptr_t table,
                               uint64_t hasbits, uint64_t data) {
  (void)data;
  uint32_t* head = (uint32_t*)msg;
  *head |= hasbits;
  return decode_msg(d, ptr, msg, decode_totablep(table));
}
10220
decode_top(struct upb_Decoder * d,const char * buf,void * msg,const upb_MiniTable * l)10221 static upb_DecodeStatus decode_top(struct upb_Decoder* d, const char* buf,
10222 void* msg, const upb_MiniTable* l) {
10223 if (!decode_tryfastdispatch(d, &buf, msg, l)) {
10224 decode_msg(d, buf, msg, l);
10225 }
10226 if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed;
10227 if (d->missing_required) return kUpb_DecodeStatus_MissingRequired;
10228 return kUpb_DecodeStatus_Ok;
10229 }
10230
upb_Decode(const char * buf,size_t size,void * msg,const upb_MiniTable * l,const upb_ExtensionRegistry * extreg,int options,upb_Arena * arena)10231 upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
10232 const upb_MiniTable* l,
10233 const upb_ExtensionRegistry* extreg, int options,
10234 upb_Arena* arena) {
10235 upb_Decoder state;
10236 unsigned depth = (unsigned)options >> 16;
10237
10238 if (size <= 16) {
10239 memset(&state.patch, 0, 32);
10240 if (size) memcpy(&state.patch, buf, size);
10241 buf = state.patch;
10242 state.end = buf + size;
10243 state.limit = 0;
10244 options &= ~kUpb_DecodeOption_AliasString; // Can't alias patch buf.
10245 } else {
10246 state.end = buf + size - 16;
10247 state.limit = 16;
10248 }
10249
10250 state.extreg = extreg;
10251 state.limit_ptr = state.end;
10252 state.unknown = NULL;
10253 state.depth = depth ? depth : 64;
10254 state.end_group = DECODE_NOGROUP;
10255 state.options = (uint16_t)options;
10256 state.missing_required = false;
10257 state.arena.head = arena->head;
10258 state.arena.last_size = arena->last_size;
10259 state.arena.cleanup_metadata = arena->cleanup_metadata;
10260 state.arena.parent = arena;
10261
10262 upb_DecodeStatus status = UPB_SETJMP(state.err);
10263 if (UPB_LIKELY(status == kUpb_DecodeStatus_Ok)) {
10264 status = decode_top(&state, buf, msg, l);
10265 }
10266
10267 arena->head.ptr = state.arena.head.ptr;
10268 arena->head.end = state.arena.head.end;
10269 arena->cleanup_metadata = state.arena.cleanup_metadata;
10270 return status;
10271 }
10272
10273 #undef OP_UNKNOWN
10274 #undef OP_SKIP
10275 #undef OP_SCALAR_LG2
10276 #undef OP_FIXPCK_LG2
10277 #undef OP_VARPCK_LG2
10278 #undef OP_STRING
10279 #undef OP_BYTES
10280 #undef OP_SUBMSG
10281
10282 /** upb/encode.c ************************************************************/
10283 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
10284
10285
10286 #include <setjmp.h>
10287 #include <string.h>
10288
10289
10290 /* Must be last. */
10291
10292 #define UPB_PB_VARINT_MAX_LEN 10
10293
10294 UPB_NOINLINE
/* Writes `val` to `buf` as a base-128 varint, least significant group first,
 * and returns the number of bytes written (1 to 10). */
static size_t encode_varint64(uint64_t val, char* buf) {
  size_t written = 0;
  for (;;) {
    uint8_t group = (uint8_t)(val & 0x7fU);
    val >>= 7;
    if (val != 0) group |= 0x80U; /* Continuation bit. */
    buf[written] = group;
    written++;
    if (val == 0) return written;
  }
}
10305
/* ZigZag-encodes a signed 32-bit integer so that values of small magnitude
 * map to small unsigned values: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The sign mask is built explicitly rather than with `n >> 31`, because
 * right-shifting a negative signed value is implementation-defined in C. */
static uint32_t encode_zz32(int32_t n) {
  const uint32_t sign_mask = (n < 0) ? UINT32_MAX : 0;
  return ((uint32_t)n << 1) ^ sign_mask;
}
/* ZigZag-encodes a signed 64-bit integer so that values of small magnitude
 * map to small unsigned values: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The sign mask is built explicitly rather than with `n >> 63`, because
 * right-shifting a negative signed value is implementation-defined in C. */
static uint64_t encode_zz64(int64_t n) {
  const uint64_t sign_mask = (n < 0) ? UINT64_MAX : 0;
  return ((uint64_t)n << 1) ^ sign_mask;
}
10312
10313 typedef struct {
10314 jmp_buf err;
10315 upb_alloc* alloc;
10316 char *buf, *ptr, *limit;
10317 int options;
10318 int depth;
10319 _upb_mapsorter sorter;
10320 } upb_encstate;
10321
/* Rounds `bytes` up to a power of two, never returning less than 128.
 *
 * If the next power of two does not fit in size_t, `bytes` itself is returned
 * instead. The unguarded doubling would wrap to zero in that case and loop
 * forever. */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t ret = 128;
  while (ret < bytes) {
    if (ret > SIZE_MAX / 2) return bytes; /* Doubling would overflow. */
    ret *= 2;
  }
  return ret;
}
10329
encode_err(upb_encstate * e)10330 UPB_NORETURN static void encode_err(upb_encstate* e) { UPB_LONGJMP(e->err, 1); }
10331
10332 UPB_NOINLINE
encode_growbuffer(upb_encstate * e,size_t bytes)10333 static void encode_growbuffer(upb_encstate* e, size_t bytes) {
10334 size_t old_size = e->limit - e->buf;
10335 size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
10336 char* new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
10337
10338 if (!new_buf) encode_err(e);
10339
10340 /* We want previous data at the end, realloc() put it at the beginning. */
10341 if (old_size > 0) {
10342 memmove(new_buf + new_size - old_size, e->buf, old_size);
10343 }
10344
10345 e->ptr = new_buf + new_size - (e->limit - e->ptr);
10346 e->limit = new_buf + new_size;
10347 e->buf = new_buf;
10348
10349 e->ptr -= bytes;
10350 }
10351
10352 /* Call to ensure that at least "bytes" bytes are available for writing at
10353 * e->ptr. Returns false if the bytes could not be allocated. */
10354 UPB_FORCEINLINE
encode_reserve(upb_encstate * e,size_t bytes)10355 static void encode_reserve(upb_encstate* e, size_t bytes) {
10356 if ((size_t)(e->ptr - e->buf) < bytes) {
10357 encode_growbuffer(e, bytes);
10358 return;
10359 }
10360
10361 e->ptr -= bytes;
10362 }
10363
10364 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_encstate * e,const void * data,size_t len)10365 static void encode_bytes(upb_encstate* e, const void* data, size_t len) {
10366 if (len == 0) return; /* memcpy() with zero size is UB */
10367 encode_reserve(e, len);
10368 memcpy(e->ptr, data, len);
10369 }
10370
/* Prepends a 64-bit fixed-width value, passed through
 * _upb_BigEndian_Swap64() before its 8 bytes are written. */
static void encode_fixed64(upb_encstate* e, uint64_t val) {
  const uint64_t wire = _upb_BigEndian_Swap64(val);
  encode_bytes(e, &wire, sizeof(wire));
}
10375
/* Prepends a 32-bit fixed-width value, passed through
 * _upb_BigEndian_Swap32() before its 4 bytes are written. */
static void encode_fixed32(upb_encstate* e, uint32_t val) {
  const uint32_t wire = _upb_BigEndian_Swap32(val);
  encode_bytes(e, &wire, sizeof(wire));
}
10380
10381 UPB_NOINLINE
encode_longvarint(upb_encstate * e,uint64_t val)10382 static void encode_longvarint(upb_encstate* e, uint64_t val) {
10383 size_t len;
10384 char* start;
10385
10386 encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
10387 len = encode_varint64(val, e->ptr);
10388 start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
10389 memmove(start, e->ptr, len);
10390 e->ptr = start;
10391 }
10392
10393 UPB_FORCEINLINE
encode_varint(upb_encstate * e,uint64_t val)10394 static void encode_varint(upb_encstate* e, uint64_t val) {
10395 if (val < 128 && e->ptr != e->buf) {
10396 --e->ptr;
10397 *e->ptr = val;
10398 } else {
10399 encode_longvarint(e, val);
10400 }
10401 }
10402
/* Writes the object representation of "d" as a fixed 64-bit value. */
static void encode_double(upb_encstate* e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  /* Copy the bytes instead of casting pointers between unrelated types. */
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed64(e, bits);
}
10409
/* Writes the object representation of "d" as a fixed 32-bit value. */
static void encode_float(upb_encstate* e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  /* Copy the bytes instead of casting pointers between unrelated types. */
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed32(e, bits);
}
10416
/* Writes a field tag: the field number shifted left by three bits, OR-ed with
 * the wire type, encoded as a varint. */
static void encode_tag(upb_encstate* e, uint32_t field_number,
                       uint8_t wire_type) {
  uint32_t tag = (field_number << 3) | wire_type;
  encode_varint(e, tag);
}
10421
/* Encodes every element of a fixed-width array (elem_size is 4 or 8).
 *
 * With a zero "tag" on a little-endian host the whole array is emitted with a
 * single encode_bytes() call.  Otherwise elements are visited from last to
 * first; each one is passed through the matching _upb_BigEndian_Swap*()
 * helper and written, followed by "tag" when it is non-zero.
 *
 * The element loop assumes arr->len > 0. */
static void encode_fixedarray(upb_encstate* e, const upb_Array* arr,
                              size_t elem_size, uint32_t tag) {
  size_t bytes = arr->len * elem_size;
  const char* data = _upb_array_constptr(arr);

  if (!tag && _upb_IsLittleEndian()) {
    encode_bytes(e, data, bytes);
    return;
  }

  const char* cur = data + bytes - elem_size;
  for (;;) {
    if (elem_size == 4) {
      uint32_t v32;
      memcpy(&v32, cur, sizeof(v32));
      v32 = _upb_BigEndian_Swap32(v32);
      encode_bytes(e, &v32, elem_size);
    } else {
      UPB_ASSERT(elem_size == 8);
      uint64_t v64;
      memcpy(&v64, cur, sizeof(v64));
      v64 = _upb_BigEndian_Swap64(v64);
      encode_bytes(e, &v64, elem_size);
    }

    if (tag) encode_varint(e, tag);
    if (cur == data) break;
    cur -= elem_size;
  }
}
10451
/* Forward declaration: encode_scalar() and encode_array() below recurse into
 * encode_message() for sub-messages and groups. */
static void encode_message(upb_encstate* e, const upb_Message* msg,
                           const upb_MiniTable* m, size_t* size);
10454
/* Encodes the single field value stored at _field_mem, followed by a call to
 * encode_tag() for the field.
 *
 * e          - encoder state.
 * _field_mem - address of the field's storage; it is reinterpreted according
 *              to f->descriptortype.
 * subs       - sub-table array, indexed by f->submsg_index for group and
 *              message fields.
 * f          - field description (descriptortype, number, submsg_index).
 *
 * The payload is written first and the tag second; since every write moves
 * e->ptr backwards, the tag ends up in front of the payload in the buffer.
 * For group/message fields whose pointer is NULL nothing is written at all.
 * Errors are reported through encode_err(), which does not return. */
static void encode_scalar(upb_encstate* e, const void* _field_mem,
                          const upb_MiniTable_Sub* subs,
                          const upb_MiniTable_Field* f) {
  const char* field_mem = _field_mem;
  int wire_type;

/* CASE(ctype, type, wtype, encodeval): loads a "ctype" named val from
 * field_mem, calls encode_<type>(e, encodeval), records wtype as the wire
 * type and leaves the switch. */
#define CASE(ctype, type, wtype, encodeval) \
  {                                         \
    ctype val = *(ctype*)field_mem;         \
    encode_##type(e, encodeval);            \
    wire_type = wtype;                      \
    break;                                  \
  }

  switch (f->descriptortype) {
    case kUpb_FieldType_Double:
      CASE(double, double, kUpb_WireType_64Bit, val);
    case kUpb_FieldType_Float:
      CASE(float, float, kUpb_WireType_32Bit, val);
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_UInt64:
      CASE(uint64_t, varint, kUpb_WireType_Varint, val);
    case kUpb_FieldType_UInt32:
      CASE(uint32_t, varint, kUpb_WireType_Varint, val);
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_Enum:
      /* The int32_t is widened to int64_t before being encoded. */
      CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val);
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_Fixed64:
      CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val);
    case kUpb_FieldType_Fixed32:
    case kUpb_FieldType_SFixed32:
      CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val);
    case kUpb_FieldType_Bool:
      CASE(bool, varint, kUpb_WireType_Varint, val);
    case kUpb_FieldType_SInt32:
      CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val));
    case kUpb_FieldType_SInt64:
      CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val));
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes: {
      /* Payload bytes first, then their length. */
      upb_StringView view = *(upb_StringView*)field_mem;
      encode_bytes(e, view.data, view.size);
      encode_varint(e, view.size);
      wire_type = kUpb_WireType_Delimited;
      break;
    }
    case kUpb_FieldType_Group: {
      size_t size;
      void* submsg = *(void**)field_mem;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      if (submsg == NULL) {
        return;
      }
      /* Depth budget: fail once the remaining depth reaches zero. */
      if (--e->depth == 0) encode_err(e);
      /* End-group tag, then the body; the start-group tag is written by the
       * encode_tag() call at the bottom of this function. */
      encode_tag(e, f->number, kUpb_WireType_EndGroup);
      encode_message(e, submsg, subm, &size);
      wire_type = kUpb_WireType_StartGroup;
      e->depth++;
      break;
    }
    case kUpb_FieldType_Message: {
      size_t size;
      void* submsg = *(void**)field_mem;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      if (submsg == NULL) {
        return;
      }
      /* Depth budget: fail once the remaining depth reaches zero. */
      if (--e->depth == 0) encode_err(e);
      /* Body first, then the size reported by encode_message(). */
      encode_message(e, submsg, subm, &size);
      encode_varint(e, size);
      wire_type = kUpb_WireType_Delimited;
      e->depth++;
      break;
    }
    default:
      UPB_UNREACHABLE();
  }
#undef CASE

  encode_tag(e, f->number, wire_type);
}
10537
/* Encodes the array field "f" of "msg".
 *
 * e    - encoder state.
 * msg  - message holding the upb_Array* at f->offset.
 * subs - sub-table array, indexed by f->submsg_index for groups/messages.
 * f    - field description; f->mode carries kUpb_LabelFlags_IsPacked.
 *
 * A NULL or empty array writes nothing.  Elements are visited from last to
 * first.  Fixed-width and varint element types honour "packed": when packed,
 * elements are written without individual tags and a single length plus a
 * delimited tag is written after them; otherwise each element is followed by
 * its own tag.  String, bytes, group and message elements always get one tag
 * per element and return before the packed trailer.  Errors are reported
 * through encode_err(), which does not return. */
static void encode_array(upb_encstate* e, const upb_Message* msg,
                         const upb_MiniTable_Sub* subs,
                         const upb_MiniTable_Field* f) {
  const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*);
  bool packed = f->mode & kUpb_LabelFlags_IsPacked;
  /* Bytes already in the buffer; used below to compute the packed length. */
  size_t pre_len = e->limit - e->ptr;

  if (arr == NULL || arr->len == 0) {
    return;
  }

/* VARINT_CASE(ctype, encode): walks the array backwards as "ctype" elements,
 * writing the expression "encode" (which may refer to ptr) as a varint and,
 * when not packed, the varint tag after it; then leaves the switch. */
#define VARINT_CASE(ctype, encode)                                       \
  {                                                                      \
    const ctype* start = _upb_array_constptr(arr);                       \
    const ctype* ptr = start + arr->len;                                 \
    uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \
    do {                                                                 \
      ptr--;                                                             \
      encode_varint(e, encode);                                          \
      if (tag) encode_varint(e, tag);                                    \
    } while (ptr != start);                                              \
  }                                                                      \
  break;

/* TAG(wire_type): per-element tag value, or 0 when the field is packed. */
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))

  switch (f->descriptortype) {
    case kUpb_FieldType_Double:
      encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit));
      break;
    case kUpb_FieldType_Float:
      encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit));
      break;
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_Fixed64:
      encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit));
      break;
    case kUpb_FieldType_Fixed32:
    case kUpb_FieldType_SFixed32:
      encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit));
      break;
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_UInt64:
      VARINT_CASE(uint64_t, *ptr);
    case kUpb_FieldType_UInt32:
      VARINT_CASE(uint32_t, *ptr);
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_Enum:
      /* Each int32_t is widened to int64_t before being encoded. */
      VARINT_CASE(int32_t, (int64_t)*ptr);
    case kUpb_FieldType_Bool:
      VARINT_CASE(bool, *ptr);
    case kUpb_FieldType_SInt32:
      VARINT_CASE(int32_t, encode_zz32(*ptr));
    case kUpb_FieldType_SInt64:
      VARINT_CASE(int64_t, encode_zz64(*ptr));
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes: {
      const upb_StringView* start = _upb_array_constptr(arr);
      const upb_StringView* ptr = start + arr->len;
      do {
        ptr--;
        /* Payload, length, then tag for every element. */
        encode_bytes(e, ptr->data, ptr->size);
        encode_varint(e, ptr->size);
        encode_tag(e, f->number, kUpb_WireType_Delimited);
      } while (ptr != start);
      return;
    }
    case kUpb_FieldType_Group: {
      const void* const* start = _upb_array_constptr(arr);
      const void* const* ptr = start + arr->len;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      /* One depth level is consumed for the whole array, not per element. */
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        encode_tag(e, f->number, kUpb_WireType_EndGroup);
        encode_message(e, *ptr, subm, &size);
        encode_tag(e, f->number, kUpb_WireType_StartGroup);
      } while (ptr != start);
      e->depth++;
      return;
    }
    case kUpb_FieldType_Message: {
      const void* const* start = _upb_array_constptr(arr);
      const void* const* ptr = start + arr->len;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      /* One depth level is consumed for the whole array, not per element. */
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        encode_message(e, *ptr, subm, &size);
        encode_varint(e, size);
        encode_tag(e, f->number, kUpb_WireType_Delimited);
      } while (ptr != start);
      e->depth++;
      return;
    }
  }
#undef VARINT_CASE

  if (packed) {
    /* Length of everything written for this array, then the delimited tag. */
    encode_varint(e, e->limit - e->ptr - pre_len);
    encode_tag(e, f->number, kUpb_WireType_Delimited);
  }
}
10643
/* Encodes one map entry as a length-delimited field numbered "number".
 * fields[0] of "layout" describes the key and fields[1] the value.  The value
 * is written before the key, and the entry's length and tag are written
 * last. */
static void encode_mapentry(upb_encstate* e, uint32_t number,
                            const upb_MiniTable* layout,
                            const upb_MapEntry* ent) {
  const upb_MiniTable_Field* key_desc = &layout->fields[0];
  const upb_MiniTable_Field* val_desc = &layout->fields[1];
  size_t before = e->limit - e->ptr;

  encode_scalar(e, &ent->v, layout->subs, val_desc);
  encode_scalar(e, &ent->k, layout->subs, key_desc);

  /* Number of bytes the key/value pair added to the buffer. */
  size_t entry_size = (e->limit - e->ptr) - before;
  encode_varint(e, entry_size);
  encode_tag(e, number, kUpb_WireType_Delimited);
}
10657
/* Encodes the map field "f" of "msg", one encode_mapentry() call per entry.
 *
 * e    - encoder state; e->options selects deterministic output.
 * msg  - message holding the upb_Map* at f->offset.
 * subs - sub-table array; subs[f->submsg_index].submsg is the two-field
 *        map-entry layout.
 * f    - field description.
 *
 * A NULL map writes nothing.  With kUpb_Encode_Deterministic set, entries are
 * visited in the order produced by the map sorter; otherwise in the string
 * table's iteration order.  If the sorter cannot be set up, the encode is
 * aborted through encode_err(), which does not return. */
static void encode_map(upb_encstate* e, const upb_Message* msg,
                       const upb_MiniTable_Sub* subs,
                       const upb_MiniTable_Field* f) {
  const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*);
  const upb_MiniTable* layout = subs[f->submsg_index].submsg;
  UPB_ASSERT(layout->field_count == 2);

  if (map == NULL) return;

  if (e->options & kUpb_Encode_Deterministic) {
    _upb_sortedmap sorted;
    /* _upb_mapsorter_pushmap() returns false when it could not obtain memory
     * for the sorted view; iterating "sorted" in that state is not valid. */
    if (!_upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype,
                                map, &sorted)) {
      encode_err(e);
    }
    upb_MapEntry ent;
    while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
      encode_mapentry(e, f->number, layout, &ent);
    }
    _upb_mapsorter_popmap(&e->sorter, &sorted);
  } else {
    upb_strtable_iter i;
    upb_strtable_begin(&i, &map->table);
    for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
      upb_StringView key = upb_strtable_iter_key(&i);
      const upb_value val = upb_strtable_iter_value(&i);
      upb_MapEntry ent;
      /* Convert the table's key/value representation into a map entry. */
      _upb_map_fromkey(key, &ent.k, map->key_size);
      _upb_map_fromvalue(val, &ent.v, map->val_size);
      encode_mapentry(e, f->number, layout, &ent);
    }
  }
}
10689
/* Decides whether field "f" of "msg" has to be written.
 *
 *   f->presence > 0  : the result of _upb_hasbit_field().
 *   f->presence < 0  : the field is in a oneof; it is written when the oneof
 *                      case equals f->number.
 *   f->presence == 0 : proto3 presence or map/array; the field is written
 *                      when its storage is non-zero (non-empty for a string
 *                      view).
 *
 * "e" and "subs" are not used. */
static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
                                const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* f) {
  if (f->presence > 0) {
    /* Proto2 presence: hasbit. */
    return _upb_hasbit_field(msg, f);
  }
  if (f->presence < 0) {
    /* Field is in a oneof. */
    return _upb_getoneofcase_field(msg, f) == f->number;
  }

  /* Proto3 presence or map/array: inspect the stored value, whose width is
   * selected by the representation bits of f->mode. */
  const void* storage = UPB_PTR_AT(msg, f->offset, void);
  switch (f->mode >> kUpb_FieldRep_Shift) {
    case kUpb_FieldRep_1Byte: {
      char byte;
      memcpy(&byte, storage, 1);
      return byte != 0;
    }
#if UINTPTR_MAX == 0xffffffff
    case kUpb_FieldRep_Pointer:
#endif
    case kUpb_FieldRep_4Byte: {
      uint32_t word;
      memcpy(&word, storage, 4);
      return word != 0;
    }
#if UINTPTR_MAX != 0xffffffff
    case kUpb_FieldRep_Pointer:
#endif
    case kUpb_FieldRep_8Byte: {
      uint64_t dword;
      memcpy(&dword, storage, 8);
      return dword != 0;
    }
    case kUpb_FieldRep_StringView: {
      const upb_StringView* view = (const upb_StringView*)storage;
      return view->size != 0;
    }
    default:
      UPB_UNREACHABLE();
  }
}
10733
/* Dispatches on upb_FieldMode_Get(field) to the scalar, map or array
 * encoder.  For scalars the address of the field inside "msg" is passed. */
static void encode_field(upb_encstate* e, const upb_Message* msg,
                         const upb_MiniTable_Sub* subs,
                         const upb_MiniTable_Field* field) {
  switch (upb_FieldMode_Get(field)) {
    case kUpb_FieldMode_Scalar:
      encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field);
      return;
    case kUpb_FieldMode_Map:
      encode_map(e, msg, subs, field);
      return;
    case kUpb_FieldMode_Array:
      encode_array(e, msg, subs, field);
      return;
    default:
      UPB_UNREACHABLE();
  }
}
10751
10752 /* message MessageSet {
10753 * repeated group Item = 1 {
10754 * required int32 type_id = 2;
10755 * required string message = 3;
10756 * }
10757 * } */
encode_msgset_item(upb_encstate * e,const upb_Message_Extension * ext)10758 static void encode_msgset_item(upb_encstate* e,
10759 const upb_Message_Extension* ext) {
10760 size_t size;
10761 encode_tag(e, 1, kUpb_WireType_EndGroup);
10762 encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size);
10763 encode_varint(e, size);
10764 encode_tag(e, 3, kUpb_WireType_Delimited);
10765 encode_varint(e, ext->ext->field.number);
10766 encode_tag(e, 2, kUpb_WireType_Varint);
10767 encode_tag(e, 1, kUpb_WireType_StartGroup);
10768 }
10769
/* Encodes "msg" according to mini-table "m" and stores the number of bytes
 * this call added to the buffer in *size.
 *
 * In call order: optional required-field check, unknown fields (unless
 * kUpb_Encode_SkipUnknown is set), extensions (if the table is extendable),
 * then the regular fields from the last table entry to the first.  Errors are
 * reported through encode_err(), which does not return. */
static void encode_message(upb_encstate* e, const upb_Message* msg,
                           const upb_MiniTable* m, size_t* size) {
  const size_t start_len = e->limit - e->ptr;

  if ((e->options & kUpb_Encode_CheckRequired) && m->required_count) {
    /* Compare the table's required mask against the first 8 bytes of the
     * message; any required bit that is missing there is an error. */
    uint64_t head;
    memcpy(&head, msg, 8);
    head = _upb_BigEndian_Swap64(head);
    if (upb_MiniTable_requiredmask(m) & ~head) encode_err(e);
  }

  if (!(e->options & kUpb_Encode_SkipUnknown)) {
    size_t unknown_len;
    const char* unknown_data = upb_Message_GetUnknown(msg, &unknown_len);
    if (unknown_data) encode_bytes(e, unknown_data, unknown_len);
  }

  if (m->ext != kUpb_ExtMode_NonExtendable) {
    /* Encode all extensions together. Unlike C++, we do not attempt to keep
     * these in field number order relative to normal fields or even to each
     * other. */
    size_t ext_count;
    const upb_Message_Extension* exts = _upb_Message_Getexts(msg, &ext_count);
    for (size_t i = 0; i < ext_count; i++) {
      const upb_Message_Extension* ext = &exts[i];
      if (UPB_UNLIKELY(m->ext == kUpb_ExtMode_IsMessageSet)) {
        encode_msgset_item(e, ext);
      } else {
        encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field);
      }
    }
  }

  /* Regular fields, walking the table from its last entry to its first. */
  for (size_t remaining = m->field_count; remaining > 0; remaining--) {
    const upb_MiniTable_Field* f = &m->fields[remaining - 1];
    if (encode_shouldencode(e, msg, m->subs, f)) {
      encode_field(e, msg, m->subs, f);
    }
  }

  *size = (e->limit - e->ptr) - start_len;
}
10823
/* Serializes "msg" (described by mini-table "l") into memory obtained from
 * "arena".
 *
 * options - encode flags; the bits above bit 15 give the depth limit, with 0
 *           selecting a limit of 64.
 * size    - receives the number of encoded bytes (0 on failure).
 *
 * Returns a pointer to the first encoded byte, a pointer to a static
 * placeholder byte when the encoding is empty, or NULL when encoding failed
 * (encode_err() was invoked). */
char* upb_Encode(const void* msg, const upb_MiniTable* l, int options,
                 upb_Arena* arena, size_t* size) {
  static char empty;
  upb_encstate e;
  unsigned max_depth = (unsigned)options >> 16;
  char* ret = NULL;

  e.alloc = upb_Arena_Alloc(arena);
  e.buf = NULL;
  e.limit = NULL;
  e.ptr = NULL;
  e.depth = max_depth ? max_depth : 64;
  e.options = options;
  _upb_mapsorter_init(&e.sorter);

  if (UPB_SETJMP(e.err)) {
    /* Reached through encode_err(). */
    *size = 0;
    ret = NULL;
  } else {
    encode_message(&e, msg, l, size);
    *size = e.limit - e.ptr;
    if (*size != 0) {
      UPB_ASSERT(e.ptr);
      ret = e.ptr;
    } else {
      /* Nothing was written: hand back a non-NULL pointer so that an empty
       * encoding differs from the NULL returned on failure. */
      ret = &empty;
    }
  }

  _upb_mapsorter_destroy(&e.sorter);
  return ret;
}
10856
10857 /** upb/msg.c ************************************************************/
10858
10859
10860 /** upb_Message ***************************************************************/
10861
/* Size of the upb_Message_InternalData header.  Unknown-field bytes start at
 * this offset inside the internal block (see realloc_internal()). */
static const size_t overhead = sizeof(upb_Message_InternalData);
10863
upb_Message_Getinternal_const(const upb_Message * msg)10864 static const upb_Message_Internal* upb_Message_Getinternal_const(
10865 const upb_Message* msg) {
10866 ptrdiff_t size = sizeof(upb_Message_Internal);
10867 return (upb_Message_Internal*)((char*)msg - size);
10868 }
10869
_upb_Message_New(const upb_MiniTable * l,upb_Arena * a)10870 upb_Message* _upb_Message_New(const upb_MiniTable* l, upb_Arena* a) {
10871 return _upb_Message_New_inl(l, a);
10872 }
10873
/* Zeroes upb_msg_sizeof(l) bytes, starting at the upb_Message_Internal record
 * that sits directly in front of "msg". */
void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l) {
  char* begin = (char*)msg - sizeof(upb_Message_Internal);
  memset(begin, 0, upb_msg_sizeof(l));
}
10878
/* Makes sure the message's internal block has at least "need" free bytes
 * between the end of the unknown-field data (unknown_end) and the start of
 * the extension data (ext_begin).
 *
 * The block is allocated on first use (at least 128 bytes) and grown through
 * the arena otherwise; when it grows, the extension data is moved so that it
 * again ends at the end of the block.
 *
 * Returns false if the arena could not provide the memory, in which case the
 * message is left unchanged. */
static bool realloc_internal(upb_Message* msg, size_t need, upb_Arena* arena) {
  upb_Message_Internal* in = upb_Message_Getinternal(msg);

  if (in->internal == NULL) {
    /* No internal data, allocate from scratch. */
    const size_t alloc_size =
        UPB_MAX(128, _upb_Log2CeilingSize(need + overhead));
    upb_Message_InternalData* fresh = upb_Arena_Malloc(arena, alloc_size);
    if (fresh == NULL) return false;
    fresh->size = alloc_size;
    fresh->unknown_end = overhead;
    fresh->ext_begin = alloc_size;
    in->internal = fresh;
  } else if (in->internal->ext_begin - in->internal->unknown_end < need) {
    /* Internal data is too small, reallocate. */
    const size_t new_size = _upb_Log2CeilingSize(in->internal->size + need);
    const size_t ext_bytes = in->internal->size - in->internal->ext_begin;
    const size_t new_ext_begin = new_size - ext_bytes;
    upb_Message_InternalData* grown =
        upb_Arena_Realloc(arena, in->internal, in->internal->size, new_size);
    if (grown == NULL) return false;
    if (ext_bytes != 0) {
      /* Need to move extension data to the end. */
      char* raw = (char*)grown;
      memmove(raw + new_ext_begin, raw + grown->ext_begin, ext_bytes);
    }
    grown->ext_begin = new_ext_begin;
    grown->size = new_size;
    in->internal = grown;
  }

  UPB_ASSERT(in->internal->ext_begin - in->internal->unknown_end >= need);
  return true;
}
10910
/* Appends "len" bytes from "data" to the message's unknown-field data,
 * growing the internal block if required.  Returns false if memory could not
 * be obtained. */
bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
                             upb_Arena* arena) {
  if (!realloc_internal(msg, len, arena)) return false;

  upb_Message_InternalData* internal = upb_Message_Getinternal(msg)->internal;
  char* dst = (char*)internal + internal->unknown_end;
  memcpy(dst, data, len);
  internal->unknown_end += len;
  return true;
}
10919
/* Drops the unknown-field data stored directly on "msg" by resetting
 * unknown_end to the header size.  Does nothing if the message has no
 * internal block. */
void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) {
  upb_Message_InternalData* internal = upb_Message_Getinternal(msg)->internal;
  if (internal == NULL) return;
  internal->unknown_end = overhead;
}
10926
upb_Message_GetUnknown(const upb_Message * msg,size_t * len)10927 const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) {
10928 const upb_Message_Internal* in = upb_Message_Getinternal_const(msg);
10929 if (in->internal) {
10930 *len = in->internal->unknown_end - overhead;
10931 return (char*)(in->internal + 1);
10932 } else {
10933 *len = 0;
10934 return NULL;
10935 }
10936 }
10937
_upb_Message_Getexts(const upb_Message * msg,size_t * count)10938 const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg,
10939 size_t* count) {
10940 const upb_Message_Internal* in = upb_Message_Getinternal_const(msg);
10941 if (in->internal) {
10942 *count = (in->internal->size - in->internal->ext_begin) /
10943 sizeof(upb_Message_Extension);
10944 return UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
10945 } else {
10946 *count = 0;
10947 return NULL;
10948 }
10949 }
10950
_upb_Message_Getext(const upb_Message * msg,const upb_MiniTable_Extension * e)10951 const upb_Message_Extension* _upb_Message_Getext(
10952 const upb_Message* msg, const upb_MiniTable_Extension* e) {
10953 size_t n;
10954 const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &n);
10955
10956 /* For now we use linear search exclusively to find extensions. If this
10957 * becomes an issue due to messages with lots of extensions, we can introduce
10958 * a table of some sort. */
10959 for (size_t i = 0; i < n; i++) {
10960 if (ext[i].ext == e) {
10961 return &ext[i];
10962 }
10963 }
10964
10965 return NULL;
10966 }
10967
/* Removes the extension described by "ext_l" from "msg", if present.  The
 * record at ext_begin is copied over the removed one and ext_begin is then
 * advanced by one record. */
void _upb_Message_Clearext(upb_Message* msg,
                           const upb_MiniTable_Extension* ext_l) {
  upb_Message_Internal* in = upb_Message_Getinternal(msg);
  if (!in->internal) return;

  upb_Message_Extension* victim =
      (upb_Message_Extension*)_upb_Message_Getext(msg, ext_l);
  if (!victim) return;

  const upb_Message_Extension* first =
      UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
  *victim = *first;
  in->internal->ext_begin += sizeof(upb_Message_Extension);
}
10981
_upb_Message_GetOrCreateExtension(upb_Message * msg,const upb_MiniTable_Extension * e,upb_Arena * arena)10982 upb_Message_Extension* _upb_Message_GetOrCreateExtension(
10983 upb_Message* msg, const upb_MiniTable_Extension* e, upb_Arena* arena) {
10984 upb_Message_Extension* ext =
10985 (upb_Message_Extension*)_upb_Message_Getext(msg, e);
10986 if (ext) return ext;
10987 if (!realloc_internal(msg, sizeof(upb_Message_Extension), arena)) return NULL;
10988 upb_Message_Internal* in = upb_Message_Getinternal(msg);
10989 in->internal->ext_begin -= sizeof(upb_Message_Extension);
10990 ext = UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
10991 memset(ext, 0, sizeof(upb_Message_Extension));
10992 ext->ext = e;
10993 return ext;
10994 }
10995
/* Returns the number of extension records stored on "msg". */
size_t upb_Message_ExtensionCount(const upb_Message* msg) {
  size_t n;
  (void)_upb_Message_Getexts(msg, &n); /* only the count is of interest */
  return n;
}
11001
11002 /** upb_Array *****************************************************************/
11003
/* Grows the storage of "arr" so that it can hold at least "min_size"
 * elements.
 *
 * The capacity starts from max(arr->size, 4) and is doubled until it reaches
 * "min_size".  The low three bits of arr->data hold log2 of the element
 * size.
 *
 * Returns false, leaving "arr" untouched, if the requested capacity or its
 * size in bytes cannot be represented in a size_t or if the arena cannot
 * provide the memory. */
bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena) {
  size_t new_size = UPB_MAX(arr->size, 4);
  int elem_size_lg2 = arr->data & 7;
  size_t old_bytes = arr->size << elem_size_lg2;
  size_t new_bytes;
  void* ptr = _upb_array_ptr(arr);

  /* Log2 ceiling of size. */
  while (new_size < min_size) {
    /* Doubling would wrap around to 0 and the loop would never end. */
    if (new_size > SIZE_MAX / 2) return false;
    new_size *= 2;
  }

  /* The byte count must fit as well. */
  if (new_size > (SIZE_MAX >> elem_size_lg2)) return false;

  new_bytes = new_size << elem_size_lg2;
  ptr = upb_Arena_Realloc(arena, ptr, old_bytes, new_bytes);

  if (!ptr) {
    return false;
  }

  arr->data = _upb_tag_arrptr(ptr, elem_size_lg2);
  arr->size = new_size;
  return true;
}
11025
/* Returns *arr_ptr, first creating an array with an initial capacity of 4 and
 * the given element size if *arr_ptr is NULL.  Returns NULL if the array
 * could not be created. */
static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2,
                                    upb_Arena* arena) {
  if (*arr_ptr != NULL) return *arr_ptr;

  upb_Array* created = _upb_Array_New(arena, 4, elem_size_lg2);
  if (created != NULL) *arr_ptr = created;
  return created;
}
11036
/* Resizes the array at *arr_ptr to "size" elements, creating it first if it
 * does not exist.  Returns the array's data pointer, or NULL if the array
 * could not be created or resized. */
void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size,
                                 int elem_size_lg2, upb_Arena* arena) {
  upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
  if (!arr) return NULL;
  if (!_upb_Array_Resize(arr, size, arena)) return NULL;
  return _upb_array_ptr(arr);
}
11043
/* Appends one element, copied from "value", to the array at *arr_ptr,
 * creating the array first if it does not exist.  The element occupies
 * 1 << elem_size_lg2 bytes.  Returns false if the array could not be created
 * or grown. */
bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value,
                                int elem_size_lg2, upb_Arena* arena) {
  upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
  if (arr == NULL) return false;

  /* Index of the new element: the length before resizing. */
  const size_t index = arr->len;
  if (!_upb_Array_Resize(arr, index + 1, arena)) return false;

  char* slot = (char*)_upb_array_ptr(arr) + (index << elem_size_lg2);
  memcpy(slot, value, 1 << elem_size_lg2);
  return true;
}
11059
11060 /** upb_Map *******************************************************************/
11061
_upb_Map_New(upb_Arena * a,size_t key_size,size_t value_size)11062 upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) {
11063 upb_Map* map = upb_Arena_Malloc(a, sizeof(upb_Map));
11064
11065 if (!map) {
11066 return NULL;
11067 }
11068
11069 upb_strtable_init(&map->table, 4, a);
11070 map->key_size = key_size;
11071 map->val_size = value_size;
11072
11073 return map;
11074 }
11075
_upb_mapsorter_getkeys(const void * _a,const void * _b,void * a_key,void * b_key,size_t size)11076 static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key,
11077 void* b_key, size_t size) {
11078 const upb_tabent* const* a = _a;
11079 const upb_tabent* const* b = _b;
11080 upb_StringView a_tabkey = upb_tabstrview((*a)->key);
11081 upb_StringView b_tabkey = upb_tabstrview((*b)->key);
11082 _upb_map_fromkey(a_tabkey, a_key, size);
11083 _upb_map_fromkey(b_tabkey, b_key, size);
11084 }
11085
/* Three-way comparison: evaluates to -1, 0 or 1 when (a) is less than, equal
 * to or greater than (b).  Arguments may be evaluated more than once. */
#define UPB_COMPARE_INTEGERS(a, b) ((a) < (b) ? -1 : ((a) == (b) ? 0 : 1))
11087
/* Comparison callback that orders two map entries by their keys, read as
 * signed 64-bit integers. */
static int _upb_mapsorter_cmpi64(const void* _a, const void* _b) {
  int64_t lhs;
  int64_t rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, sizeof(int64_t));
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11093
/* Comparison callback that orders two map entries by their keys, read as
 * unsigned 64-bit integers. */
static int _upb_mapsorter_cmpu64(const void* _a, const void* _b) {
  uint64_t lhs;
  uint64_t rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, sizeof(uint64_t));
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11099
/* Comparison callback that orders two map entries by their keys, read as
 * signed 32-bit integers. */
static int _upb_mapsorter_cmpi32(const void* _a, const void* _b) {
  int32_t lhs;
  int32_t rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, sizeof(int32_t));
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11105
/* Comparison callback that orders two map entries by their keys, read as
 * unsigned 32-bit integers. */
static int _upb_mapsorter_cmpu32(const void* _a, const void* _b) {
  uint32_t lhs;
  uint32_t rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, sizeof(uint32_t));
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11111
/* Comparison callback that orders two map entries by their one-byte boolean
 * keys. */
static int _upb_mapsorter_cmpbool(const void* _a, const void* _b) {
  bool lhs;
  bool rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 1);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11117
_upb_mapsorter_cmpstr(const void * _a,const void * _b)11118 static int _upb_mapsorter_cmpstr(const void* _a, const void* _b) {
11119 upb_StringView a, b;
11120 _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
11121 size_t common_size = UPB_MIN(a.size, b.size);
11122 int cmp = memcmp(a.data, b.data, common_size);
11123 if (cmp) return -cmp;
11124 return UPB_COMPARE_INTEGERS(a.size, b.size);
11125 }
11126
11127 #undef UPB_COMPARE_INTEGERS
11128
/* Appends pointers to all non-empty table entries of "map" to the sorter's
 * entry array and sorts them with the comparison function that matches
 * "key_type".
 *
 * s        - sorter; s->entries is grown with realloc() when needed.
 * key_type - type of the map's keys.
 * map      - map to sort.
 * sorted   - receives the range [start, end) of s->entries that belongs to
 *            this map; pos is set to start.
 *
 * Returns true on success.  Returns false if s->entries could not be grown;
 * in that case the sorter is left exactly as it was and "sorted" describes an
 * empty range, so iterating it yields nothing. */
bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type,
                            const upb_Map* map, _upb_sortedmap* sorted) {
  int map_size = _upb_Map_Size(map);
  sorted->start = s->size;
  sorted->pos = sorted->start;
  sorted->end = sorted->start + map_size;

  /* Grow s->entries if necessary. */
  if (sorted->end > s->cap) {
    size_t new_cap = _upb_Log2CeilingSize(sorted->end);
    /* Keep the old pointer until realloc() has succeeded: on failure the old
     * block is still allocated and must stay reachable so it can be freed. */
    void* grown = realloc(s->entries, new_cap * sizeof(*s->entries));
    if (!grown) {
      sorted->end = sorted->start;
      return false;
    }
    s->entries = grown;
    s->cap = new_cap;
  }

  s->size = sorted->end;

  /* Copy non-empty entries from the table to s->entries. */
  upb_tabent const** dst = &s->entries[sorted->start];
  const upb_tabent* src = map->table.t.entries;
  const upb_tabent* end = src + upb_table_size(&map->table.t);
  for (; src < end; src++) {
    if (!upb_tabent_isempty(src)) {
      *dst = src;
      dst++;
    }
  }
  UPB_ASSERT(dst == &s->entries[sorted->end]);

  /* Sort entries according to the key type. */

  int (*compar)(const void*, const void*);

  switch (key_type) {
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_SInt64:
      compar = _upb_mapsorter_cmpi64;
      break;
    case kUpb_FieldType_UInt64:
    case kUpb_FieldType_Fixed64:
      compar = _upb_mapsorter_cmpu64;
      break;
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_SInt32:
    case kUpb_FieldType_SFixed32:
    case kUpb_FieldType_Enum:
      compar = _upb_mapsorter_cmpi32;
      break;
    case kUpb_FieldType_UInt32:
    case kUpb_FieldType_Fixed32:
      compar = _upb_mapsorter_cmpu32;
      break;
    case kUpb_FieldType_Bool:
      compar = _upb_mapsorter_cmpbool;
      break;
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes:
      compar = _upb_mapsorter_cmpstr;
      break;
    default:
      UPB_UNREACHABLE();
  }

  /* Zero or one entry is already in order.  Skipping the call also avoids
   * handing qsort() a base pointer derived from a possibly-NULL s->entries
   * when the map is empty. */
  if (map_size > 1) {
    qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
  }
  return true;
}
11195
11196 /** upb_ExtensionRegistry *****************************************************/
11197
/* Registry that maps (extendee mini-table, field number) pairs to extension
 * descriptors. */
struct upb_ExtensionRegistry {
  upb_Arena* arena;   /* Arena passed to table insertions. */
  upb_strtable exts;  /* Key is upb_MiniTable* concatenated with fieldnum. */
};

/* Size in bytes of a key as built by extreg_key(). */
#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t))
11204
/* Builds a registry key in "buf": the bytes of the pointer "l" followed by
 * the bytes of "fieldnum".  "buf" must have room for EXTREG_KEY_SIZE
 * bytes. */
static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) {
  char* num_pos = buf + sizeof(l);
  memcpy(buf, &l, sizeof(l));
  memcpy(num_pos, &fieldnum, sizeof(fieldnum));
}
11209
upb_ExtensionRegistry_New(upb_Arena * arena)11210 upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) {
11211 upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r));
11212 if (!r) return NULL;
11213 r->arena = arena;
11214 if (!upb_strtable_init(&r->exts, 8, arena)) return NULL;
11215 return r;
11216 }
11217
/* Registers the "count" extensions in the array "e".
 *
 * Registration stops at the first extension whose key is already present in
 * the registry or whose insertion fails.  In that case the extensions
 * inserted earlier by this call are removed again and false is returned.
 * Returns true if every extension was inserted. */
bool _upb_extreg_add(upb_ExtensionRegistry* r,
                     const upb_MiniTable_Extension** e, size_t count) {
  char key[EXTREG_KEY_SIZE];
  const upb_MiniTable_Extension** const first = e;
  const upb_MiniTable_Extension** const last = UPB_PTRADD(e, count);

  for (; e < last; e++) {
    const upb_MiniTable_Extension* ext = *e;
    extreg_key(key, ext->extendee, ext->field.number);
    upb_value existing;
    if (upb_strtable_lookup2(&r->exts, key, EXTREG_KEY_SIZE, &existing) ||
        !upb_strtable_insert(&r->exts, key, EXTREG_KEY_SIZE,
                             upb_value_constptr(ext), r->arena)) {
      break;
    }
  }

  if (e >= last) return true;

  /* Back out the entries previously added. */
  for (const upb_MiniTable_Extension** undo = first; undo < e; undo++) {
    const upb_MiniTable_Extension* ext = *undo;
    extreg_key(key, ext->extendee, ext->field.number);
    upb_strtable_remove2(&r->exts, key, EXTREG_KEY_SIZE, NULL);
  }
  return false;
}
11246
_upb_extreg_get(const upb_ExtensionRegistry * r,const upb_MiniTable * l,uint32_t num)11247 const upb_MiniTable_Extension* _upb_extreg_get(const upb_ExtensionRegistry* r,
11248 const upb_MiniTable* l,
11249 uint32_t num) {
11250 char buf[EXTREG_KEY_SIZE];
11251 upb_value v;
11252 extreg_key(buf, l, num);
11253 if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) {
11254 return upb_value_getconstptr(v);
11255 } else {
11256 return NULL;
11257 }
11258 }
11259
11260 /** upb/table.c ************************************************************/
11261 /*
11262 * upb_table Implementation
11263 *
11264 * Implementation is heavily inspired by Lua's ltable.c.
11265 */
11266
11267 #include <string.h>
11268
11269
11270 /* Must be last. */
11271
/* Cap, expressed as a log2, used by log2ceil() and to size the key histogram
 * in upb_inttable_compact(). */
#define UPB_MAXARRSIZE 16 /* 64k. */
11273
/* From Chromium. */
/* Yields the number of elements in array |x|.  The divisor is 1 when
 * sizeof(x) is an exact multiple of the element size and 0 otherwise, so an
 * argument whose size does not divide evenly becomes a division by zero. */
#define ARRAY_SIZE(x) \
  ((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
11277
/* Load factor of the hash part: init() sets a table's max_count to its size
 * multiplied by this value. */
static const double MAX_LOAD = 0.85;
11279
/* The minimum utilization of the array part of a mixed hash/array table. This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects). The lower this is, the more memory we'll use.
 * Consulted by upb_inttable_compact() when choosing the array part's size. */
static const double MIN_DENSITY = 0.1;
11284
/* Reports whether |v| has at most one bit set.  Note that zero is treated as
 * a power of two here. */
static bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  return (v & (v - 1)) == 0;
}
11286
_upb_value_val(uint64_t val)11287 static upb_value _upb_value_val(uint64_t val) {
11288 upb_value ret;
11289 _upb_value_setval(&ret, val);
11290 return ret;
11291 }
11292
log2ceil(uint64_t v)11293 static int log2ceil(uint64_t v) {
11294 int ret = 0;
11295 bool pow2 = is_pow2(v);
11296 while (v >>= 1) ret++;
11297 ret = pow2 ? ret : ret + 1; /* Ceiling. */
11298 return UPB_MIN(UPB_MAXARRSIZE, ret);
11299 }
11300
/* Copies |len| bytes from |s| into a fresh buffer allocated from arena |a|
 * and appends a terminating NUL byte.
 *
 * |s| is treated as raw bytes and need not be NUL-terminated.  Returns NULL
 * if |len| is SIZE_MAX or if the arena allocation fails. */
char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
  char* p;

  /* Prevent overflow errors: len + 1 below would wrap around to 0. */
  if (len == SIZE_MAX) return NULL;
  /* Always null-terminate, even if binary data; but don't rely on the input to
   * have a null-terminating byte since it may be a raw binary buffer. */
  p = upb_Arena_Malloc(a, len + 1);
  if (p == NULL) return NULL;
  /* memcpy() requires valid pointers even for a zero-length copy, so skip it
   * for empty input where |s| may legitimately be NULL. */
  if (len != 0) memcpy(p, s, len);
  p[len] = 0;
  return p;
}
11317
/* A type to represent the lookup key of either a strtable or an inttable.
 * Built by intkey() (which sets |num|) or strkey2() (which sets |str|); only
 * the member that was set is meaningful. */
typedef union {
  uintptr_t num; /* Integer key. */
  struct {
    const char* str; /* Key bytes. */
    size_t len;      /* Number of bytes at |str|. */
  } str;
} lookupkey_t;
11326
strkey2(const char * str,size_t len)11327 static lookupkey_t strkey2(const char* str, size_t len) {
11328 lookupkey_t k;
11329 k.str.str = str;
11330 k.str.len = len;
11331 return k;
11332 }
11333
intkey(uintptr_t key)11334 static lookupkey_t intkey(uintptr_t key) {
11335 lookupkey_t k;
11336 k.num = key;
11337 return k;
11338 }
11339
/* Computes the 32-bit hash of a key as it is stored in a table entry. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Reports whether the stored key |k1| matches the lookup key |k2|. */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
11342
11343 /* Base table (shared code) ***************************************************/
11344
/* Hash of an integer key: the key itself, converted to 32 bits. */
static uint32_t upb_inthash(uintptr_t key) {
  const uint32_t low_bits = (uint32_t)key;
  return low_bits;
}
11346
upb_getentry(const upb_table * t,uint32_t hash)11347 static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
11348 return t->entries + (hash & t->mask);
11349 }
11350
/* Reports whether an array-part slot holds a value; a slot whose value is
 * all one bits, (uint64_t)-1, counts as empty. */
static bool upb_arrhas(upb_tabval key) {
  const uint64_t empty_marker = (uint64_t)-1;
  return key.val != empty_marker;
}
11352
/* Reports whether the table's entry count has reached its max_count. */
static bool isfull(upb_table* t) {
  return t->max_count == t->count;
}
11354
/* Initializes |t| as an empty table with the given log2 size, allocating and
 * zeroing its entry array from arena |a|.
 *
 * When the computed entry array is zero bytes long, no allocation is made and
 * t->entries is set to NULL.  Returns false only if the allocation fails. */
static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
  size_t bytes;

  t->count = 0;
  t->size_lg2 = size_lg2;
  if (upb_table_size(t)) {
    t->mask = upb_table_size(t) - 1;
  } else {
    t->mask = 0;
  }
  t->max_count = upb_table_size(t) * MAX_LOAD;

  bytes = upb_table_size(t) * sizeof(upb_tabent);
  if (bytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = upb_Arena_Malloc(a, bytes);
  if (!t->entries) return false;
  memset(t->entries, 0, bytes);
  return true;
}
11372
/* Finds an empty entry slot, scanning from the slot after |e| to the end of
 * the entry array and then from the start of the array.  The caller is
 * expected to guarantee that an empty slot exists; otherwise the assertion
 * fires (and NULL is returned if assertions are compiled out). */
static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
  upb_tabent* const first = t->entries;
  upb_tabent* const limit = first + upb_table_size(t);
  upb_tabent* probe;

  for (probe = e + 1; probe < limit; probe++) {
    if (upb_tabent_isempty(probe)) return probe;
  }
  for (probe = first; probe < limit; probe++) {
    if (upb_tabent_isempty(probe)) return probe;
  }
  UPB_ASSERT(false);
  return NULL;
}
11385
getentry_mutable(upb_table * t,uint32_t hash)11386 static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
11387 return (upb_tabent*)upb_getentry(t, hash);
11388 }
11389
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)11390 static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
11391 uint32_t hash, eqlfunc_t* eql) {
11392 const upb_tabent* e;
11393
11394 if (t->size_lg2 == 0) return NULL;
11395 e = upb_getentry(t, hash);
11396 if (upb_tabent_isempty(e)) return NULL;
11397 while (1) {
11398 if (eql(e->key, key)) return e;
11399 if ((e = e->next) == NULL) return NULL;
11400 }
11401 }
11402
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)11403 static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
11404 uint32_t hash, eqlfunc_t* eql) {
11405 return (upb_tabent*)findentry(t, key, hash, eql);
11406 }
11407
/* Looks up |key| in |t|.  Returns true if an entry exists; in that case the
 * entry's value is stored in |*v| when |v| is non-NULL. */
static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
                   uint32_t hash, eqlfunc_t* eql) {
  const upb_tabent* hit = findentry(t, key, hash, eql);
  if (hit == NULL) return false;
  if (v != NULL) _upb_value_setval(v, hit->val.val);
  return true;
}
11420
/* Inserts (|tabkey|, |val|) into |t|.
 *
 * The given key must not already exist in the table.
 *
 * |key| is the lookup form of the key and |tabkey| the form stored in the
 * entry; |hash| is the key's hash.  |hashfunc| is used to re-hash the entry
 * currently occupying our main position, and |eql| is only used by the
 * assertions.  This function does not grow the table: on a collision it
 * relies on emptyent() finding a free slot, so callers check isfull() first. */
static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash, hashfunc_t* hashfunc,
                   eqlfunc_t* eql) {
  upb_tabent* mainpos_e;
  upb_tabent* our_e;

  UPB_ASSERT(findentry(t, key, hash, eql) == NULL);

  t->count++;
  /* The slot our hash maps to; by default the new entry goes here. */
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent* new_e = emptyent(t, mainpos_e);
    /* Head of collider's chain. */
    upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain). Insert to new ent and append to this chain.
       */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain). This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e; /* copies next. */
      /* Walk the collider's chain to the entry that links to mainpos_e, then
       * redirect that link to the relocated copy. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        UPB_ASSERT(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  /* Fill in the chosen slot; its next pointer was set above. */
  our_e->key = tabkey;
  our_e->val.val = val.val;
  UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
}
11467
/* Removes the entry matching |key| from |t|.
 *
 * Returns true if an entry was removed, false if the key was not found.  On
 * success the removed value is stored in |*val| and the removed stored key in
 * |*removed|, each only when the corresponding pointer is non-NULL.  A slot
 * is marked empty by setting its key to 0. */
static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
               upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
  upb_tabent* chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val);
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Pull the second element into the head slot (this also copies its
       * next pointer) and free the slot it used to occupy. */
      upb_tabent* move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0; /* Make the slot empty. */
    } else {
      chain->key = 0; /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    /* Advance until chain->next is the match, or the chain ends. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove. */
      upb_tabent* rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val);
      if (removed) *removed = rm->key;
      rm->key = 0; /* Make the slot empty. */
      /* Unlink the removed entry from the chain. */
      chain->next = rm->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}
11506
next(const upb_table * t,size_t i)11507 static size_t next(const upb_table* t, size_t i) {
11508 do {
11509 if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
11510 } while (upb_tabent_isempty(&t->entries[i]));
11511
11512 return i;
11513 }
11514
/* Returns the index of the first non-empty entry, by asking next() for the
 * entry following index (size_t)-1. */
static size_t begin(const upb_table* t) {
  const size_t before_first = (size_t)-1;
  return next(t, before_first);
}
11516
11517 /* upb_strtable ***************************************************************/
11518
11519 /* A simple "subclass" of upb_table that only adds a hash function for strings.
11520 */
11521
strcopy(lookupkey_t k2,upb_Arena * a)11522 static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
11523 uint32_t len = (uint32_t)k2.str.len;
11524 char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
11525 if (str == NULL) return 0;
11526 memcpy(str, &len, sizeof(uint32_t));
11527 if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
11528 str[sizeof(uint32_t) + k2.str.len] = '\0';
11529 return (uintptr_t)str;
11530 }
11531
11532 /* Adapted from ABSL's wyhash. */
11533
/* Reads 8 bytes starting at |p| into a uint64_t in host byte order.  Going
 * through memcpy() means |p| does not need any particular alignment. */
static uint64_t UnalignedLoad64(const void* p) {
  uint64_t loaded;
  memcpy(&loaded, p, sizeof(loaded));
  return loaded;
}
11539
/* Reads 4 bytes starting at |p| into a uint32_t in host byte order.  Going
 * through memcpy() means |p| does not need any particular alignment. */
static uint32_t UnalignedLoad32(const void* p) {
  uint32_t loaded;
  memcpy(&loaded, p, sizeof(loaded));
  return loaded;
}
11545
11546 #if defined(_MSC_VER) && defined(_M_X64)
11547 #include <intrin.h>
11548 #endif
11549
/* Multiplies |v0| by |v1| as a 128-bit product.  Returns the low 64 bits and
 * stores the high 64 bits in |*out_high|.  The portable fallback ignores the
 * carry out of the low word, so its result can differ slightly from the exact
 * product; callers here only use it for hash mixing. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
  const __uint128_t product = (__uint128_t)v0 * v1;
  *out_high = (uint64_t)(product >> 64);
  return (uint64_t)product;
#elif defined(_MSC_VER) && defined(_M_X64)
  return _umul128(v0, v1, out_high);
#else
  /* Split both operands into 32-bit halves and combine partial products. */
  const uint64_t v0_hi = v0 >> 32;
  const uint64_t v0_lo = v0 & 0xffffffff;
  const uint64_t v1_hi = v1 >> 32;
  const uint64_t v1_lo = v1 & 0xffffffff;
  const uint64_t cross_a = v0_hi * v1_lo;
  const uint64_t cross_b = v0_lo * v1_hi;
  // Omit carry bit, for mixing we do not care about exact numerical precision.
  *out_high = v0_hi * v1_hi + ((cross_a >> 32) + (cross_b >> 32));
  return v0_lo * v1_lo + ((cross_a << 32) + (cross_b << 32));
#endif
}
11576
/* Mixing step: multiplies the two inputs with upb_umul128() and XORs the low
 * and high halves of the product together. */
static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
  uint64_t hi;
  const uint64_t lo = upb_umul128(v0, v1, &hi);
  return lo ^ hi;
}
11582
/* Hashes the |len| bytes at |data| to a 64-bit value.
 *
 * |seed| perturbs the initial state.  |salt| must provide at least five
 * elements: salt[0] and salt[1] are always read, salt[2]..salt[4] only when
 * the input is longer than 64 bytes.  All input reads go through the
 * unaligned-load helpers or byte indexing, so |data| needs no alignment. */
static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
                       const uint64_t salt[]) {
  const uint8_t* ptr = (const uint8_t*)data;
  /* Remember the original length; |len| is consumed below and the original
   * value is folded into the final mix. */
  uint64_t starting_length = (uint64_t)len;
  uint64_t current_state = seed ^ salt[0];

  if (len > 64) {
    // If we have more than 64 bytes, we're going to handle chunks of 64
    // bytes at a time. We're going to build up two separate hash states
    // which we will then hash together.
    uint64_t duplicated_state = current_state;

    do {
      uint64_t a = UnalignedLoad64(ptr);
      uint64_t b = UnalignedLoad64(ptr + 8);
      uint64_t c = UnalignedLoad64(ptr + 16);
      uint64_t d = UnalignedLoad64(ptr + 24);
      uint64_t e = UnalignedLoad64(ptr + 32);
      uint64_t f = UnalignedLoad64(ptr + 40);
      uint64_t g = UnalignedLoad64(ptr + 48);
      uint64_t h = UnalignedLoad64(ptr + 56);

      /* First 32 bytes of the chunk feed current_state... */
      uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
      uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
      current_state = (cs0 ^ cs1);

      /* ...and the last 32 bytes feed duplicated_state. */
      uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
      uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
      duplicated_state = (ds0 ^ ds1);

      ptr += 64;
      len -= 64;
    } while (len > 64);

    current_state = current_state ^ duplicated_state;
  }

  // We now have a data `ptr` with at most 64 bytes and the current state
  // of the hashing state machine stored in current_state.
  while (len > 16) {
    uint64_t a = UnalignedLoad64(ptr);
    uint64_t b = UnalignedLoad64(ptr + 8);

    current_state = WyhashMix(a ^ salt[1], b ^ current_state);

    ptr += 16;
    len -= 16;
  }

  // We now have a data `ptr` with at most 16 bytes.
  uint64_t a = 0;
  uint64_t b = 0;
  if (len > 8) {
    // When we have at least 9 and at most 16 bytes, set A to the first 64
    // bits of the input and B to the last 64 bits of the input. Yes, they will
    // overlap in the middle if we are working with less than the full 16
    // bytes.
    a = UnalignedLoad64(ptr);
    b = UnalignedLoad64(ptr + len - 8);
  } else if (len > 3) {
    // If we have at least 4 and at most 8 bytes, set A to the first 32
    // bits and B to the last 32 bits.
    a = UnalignedLoad32(ptr);
    b = UnalignedLoad32(ptr + len - 4);
  } else if (len > 0) {
    // If we have at least 1 and at most 3 bytes, read all of the provided
    // bits into A, with some adjustments.
    /* Combines the first, middle (index len / 2) and last byte. */
    a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
    b = 0;
  } else {
    a = 0;
    b = 0;
  }

  /* Final mix: fold in the tail words and the original input length. */
  uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
  uint64_t z = salt[1] ^ starting_length;
  return WyhashMix(w, z);
}
11661
/* The five salt words that _upb_Hash() passes to Wyhash(). */
const uint64_t kWyhashSalt[5] = {
    0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL, 0xA4093822299F31D0ULL,
    0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
};
11666
_upb_Hash(const void * p,size_t n,uint64_t seed)11667 uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
11668 return Wyhash(p, n, seed, kWyhashSalt);
11669 }
11670
/* Convenience wrapper: _upb_Hash() with a seed of 0. */
static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
  const uint64_t no_seed = 0;
  return _upb_Hash(p, n, no_seed);
}
11674
/* hashfunc_t for string tables: hashes the bytes of a stored string key as
 * returned by upb_tabstr(). */
static uint32_t strhash(upb_tabkey key) {
  uint32_t size;
  char* bytes = upb_tabstr(key, &size);
  return _upb_Hash_NoSeed(bytes, size);
}
11680
/* eqlfunc_t for string tables: true when the stored key |k1| has the same
 * length and bytes as the lookup key |k2|.  Two empty keys compare equal
 * without touching their data pointers. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t stored_len;
  char* stored = upb_tabstr(k1, &stored_len);

  if (stored_len != k2.str.len) return false;
  if (stored_len == 0) return true;
  return memcmp(stored, k2.str.str, stored_len) == 0;
}
11686
/* Initializes |t| as an empty string table sized for roughly |expected_size|
 * entries, allocating from arena |a|.  Returns false if allocation fails. */
bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
  // Scale by 1204/1024, an approximation of 1 / MAX_LOAD (0.85) that uses a
  // power-of-two denominator.
  const size_t slots_needed = (expected_size + 1) * 1204 / 1024;
  int lg2;

  UPB_ASSERT(slots_needed >= expected_size * 0.85);
  lg2 = _upb_Log2Ceiling(slots_needed);
  return init(&t->t, lg2, a);
}
11695
/* Removes every entry from |t| by resetting the count and zeroing the entry
 * array.  The table keeps its current size.
 *
 * init() leaves the entry pointer NULL when the entry array would be zero
 * bytes long, so the memset() is skipped in that case rather than being
 * called with a NULL pointer. */
void upb_strtable_clear(upb_strtable* t) {
  size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
  t->t.count = 0;
  if (bytes != 0) memset((char*)t->t.entries, 0, bytes);
}
11701
/* Rebuilds |t| as a table of log2 size |size_lg2|, re-inserting every
 * existing entry into freshly allocated storage from arena |a|.
 *
 * Returns false if the new table cannot be allocated or if re-inserting any
 * entry fails.  On failure |*t| is left exactly as it was, so no entries are
 * lost; the partially built table is simply abandoned in the arena. */
bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
  upb_strtable new_table;
  upb_strtable_iter i;

  if (!init(&new_table.t, size_lg2, a)) return false;
  upb_strtable_begin(&i, t);
  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    upb_StringView key = upb_strtable_iter_key(&i);
    /* upb_strtable_insert() copies the key and may fail to allocate; do not
     * swap in a table that is missing entries. */
    if (!upb_strtable_insert(&new_table, key.data, key.size,
                             upb_strtable_iter_value(&i), a)) {
      return false;
    }
  }
  *t = new_table;
  return true;
}
11716
/* Inserts the key (|k|, |len|) with value |v| into |t|.  The key bytes are
 * copied into arena |a|.  The key must not already be present (insert()
 * asserts this).
 *
 * If the table is full it is first resized to twice its size.  Returns false
 * if that resize or the key copy fails. */
bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
                         upb_value v, upb_Arena* a) {
  /* Need to resize?  New table of double the size, add old elements to it. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
    return false;
  }

  const lookupkey_t key = strkey2(k, len);
  const upb_tabkey stored_key = strcopy(key, a);
  if (stored_key == 0) return false;

  const uint32_t hash = _upb_Hash_NoSeed(key.str.str, key.str.len);
  insert(&t->t, key, stored_key, v, hash, &strhash, &streql);
  return true;
}
11738
/* Looks up the key (|key|, |len|) in |t|.  Returns true if present, storing
 * the value in |*v| when |v| is non-NULL. */
bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
                          upb_value* v) {
  const uint32_t hash = _upb_Hash_NoSeed(key, len);
  const lookupkey_t wanted = strkey2(key, len);
  return lookup(&t->t, wanted, v, hash, &streql);
}
11744
/* Removes the key (|key|, |len|) from |t|.  Returns true if it was present,
 * storing the removed value in |*val| when |val| is non-NULL.  The stored key
 * reported by rm() is received into a local and not used further. */
bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
                          upb_value* val) {
  upb_tabkey removed_key;
  const uint32_t hash = _upb_Hash_NoSeed(key, len);
  const lookupkey_t wanted = strkey2(key, len);
  return rm(&t->t, wanted, val, &removed_key, hash, &streql);
}
11751
11752 /* Iteration */
11753
/* Points iterator |i| at the first non-empty entry of |t|. */
void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
  const size_t first = begin(&t->t);
  i->t = t;
  i->index = first;
}
11758
/* Advances iterator |i| to the next non-empty entry of its table. */
void upb_strtable_next(upb_strtable_iter* i) {
  const upb_table* table = &i->t->t;
  i->index = next(table, i->index);
}
11762
upb_strtable_done(const upb_strtable_iter * i)11763 bool upb_strtable_done(const upb_strtable_iter* i) {
11764 if (!i->t) return true;
11765 return i->index >= upb_table_size(&i->t->t) ||
11766 upb_tabent_isempty(str_tabent(i));
11767 }
11768
upb_strtable_iter_key(const upb_strtable_iter * i)11769 upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
11770 upb_StringView key;
11771 uint32_t len;
11772 UPB_ASSERT(!upb_strtable_done(i));
11773 key.data = upb_tabstr(str_tabent(i)->key, &len);
11774 key.size = len;
11775 return key;
11776 }
11777
upb_strtable_iter_value(const upb_strtable_iter * i)11778 upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
11779 UPB_ASSERT(!upb_strtable_done(i));
11780 return _upb_value_val(str_tabent(i)->val.val);
11781 }
11782
/* Puts iterator |i| into the done state by detaching it from any table
 * (upb_strtable_done() reports true for a NULL table). */
void upb_strtable_iter_setdone(upb_strtable_iter* i) {
  i->index = SIZE_MAX;
  i->t = NULL;
}
11787
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)11788 bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
11789 const upb_strtable_iter* i2) {
11790 if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
11791 return i1->t == i2->t && i1->index == i2->index;
11792 }
11793
11794 /* upb_inttable ***************************************************************/
11795
11796 /* For inttables we use a hybrid structure where small keys are kept in an
11797 * array and large keys are put in the hash table. */
11798
/* hashfunc_t for integer tables: forwards the stored key to upb_inthash(). */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
11800
/* eqlfunc_t for integer tables: plain equality of the stored key and the
 * lookup key's integer member. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  return k2.num == k1;
}
11802
mutable_array(upb_inttable * t)11803 static upb_tabval* mutable_array(upb_inttable* t) {
11804 return (upb_tabval*)t->array;
11805 }
11806
inttable_val(upb_inttable * t,uintptr_t key)11807 static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
11808 if (key < t->array_size) {
11809 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
11810 } else {
11811 upb_tabent* e =
11812 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
11813 return e ? &e->val : NULL;
11814 }
11815 }
11816
inttable_val_const(const upb_inttable * t,uintptr_t key)11817 static const upb_tabval* inttable_val_const(const upb_inttable* t,
11818 uintptr_t key) {
11819 return inttable_val((upb_inttable*)t, key);
11820 }
11821
upb_inttable_count(const upb_inttable * t)11822 size_t upb_inttable_count(const upb_inttable* t) {
11823 return t->t.count + t->array_count;
11824 }
11825
/* Debug-only consistency check.  When UPB_DEBUG_TABLE is defined and NDEBUG
 * is not, verifies that every iterated key can be looked up and that the
 * number of iterated entries matches upb_inttable_count().  Otherwise this
 * function does nothing. */
static void check(upb_inttable* t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    size_t seen = 0;
    upb_inttable_iter it;
    upb_inttable_begin(&it, t);
    while (!upb_inttable_done(&it)) {
      UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
      upb_inttable_next(&it);
      seen++;
    }
    UPB_ASSERT(seen == upb_inttable_count(t));
  }
#endif
}
11841
/* Initializes |t| with an array part of |asize| slots (at least one) and a
 * hash part of log2 size |hsize_lg2|, allocating from arena |a|.  Every array
 * slot starts out filled with 0xff bytes, which upb_arrhas() treats as empty.
 * Returns false if either allocation fails. */
bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
                            upb_Arena* a) {
  size_t array_bytes;

  if (!init(&t->t, hsize_lg2, a)) return false;

  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;
  array_bytes = t->array_size * sizeof(upb_value);
  t->array = upb_Arena_Malloc(a, array_bytes);
  if (t->array == NULL) return false;

  memset(mutable_array(t), 0xff, array_bytes);
  check(t);
  return true;
}
11860
/* Initializes |t| with default sizes: an array size request of 0 and a hash
 * part of log2 size 4. */
bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
  const size_t default_asize = 0;
  const int default_hsize_lg2 = 4;
  return upb_inttable_sizedinit(t, default_asize, default_hsize_lg2, a);
}
11864
/* Inserts |key| -> |val| into |t|.  The key must not already be present.
 *
 * Keys below array_size are stored in the array part.  Other keys go into the
 * hash part, which is first rebuilt at twice its size if it is full.  Returns
 * false only if allocating that larger hash part fails. */
bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
                         upb_Arena* a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(
      upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */

  if (key < t->array_size) {
    /* Array part: the slot must currently be empty. */
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
    check(t);
    return true;
  }

  if (isfull(&t->t)) {
    /* Need to resize the hash part, but we re-use the array part. */
    upb_table grown;
    size_t idx;

    if (!init(&grown, t->t.size_lg2 + 1, a)) return false;

    /* Re-insert every existing hash entry into the larger table. */
    idx = begin(&t->t);
    while (idx < upb_table_size(&t->t)) {
      const upb_tabent* src = &t->t.entries[idx];
      upb_value moved;

      _upb_value_setval(&moved, src->val.val);
      insert(&grown, intkey(src->key), src->key, moved,
             upb_inthash(src->key), &inthash, &inteql);
      idx = next(&t->t, idx);
    }

    UPB_ASSERT(t->t.count == grown.count);
    t->t = grown;
  }

  insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  check(t);
  return true;
}
11905
/* Looks up |key| in |t|.  Returns true if present, storing the value in |*v|
 * when |v| is non-NULL. */
bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
  const upb_tabval* slot = inttable_val_const(t, key);
  if (slot == NULL) return false;
  if (v != NULL) _upb_value_setval(v, slot->val);
  return true;
}
11912
/* Overwrites the value stored for an existing |key|.  Returns false, without
 * modifying the table, if the key is not present. */
bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
  upb_tabval* slot = inttable_val(t, key);
  if (slot == NULL) return false;
  slot->val = val.val;
  return true;
}
11919
/* Removes |key| from |t|.  Returns true if it was present, storing the
 * removed value in |*val| when |val| is non-NULL. */
bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
  bool success;

  if (key >= t->array_size) {
    /* Hash part. */
    success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  } else if (!upb_arrhas(t->array[key])) {
    /* Array part, but the slot is already empty. */
    success = false;
  } else {
    /* Array part: report the old value, then mark the slot empty. */
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
    t->array_count--;
    if (val) {
      _upb_value_setval(val, t->array[key].val);
    }
    mutable_array(t)[key] = empty;
    success = true;
  }

  check(t);
  return success;
}
11940
/* Rebuilds |t| with array and hash parts sized for its current contents,
 * allocating the new storage from arena |a|.
 *
 * Keys are bucketed by log2ceil(key); the array part is sized to the largest
 * bucket that keeps the array at least MIN_DENSITY full, and the remaining
 * keys go to the hash part.
 *
 * If any allocation fails while building the replacement, |t| is left
 * untouched (still valid, just not compacted).  The function returns void, so
 * this failure is not reported to the caller. */
void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    /* On allocation failure new_t is not usable; keep the original table
     * rather than copying an uninitialized one over it. */
    if (!upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a)) return;
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      /* A failed insert would leave new_t missing entries; abandon it. */
      if (!upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i), a)) {
        return;
      }
    }
    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  *t = new_t;
}
11996
11997 /* Iteration. */
11998
int_tabent(const upb_inttable_iter * i)11999 static const upb_tabent* int_tabent(const upb_inttable_iter* i) {
12000 UPB_ASSERT(!i->array_part);
12001 return &i->t->t.entries[i->index];
12002 }
12003
int_arrent(const upb_inttable_iter * i)12004 static upb_tabval int_arrent(const upb_inttable_iter* i) {
12005 UPB_ASSERT(i->array_part);
12006 return i->t->array[i->index];
12007 }
12008
/* Positions iterator |i| on the first entry of |t|: it starts just before
 * the array part and is then advanced once. */
void upb_inttable_begin(upb_inttable_iter* i, const upb_inttable* t) {
  i->array_part = true;
  i->index = -1;
  i->t = t;
  upb_inttable_next(i);
}
12015
/* Advances |iter| to the next entry.  While in the array part it moves to the
 * next occupied slot; once the array part is exhausted it switches to the
 * hash part and continues with that part's non-empty entries. */
void upb_inttable_next(upb_inttable_iter* iter) {
  const upb_inttable* t = iter->t;

  if (!iter->array_part) {
    iter->index = next(&t->t, iter->index);
    return;
  }

  while (++iter->index < t->array_size) {
    if (upb_arrhas(int_arrent(iter))) return;
  }

  /* Array part exhausted: continue from the start of the hash part. */
  iter->array_part = false;
  iter->index = begin(&t->t);
}
12030
/* Advances the iteration state |*iter| to the next entry of |t|.
 *
 * On success returns true and stores the entry's key in |*key|, its value in
 * |*val| and the new position in |*iter|.  Returns false, leaving the outputs
 * untouched, when there are no more entries.
 *
 * Position encoding: -1 means "before the first entry"; values in
 * [0, array_size) are array-part indices; larger values are a hash-part index
 * plus array_size.
 *
 * The array bound is compared as a signed value.  Comparing the signed
 * position directly against an unsigned size would convert -1 to a huge
 * unsigned number and skip the whole array part. */
bool upb_inttable_next2(const upb_inttable* t, uintptr_t* key, upb_value* val,
                        intptr_t* iter) {
  const intptr_t array_size = (intptr_t)t->array_size;
  intptr_t i = *iter;

  if (i < array_size) {
    while (++i < array_size) {
      upb_tabval ent = t->array[i];
      if (upb_arrhas(ent)) {
        *key = i;
        *val = _upb_value_val(ent.val);
        *iter = i;
        return true;
      }
    }
    /* i == array_size here.  Back up one so that the subtraction below yields
     * -1 and the hash-part scan starts at slot 0 instead of skipping it. */
    i--;
  }

  size_t tab_idx = next(&t->t, (size_t)(i - array_size));
  if (tab_idx < upb_table_size(&t->t)) {
    upb_tabent* ent = &t->t.entries[tab_idx];
    *key = ent->key;
    *val = _upb_value_val(ent->val.val);
    *iter = tab_idx + t->array_size;
    return true;
  }

  return false;
}
12057
/* Removes the entry at iteration position |*iter| (as produced by
 * upb_inttable_next2()).  Array-part entries are marked empty; hash-part
 * entries are unlinked from the entry that points at them (if any) and their
 * slot is cleared.
 *
 * NOTE(review): when the removed hash entry has no predecessor but does have
 * a successor, the successor is not moved into its place -- it looks like the
 * rest of that chain then cannot be reached through the removed slot.
 * Confirm against callers. */
void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
  intptr_t i = *iter;

  if (i < t->array_size) {
    t->array_count--;
    mutable_array(t)[i].val = -1;
    return;
  }

  upb_tabent* const victim = &t->t.entries[i - t->array_size];
  upb_tabent* const limit = &t->t.entries[upb_table_size(&t->t)];
  upb_tabent* scan = t->t.entries;

  // Linear search, not great.
  while (scan != limit && scan->next != victim) scan++;

  /* If some entry links to the victim, splice the victim out of its chain. */
  if (scan != limit) {
    scan->next = victim->next;
  }

  t->t.count--;
  victim->key = 0;
  victim->next = NULL;
}
12085
/* Advances the iteration state |*iter| to the next non-empty entry of |t|.
 * On success returns true and stores the entry's key view in |*key|, its
 * value in |*val| and its index in |*iter|.  Returns false, leaving the
 * outputs untouched, when there are no more entries. */
bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
                        upb_value* val, intptr_t* iter) {
  const size_t tab_idx = next(&t->t, *iter);
  if (tab_idx >= upb_table_size(&t->t)) return false;

  upb_tabent* ent = &t->t.entries[tab_idx];
  uint32_t size;
  key->data = upb_tabstr(ent->key, &size);
  key->size = size;
  *val = _upb_value_val(ent->val.val);
  *iter = tab_idx;
  return true;
}
12101
/* Removes the entry at iteration position |*iter| (as produced by
 * upb_strtable_next2()): it is unlinked from the entry that points at it (if
 * any) and its slot is cleared.
 *
 * NOTE(review): when the removed entry has no predecessor but does have a
 * successor, the successor is not moved into its place -- it looks like the
 * rest of that chain then cannot be reached through the removed slot.
 * Confirm against callers. */
void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
  intptr_t i = *iter;
  upb_tabent* const victim = &t->t.entries[i];
  upb_tabent* const limit = &t->t.entries[upb_table_size(&t->t)];
  upb_tabent* scan = t->t.entries;

  // Linear search, not great.
  while (scan != limit && scan->next != victim) scan++;

  /* If some entry links to the victim, splice the victim out of its chain. */
  if (scan != limit) {
    scan->next = victim->next;
  }

  t->t.count--;
  victim->key = 0;
  victim->next = NULL;
}
12124
upb_inttable_done(const upb_inttable_iter * i)12125 bool upb_inttable_done(const upb_inttable_iter* i) {
12126 if (!i->t) return true;
12127 if (i->array_part) {
12128 return i->index >= i->t->array_size || !upb_arrhas(int_arrent(i));
12129 } else {
12130 return i->index >= upb_table_size(&i->t->t) ||
12131 upb_tabent_isempty(int_tabent(i));
12132 }
12133 }
12134
upb_inttable_iter_key(const upb_inttable_iter * i)12135 uintptr_t upb_inttable_iter_key(const upb_inttable_iter* i) {
12136 UPB_ASSERT(!upb_inttable_done(i));
12137 return i->array_part ? i->index : int_tabent(i)->key;
12138 }
12139
upb_inttable_iter_value(const upb_inttable_iter * i)12140 upb_value upb_inttable_iter_value(const upb_inttable_iter* i) {
12141 UPB_ASSERT(!upb_inttable_done(i));
12142 return _upb_value_val(i->array_part ? i->t->array[i->index].val
12143 : int_tabent(i)->val.val);
12144 }
12145
// Puts |i| into the "done" state: no table, index SIZE_MAX, hash part.
// upb_inttable_done() reports true for an iterator without a table.
void upb_inttable_iter_setdone(upb_inttable_iter* i) {
  i->array_part = false;
  i->index = SIZE_MAX;
  i->t = NULL;
}
12151
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)12152 bool upb_inttable_iter_isequal(const upb_inttable_iter* i1,
12153 const upb_inttable_iter* i2) {
12154 if (upb_inttable_done(i1) && upb_inttable_done(i2)) return true;
12155 return i1->t == i2->t && i1->index == i2->index &&
12156 i1->array_part == i2->array_part;
12157 }
12158
12159 /** upb/upb.c ************************************************************/
12160 #include <errno.h>
12161 #include <float.h>
12162 #include <stdarg.h>
12163 #include <stddef.h>
12164 #include <stdint.h>
12165 #include <stdio.h>
12166 #include <stdlib.h>
12167 #include <string.h>
12168
12169
12170 // Must be last.
12171
12172 /* upb_Status *****************************************************************/
12173
/* Resets |status| to the success state with an empty message.  A NULL
 * |status| is accepted and ignored. */
void upb_Status_Clear(upb_Status* status) {
  if (status != NULL) {
    status->msg[0] = '\0';
    status->ok = true;
  }
}
12179
upb_Status_IsOk(const upb_Status * status)12180 bool upb_Status_IsOk(const upb_Status* status) { return status->ok; }
12181
upb_Status_ErrorMessage(const upb_Status * status)12182 const char* upb_Status_ErrorMessage(const upb_Status* status) {
12183 return status->msg;
12184 }
12185
/* Marks |status| as failed and stores |msg| as its message.  At most
 * _kUpb_Status_MaxMessage - 1 bytes are copied and the buffer is always
 * NUL-terminated, so longer messages are truncated.  A NULL |status| is
 * ignored. */
void upb_Status_SetErrorMessage(upb_Status* status, const char* msg) {
  if (status == NULL) return;

  char* const dst = status->msg;
  const size_t last = _kUpb_Status_MaxMessage - 1;

  status->ok = false;
  strncpy(dst, msg, last);
  /* strncpy() does not terminate when |msg| fills the whole span. */
  dst[last] = '\0';
}
12192
/* printf-style convenience wrapper: packages the variadic arguments into a
 * va_list and forwards to upb_Status_VSetErrorFormat(). */
void upb_Status_SetErrorFormat(upb_Status* status, const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  upb_Status_VSetErrorFormat(status, fmt, ap);
  va_end(ap);
}
12199
/* Marks |status| as failed and replaces its message with the result of
 * formatting |fmt| with |args|.  Output is bounded by the size of the message
 * buffer and is always NUL-terminated.  A NULL |status| is ignored. */
void upb_Status_VSetErrorFormat(upb_Status* status, const char* fmt,
                                va_list args) {
  if (status == NULL) return;

  char* const out = status->msg;
  status->ok = false;
  vsnprintf(out, sizeof(status->msg), fmt, args);
  out[_kUpb_Status_MaxMessage - 1] = '\0';
}
12207
/* Marks |status| as failed and appends the result of formatting |fmt| with
 * |args| to the message already stored.  Output is bounded by the space left
 * in the message buffer and is always NUL-terminated.  A NULL |status| is
 * ignored. */
void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt,
                                   va_list args) {
  if (status == NULL) return;

  status->ok = false;

  const size_t used = strlen(status->msg);
  char* const tail = status->msg + used;
  vsnprintf(tail, sizeof(status->msg) - used, fmt, args);
  status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
}
12217
12218 /* upb_alloc ******************************************************************/
12219
/* Allocation callback backed by the C heap.  A request for zero bytes frees
 * |ptr| and yields NULL; any other request is forwarded to realloc().  The
 * |alloc| and |oldsize| arguments are not needed by this implementation. */
static void* upb_global_allocfunc(upb_alloc* alloc, void* ptr, size_t oldsize,
                                  size_t size) {
  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);

  if (size != 0) return realloc(ptr, size);

  free(ptr);
  return NULL;
}
12231
/* Extracts the pointer half of a packed cleanup_metadata word by masking off
 * the low tag bit. */
static uint32_t* upb_cleanup_pointer(uintptr_t cleanup_metadata) {
  const uintptr_t tag_bit = 0x1;
  const uintptr_t address = cleanup_metadata & ~tag_bit;
  return (uint32_t*)address;
}
12235
/* Extracts the flag half of a packed cleanup_metadata word: true when the low
 * tag bit is set. */
static bool upb_cleanup_has_initial_block(uintptr_t cleanup_metadata) {
  return (cleanup_metadata & 0x1) != 0;
}
12239
/* Packs a cleanup-counter pointer and the "has initial block" flag into one
 * word: the pointer value with the flag OR-ed into its low bit. */
static uintptr_t upb_cleanup_metadata(uint32_t* cleanup,
                                      bool has_initial_block) {
  const uintptr_t tag = has_initial_block ? 1 : 0;
  return (uintptr_t)cleanup | tag;
}
12244
12245 upb_alloc upb_alloc_global = {&upb_global_allocfunc};
12246
12247 /* upb_Arena ******************************************************************/
12248
/* Header stored at the start of every arena block.  Blocks are chained into a
 * singly linked freelist through |next|. */
struct mem_block {
  struct mem_block* next; /* Next block in the freelist, or NULL. */
  uint32_t size;          /* Size given to upb_Arena_addblock(); the block's end
                           * is computed as block start + size. */
  uint32_t cleanups;      /* Count of cleanup_ent records at the block's end. */
  /* Data follows. */
};
12255
/* One registered cleanup action.  arena_dofree() invokes cleanup(ud) for each
 * record found at the end of a block before releasing that block. */
typedef struct cleanup_ent {
  upb_CleanupFunc* cleanup; /* Function to call. */
  void* ud;                 /* Argument passed to |cleanup|. */
} cleanup_ent;
12260
/* Number of bytes at the start of each block occupied by the mem_block header,
 * rounded up to UPB_MALLOC_ALIGN.  upb_Arena_addblock() starts handing out
 * memory at this offset. */
static const size_t memblock_reserve =
    UPB_ALIGN_UP(sizeof(mem_block), UPB_MALLOC_ALIGN);
12263
/* Returns the root of the fuse tree containing |a|, i.e. the arena that is its
 * own parent.  While walking up, every visited arena is re-pointed at its
 * grandparent.
 *
 * Path splitting keeps time complexity down, see:
 *   https://en.wikipedia.org/wiki/Disjoint-set_data_structure */
static upb_Arena* arena_findroot(upb_Arena* a) {
  upb_Arena* node = a;
  for (;;) {
    upb_Arena* up = node->parent;
    if (up == node) return node;
    node->parent = up->parent;
    node = up;
  }
}
12274
/* Installs the |size|-byte region at |ptr| as the current allocation block of
 * arena |a|.
 *
 * A mem_block header is written at the start of the region and the block is
 * pushed onto the front of |root|'s freelist (|root| may be |a| itself).  The
 * arena's bump pointers are then pointed at the space after the header, and
 * its cleanup metadata is redirected to the new block's cleanup counter while
 * keeping the existing "has initial block" flag. */
static void upb_Arena_addblock(upb_Arena* a, upb_Arena* root, void* ptr,
                               size_t size) {
  mem_block* blk = ptr;
  const bool has_initial = upb_cleanup_has_initial_block(a->cleanup_metadata);

  /* Initialize the block header. */
  blk->size = (uint32_t)size;
  blk->cleanups = 0;
  blk->next = root->freelist;

  /* The block is for arena |a|, but should appear in the freelist of |root|. */
  root->freelist = blk;
  if (root->freelist_tail == NULL) root->freelist_tail = blk;

  /* Point the arena's bump allocator at the new block. */
  a->last_size = blk->size;
  a->head.ptr = UPB_PTR_AT(blk, memblock_reserve, char);
  a->head.end = UPB_PTR_AT(blk, size, char);
  a->cleanup_metadata = upb_cleanup_metadata(&blk->cleanups, has_initial);

  UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr);
}
12294
/* Obtains a fresh block from the root arena's block allocator and installs it
 * as the current block of |a|.  The block has room for the larger of |size|
 * and twice the previous block size, plus the block header.  Returns false if
 * the allocation fails. */
static bool upb_Arena_Allocblock(upb_Arena* a, size_t size) {
  upb_Arena* const root = arena_findroot(a);
  const size_t payload = UPB_MAX(size, a->last_size * 2);
  const size_t total = payload + memblock_reserve;

  void* mem = upb_malloc(root->block_alloc, total);
  if (mem == NULL) return false;

  upb_Arena_addblock(a, root, mem, total);
  return true;
}
12304
/* Allocation path that first adds a new block large enough for |size| bytes
 * to |a| and then retries the allocation.  Returns NULL when no block could
 * be obtained. */
void* _upb_Arena_SlowMalloc(upb_Arena* a, size_t size) {
  const bool grew = upb_Arena_Allocblock(a, size);
  if (!grew) {
    return NULL; /* Out of memory. */
  }
  UPB_ASSERT(_upb_ArenaHas(a) >= size);
  return upb_Arena_Malloc(a, size);
}
12310
/* upb_alloc callback installed on every arena: recovers the arena from the
 * allocator pointer and forwards the request to upb_Arena_Realloc(). */
static void* upb_Arena_doalloc(upb_alloc* alloc, void* ptr, size_t oldsize,
                               size_t size) {
  /* upb_alloc is the initial member of upb_Arena, so the cast is valid. */
  return upb_Arena_Realloc((upb_Arena*)alloc, ptr, oldsize, size);
}
12316
12317 /* Public Arena API ***********************************************************/
12318
arena_initslow(void * mem,size_t n,upb_alloc * alloc)12319 upb_Arena* arena_initslow(void* mem, size_t n, upb_alloc* alloc) {
12320 const size_t first_block_overhead = sizeof(upb_Arena) + memblock_reserve;
12321 upb_Arena* a;
12322
12323 /* We need to malloc the initial block. */
12324 n = first_block_overhead + 256;
12325 if (!alloc || !(mem = upb_malloc(alloc, n))) {
12326 return NULL;
12327 }
12328
12329 a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena);
12330 n -= sizeof(*a);
12331
12332 a->head.alloc.func = &upb_Arena_doalloc;
12333 a->block_alloc = alloc;
12334 a->parent = a;
12335 a->refcount = 1;
12336 a->freelist = NULL;
12337 a->freelist_tail = NULL;
12338 a->cleanup_metadata = upb_cleanup_metadata(NULL, false);
12339
12340 upb_Arena_addblock(a, a, mem, n);
12341
12342 return a;
12343 }
12344
upb_Arena_Init(void * mem,size_t n,upb_alloc * alloc)12345 upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) {
12346 upb_Arena* a;
12347
12348 if (n) {
12349 /* Align initial pointer up so that we return properly-aligned pointers. */
12350 void* aligned = (void*)UPB_ALIGN_UP((uintptr_t)mem, UPB_MALLOC_ALIGN);
12351 size_t delta = (uintptr_t)aligned - (uintptr_t)mem;
12352 n = delta <= n ? n - delta : 0;
12353 mem = aligned;
12354 }
12355
12356 /* Round block size down to alignof(*a) since we will allocate the arena
12357 * itself at the end. */
12358 n = UPB_ALIGN_DOWN(n, UPB_ALIGN_OF(upb_Arena));
12359
12360 if (UPB_UNLIKELY(n < sizeof(upb_Arena))) {
12361 return arena_initslow(mem, n, alloc);
12362 }
12363
12364 a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena);
12365
12366 a->head.alloc.func = &upb_Arena_doalloc;
12367 a->block_alloc = alloc;
12368 a->parent = a;
12369 a->refcount = 1;
12370 a->last_size = UPB_MAX(128, n);
12371 a->head.ptr = mem;
12372 a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char);
12373 a->freelist = NULL;
12374 a->cleanup_metadata = upb_cleanup_metadata(NULL, true);
12375
12376 return a;
12377 }
12378
/* Destroys a root arena whose reference count has dropped to zero.  Walks the
 * freelist; for each block, runs the cleanup records stored at the end of the
 * block in ascending address order and then returns the block to the arena's
 * block allocator. */
static void arena_dofree(upb_Arena* a) {
  UPB_ASSERT(a->parent == a);
  UPB_ASSERT(a->refcount == 0);

  mem_block* following;
  for (mem_block* blk = a->freelist; blk != NULL; blk = following) {
    /* Load first since we are deleting blk. */
    following = blk->next;

    /* The records occupy the last |cleanups| cleanup_ent slots of the block;
     * with a count of zero the loop body never runs. */
    cleanup_ent* const limit = UPB_PTR_AT(blk, blk->size, void);
    for (cleanup_ent* ent = limit - blk->cleanups; ent < limit; ent++) {
      ent->cleanup(ent->ud);
    }

    upb_free(a->block_alloc, blk);
  }
}
12401
/* Drops one reference on the fuse group that |a| belongs to.  The reference
 * count lives on the group's root; when it reaches zero, all blocks of the
 * group are released via arena_dofree(). */
void upb_Arena_Free(upb_Arena* a) {
  upb_Arena* root = arena_findroot(a);
  root->refcount--;
  if (root->refcount == 0) {
    arena_dofree(root);
  }
}
12406
/* Registers func(ud) to be called when the arena's blocks are freed.
 *
 * The record is carved off the end of the arena's current block and that
 * block's cleanup counter is incremented.  If the arena has no counter yet or
 * the current block lacks room for a record, a new block is allocated first.
 * Returns false only when that allocation fails. */
bool upb_Arena_AddCleanup(upb_Arena* a, void* ud, upb_CleanupFunc* func) {
  uint32_t* counter = upb_cleanup_pointer(a->cleanup_metadata);

  const bool need_block =
      counter == NULL || _upb_ArenaHas(a) < sizeof(cleanup_ent);
  if (need_block) {
    if (!upb_Arena_Allocblock(a, 128)) {
      return false; /* Out of memory. */
    }
    UPB_ASSERT(_upb_ArenaHas(a) >= sizeof(cleanup_ent));
    counter = upb_cleanup_pointer(a->cleanup_metadata);
  }

  /* Shrink the free region from the top to make room for the record. */
  a->head.end -= sizeof(cleanup_ent);
  cleanup_ent* record = (cleanup_ent*)a->head.end;
  *counter += 1;
  UPB_UNPOISON_MEMORY_REGION(record, sizeof(cleanup_ent));

  record->ud = ud;
  record->cleanup = func;

  return true;
}
12427
/* Joins the fuse groups of |a1| and |a2| so that they share one reference
 * count and one freelist.
 *
 * Returns true if the arenas are fused on return (including when they already
 * were).  Returns false, leaving both untouched, when either root was seeded
 * with an initial block or when the two roots use different block
 * allocators. */
bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
  upb_Arena* root_a = arena_findroot(a1);
  upb_Arena* root_b = arena_findroot(a2);

  if (root_a == root_b) return true; /* Already fused. */

  /* Do not fuse initial blocks since we cannot lifetime extend them. */
  if (upb_cleanup_has_initial_block(root_a->cleanup_metadata) ||
      upb_cleanup_has_initial_block(root_b->cleanup_metadata)) {
    return false;
  }

  /* Only allow fuse with a common allocator */
  if (root_a->block_alloc != root_b->block_alloc) return false;

  /* We want to join the smaller tree to the larger tree, using the reference
   * count as the size measure. */
  upb_Arena* keep = root_a;
  upb_Arena* absorb = root_b;
  if (root_a->refcount < root_b->refcount) {
    keep = root_b;
    absorb = root_a;
  }

  /* |keep| takes over |absorb|'s freelist and refcount.  The absorbed list is
   * spliced in front of |keep|'s own list. */
  keep->refcount += absorb->refcount;
  if (absorb->freelist_tail) {
    UPB_ASSERT(absorb->freelist_tail->next == NULL);
    absorb->freelist_tail->next = keep->freelist;
    keep->freelist = absorb->freelist;
  }
  absorb->parent = keep;
  return true;
}
12459
12460 /* Miscellaneous utilities ****************************************************/
12461
/* Rewrites every ',' in the NUL-terminated string |p| to '.', in place.
 *
 * printf() is dependent on locales; sadly there is no easy and portable way
 * to avoid this.  This little post-processing step will translate 1,2 -> 1.2
 * since JSON needs the latter.  Arguably a hack, but it is simple and the
 * alternatives are far more complicated, platform-dependent, and/or larger
 * in code size. */
static void upb_FixLocale(char* p) {
  char* comma = strchr(p, ',');
  while (comma != NULL) {
    *comma = '.';
    comma = strchr(comma + 1, ',');
  }
}
12472
_upb_EncodeRoundTripDouble(double val,char * buf,size_t size)12473 void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size) {
12474 assert(size >= kUpb_RoundTripBufferSize);
12475 snprintf(buf, size, "%.*g", DBL_DIG, val);
12476 if (strtod(buf, NULL) != val) {
12477 snprintf(buf, size, "%.*g", DBL_DIG + 2, val);
12478 assert(strtod(buf, NULL) == val);
12479 }
12480 upb_FixLocale(buf);
12481 }
12482
_upb_EncodeRoundTripFloat(float val,char * buf,size_t size)12483 void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size) {
12484 assert(size >= kUpb_RoundTripBufferSize);
12485 snprintf(buf, size, "%.*g", FLT_DIG, val);
12486 if (strtof(buf, NULL) != val) {
12487 snprintf(buf, size, "%.*g", FLT_DIG + 3, val);
12488 assert(strtof(buf, NULL) == val);
12489 }
12490 upb_FixLocale(buf);
12491 }
12492
12493 /** upb/port_undef.inc ************************************************************/
12494 /* See port_def.inc. This should #undef all macros #defined there. */
12495
12496 #undef UPB_SIZE
12497 #undef UPB_PTR_AT
12498 #undef UPB_READ_ONEOF
12499 #undef UPB_WRITE_ONEOF
12500 #undef UPB_MAPTYPE_STRING
12501 #undef UPB_INLINE
12502 #undef UPB_ALIGN_UP
12503 #undef UPB_ALIGN_DOWN
12504 #undef UPB_ALIGN_MALLOC
12505 #undef UPB_ALIGN_OF
12506 #undef UPB_MALLOC_ALIGN
12507 #undef UPB_LIKELY
12508 #undef UPB_UNLIKELY
12509 #undef UPB_FORCEINLINE
12510 #undef UPB_NOINLINE
12511 #undef UPB_NORETURN
12512 #undef UPB_PRINTF
12513 #undef UPB_MAX
12514 #undef UPB_MIN
12515 #undef UPB_UNUSED
12516 #undef UPB_ASSUME
12517 #undef UPB_ASSERT
12518 #undef UPB_UNREACHABLE
12519 #undef UPB_SETJMP
12520 #undef UPB_LONGJMP
12521 #undef UPB_PTRADD
12522 #undef UPB_MUSTTAIL
12523 #undef UPB_FASTTABLE_SUPPORTED
12524 #undef UPB_FASTTABLE
12525 #undef UPB_FASTTABLE_INIT
12526 #undef UPB_POISON_MEMORY_REGION
12527 #undef UPB_UNPOISON_MEMORY_REGION
12528 #undef UPB_ASAN
12529 #undef UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3
12530