xref: /aosp_15_r20/external/protobuf/ruby/ext/google/protobuf_c/ruby-upb.c (revision 1b3f573f81763fcece89efc2b6a5209149e44ab8)
1 /* Amalgamated source file */
2 #include "ruby-upb.h"
3 /*
4  * Copyright (c) 2009-2021, Google LLC
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *     * Redistributions of source code must retain the above copyright
10  *       notice, this list of conditions and the following disclaimer.
11  *     * Redistributions in binary form must reproduce the above copyright
12  *       notice, this list of conditions and the following disclaimer in the
13  *       documentation and/or other materials provided with the distribution.
14  *     * Neither the name of Google LLC nor the
15  *       names of its contributors may be used to endorse or promote products
16  *       derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This is where we define macros used across upb.
32  *
33  * All of these macros are undef'd in port_undef.inc to avoid leaking them to
34  * users.
35  *
36  * The correct usage is:
37  *
38  *   #include "upb/foobar.h"
39  *   #include "upb/baz.h"
40  *
41  *   // MUST be last included header.
42  *   #include "upb/port_def.inc"
43  *
44  *   // Code for this file.
45  *   // <...>
46  *
47  *   // Can be omitted for .c files, required for .h.
48  *   #include "upb/port_undef.inc"
49  *
50  * This file is private and must not be included by users!
51  */
52 
/* Compile-time guard: the build is rejected unless the compiler reports at
 * least C99 (__STDC_VERSION__ >= 199901L), C++11 (__cplusplus >= 201103L), or
 * MSVC 2015 (_MSC_VER >= 1900). */
53 #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
54       (defined(__cplusplus) && __cplusplus >= 201103L) ||           \
55       (defined(_MSC_VER) && _MSC_VER >= 1900))
56 #error upb requires C99 or C++11 or MSVC >= 2015.
57 #endif
58 
59 #include <stdint.h>
60 #include <stddef.h>
61 
/* UPB_SIZE(size32, size64): picks one of two values by pointer width.
 * Yields size32 when UINTPTR_MAX is exactly 0xffffffff and size64 for every
 * other pointer width. */
#if UINTPTR_MAX != 0xffffffff
#define UPB_SIZE(size32, size64) size64
#else
#define UPB_SIZE(size32, size64) size32
#endif
67 
/* UPB_PTR_AT(msg, ofs, type): pointer to a `type` located `ofs` bytes past
 * `msg`.  If we always read/write as a consistent type to each address, this
 * shouldn't violate aliasing.
 */
#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))

/* UPB_READ_ONEOF(): yields the `fieldtype` stored at `offset` when the int
 * stored at `case_offset` equals `case_val`, and `default` otherwise.
 *
 * The whole expansion and its value arguments are parenthesized so the macro
 * can be embedded in a larger expression (e.g. `x + UPB_READ_ONEOF(...)`)
 * without the surrounding operators re-associating with the comparison. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  ((*UPB_PTR_AT(msg, case_offset, int) == (case_val))                          \
       ? *UPB_PTR_AT(msg, offset, fieldtype)                                   \
       : (default))

/* UPB_WRITE_ONEOF(): stores `case_val` into the int at `case_offset`, then
 * stores `value` into the `fieldtype` at `offset`.
 *
 * Both stores are wrapped in do { } while (0) so the macro behaves as a single
 * statement: under an unbraced `if` neither store runs unless the condition
 * holds.  The caller supplies the terminating semicolon. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  do {                                                                        \
    *UPB_PTR_AT(msg, case_offset, int) = (case_val);                          \
    *UPB_PTR_AT(msg, offset, fieldtype) = (value);                            \
  } while (0)
81 
/* Names the constant 0.
 * NOTE(review): presumably the size code that marks string-typed map keys or
 * values (the map size table in this file stores 0 for strings/bytes) --
 * confirm against the map helpers that consume it. */
82 #define UPB_MAPTYPE_STRING 0
83 
84 /* UPB_INLINE: inline if possible, emit standalone code if required. */
/* Spelling depends on the compiler: plain `inline` when compiled as C++,
 * `static __inline__` for GCC/Clang in C mode, and plain `static` (no inline
 * request at all) for any other C compiler. */
85 #ifdef __cplusplus
86 #define UPB_INLINE inline
87 #elif defined (__GNUC__) || defined(__clang__)
88 #define UPB_INLINE static __inline__
89 #else
90 #define UPB_INLINE static
91 #endif
92 
/* Alignment helpers.
 *   UPB_MALLOC_ALIGN        alignment constant (8) used by UPB_ALIGN_MALLOC.
 *   UPB_ALIGN_DOWN(s, a)    largest multiple of `a` that is <= `s`.
 *   UPB_ALIGN_UP(s, a)      smallest multiple of `a` that is >= `s`.
 *   UPB_ALIGN_MALLOC(s)     UPB_ALIGN_UP with UPB_MALLOC_ALIGN.
 *   UPB_ALIGN_OF(type)      offset of a `type` member placed after one char.
 * The rounding macros use integer division and evaluate their arguments more
 * than once. */
#define UPB_MALLOC_ALIGN 8
#define UPB_ALIGN_DOWN(size, align) (((size) / (align)) * (align))
#define UPB_ALIGN_UP(size, align) \
  ((((size) + (align) - 1) / (align)) * (align))
#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, UPB_MALLOC_ALIGN)
#define UPB_ALIGN_OF(type) \
  offsetof(struct { char c; type member; }, member)
98 
/* Branch-prediction hints.  Compilers without __builtin_expect get a plain
 * pass-through of the condition; GCC/Clang get the condition collapsed to 0/1
 * and tagged with the expected outcome. */
#if !defined(__GNUC__) && !defined(__clang__)
#define UPB_LIKELY(x) (x)
#define UPB_UNLIKELY(x) (x)
#else
#define UPB_LIKELY(x) __builtin_expect(!!(x), 1)
#define UPB_UNLIKELY(x) __builtin_expect(!!(x), 0)
#endif
107 
108 // Macros for function attributes on compilers that support them.
/* UPB_FORCEINLINE, UPB_NOINLINE, UPB_NORETURN and UPB_PRINTF(str, first_vararg)
 * expand to GCC-style __attribute__ spellings whenever __GNUC__ is defined.
 * Under MSVC only UPB_NORETURN has an expansion (__declspec(noreturn)); the
 * other three are empty.  On any other compiler all four expand to nothing, so
 * they are hints and never change which code compiles. */
109 #ifdef __GNUC__
110 #define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
111 #define UPB_NOINLINE __attribute__((noinline))
112 #define UPB_NORETURN __attribute__((__noreturn__))
113 #define UPB_PRINTF(str, first_vararg) __attribute__((format (printf, str, first_vararg)))
114 #elif defined(_MSC_VER)
115 #define UPB_NOINLINE
116 #define UPB_FORCEINLINE
117 #define UPB_NORETURN __declspec(noreturn)
118 #define UPB_PRINTF(str, first_vararg)
119 #else  /* !defined(__GNUC__) */
120 #define UPB_FORCEINLINE
121 #define UPB_NOINLINE
122 #define UPB_NORETURN
123 #define UPB_PRINTF(str, first_vararg)
124 #endif
125 
/* UPB_MAX / UPB_MIN: larger / smaller of two values.  Each argument may be
 * evaluated twice, so do not pass expressions with side effects. */
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))

/* UPB_UNUSED(var): evaluates `var` and discards the result.  The argument is
 * parenthesized so that a compound expression such as `a + b` is discarded as
 * a whole instead of having the (void) cast bind to its first operand only. */
#define UPB_UNUSED(var) (void)(var)
130 
/* UPB_ASSUME(expr): in release mode (NDEBUG) we tell the compiler to assume
 * `expr` is true; in debug mode it is a plain assert(expr).
 *
 * The release expansions are wrapped in do { } while (0) so the macro is a
 * single statement in every configuration.  A bare `if (...) ...` expansion
 * would capture a following `else` when the macro is used as the body of an
 * unbraced `if`.  The caller supplies the terminating semicolon, exactly as
 * with assert(). */
#ifdef NDEBUG
#ifdef __GNUC__
#define UPB_ASSUME(expr) \
  do {                   \
    if (!(expr)) __builtin_unreachable(); \
  } while (0)
#elif defined _MSC_VER
#define UPB_ASSUME(expr) \
  do {                   \
    if (!(expr)) __assume(0); \
  } while (0)
#else
#define UPB_ASSUME(expr) do {} while (false && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)
#endif
143 
/* UPB_ASSERT(expr): debug builds forward to assert().  Release builds
 * (NDEBUG) still mention `expr` inside a condition that is never evaluated,
 * which keeps variables used only in assertions from triggering "unused
 * variable" warnings. */
#ifndef NDEBUG
#define UPB_ASSERT(expr) assert(expr)
#else
#define UPB_ASSERT(expr) do {} while (false && (expr))
#endif
151 
/* UPB_UNREACHABLE(): marks a point that control flow must never reach.
 * Every variant starts with assert(0).  GCC/Clang additionally get
 * __builtin_unreachable(), and MSVC gets the equivalent __assume(0) -- the
 * same intrinsic UPB_ASSUME already relies on for that compiler.  Other
 * compilers get the assert only. */
#if defined(__GNUC__) || defined(__clang__)
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while (0)
#elif defined(_MSC_VER)
#define UPB_UNREACHABLE() do { assert(0); __assume(0); } while (0)
#else
#define UPB_UNREACHABLE() do { assert(0); } while (0)
#endif
157 
158 /* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask. */
/* On __APPLE__ targets the underscore-prefixed _setjmp()/_longjmp() are used;
 * everywhere else the macros map to plain setjmp()/longjmp().
 * NOTE(review): <setjmp.h> is not included in this part of the file, so users
 * of these macros presumably get it from another header -- confirm. */
159 #ifdef __APPLE__
160 #define UPB_SETJMP(buf) _setjmp(buf)
161 #define UPB_LONGJMP(buf, val) _longjmp(buf, val)
162 #else
163 #define UPB_SETJMP(buf) setjmp(buf)
164 #define UPB_LONGJMP(buf, val) longjmp(buf, val)
165 #endif
166 
/* UPB_PTRADD(ptr, ofs): pointer addition that skips the arithmetic entirely
 * when `ofs` is zero, so that "NULL + 0" (undefined behavior) never happens.
 * `ofs` is evaluated twice when it is non-zero. */
#define UPB_PTRADD(ptr, ofs) ((ofs) == 0 ? (ptr) : (ptr) + (ofs))
169 
170 /* Configure whether fasttable is switched on or not. *************************/
171 
/* UPB_MUSTTAIL expands to __attribute__((musttail)) when the compiler offers
 * __has_attribute and reports support for the attribute, and to nothing
 * otherwise.  UPB_HAS_ATTRIBUTE is only a local helper for that probe (it
 * evaluates to 0 when __has_attribute is unavailable) and is #undef'd as soon
 * as UPB_MUSTTAIL has been defined. */
172 #ifdef __has_attribute
173 #define UPB_HAS_ATTRIBUTE(x) __has_attribute(x)
174 #else
175 #define UPB_HAS_ATTRIBUTE(x) 0
176 #endif
177 
178 #if UPB_HAS_ATTRIBUTE(musttail)
179 #define UPB_MUSTTAIL __attribute__((musttail))
180 #else
181 #define UPB_MUSTTAIL
182 #endif
183 
184 #undef UPB_HAS_ATTRIBUTE
185 
186 /* This check is not fully robust: it does not require that we have "musttail"
187  * support available. We need tail calls to avoid consuming arbitrary amounts
188  * of stack space.
189  *
190  * GCC/Clang can mostly be trusted to generate tail calls as long as
191  * optimization is enabled, but, debug builds will not generate tail calls
192  * unless "musttail" is available.
193  *
194  * We should probably either:
195  *   1. require that the compiler supports musttail.
196  *   2. add some fallback code for when musttail isn't available (ie. return
197  *      instead of tail calling). This is safe and portable, but this comes at
198  *      a CPU cost.
199  */
/* Step 1: UPB_FASTTABLE_SUPPORTED is 1 only for x86-64 or AArch64 targets
 * built with a compiler that defines __GNUC__; it is a temporary helper and is
 * #undef'd at the end of this section. */
200 #if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__)
201 #define UPB_FASTTABLE_SUPPORTED 1
202 #else
203 #define UPB_FASTTABLE_SUPPORTED 0
204 #endif
205 
/* Step 2: derive UPB_FASTTABLE from the user's request.
 *   UPB_ENABLE_FASTTABLE defined      -> 1, or a hard #error if unsupported.
 *   UPB_TRY_ENABLE_FASTTABLE defined  -> whatever UPB_FASTTABLE_SUPPORTED is.
 *   neither defined                   -> 0. */
206 /* define UPB_ENABLE_FASTTABLE to force fast table support.
207  * This is useful when we want to ensure we are really getting fasttable,
208  * for example for testing or benchmarking. */
209 #if defined(UPB_ENABLE_FASTTABLE)
210 #if !UPB_FASTTABLE_SUPPORTED
211 #error fasttable is x86-64/ARM64 only and requires GCC or Clang.
212 #endif
213 #define UPB_FASTTABLE 1
214 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible.
215  * This is useful for releasing code that might be used on multiple platforms,
216  * for example the PHP or Ruby C extensions. */
217 #elif defined(UPB_TRY_ENABLE_FASTTABLE)
218 #define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED
219 #else
220 #define UPB_FASTTABLE 0
221 #endif
222 
/* Step 3: UPB_FASTTABLE_INIT(...) drops its arguments only in the "tried but
 * unavailable" case (UPB_TRY_ENABLE_FASTTABLE defined and UPB_FASTTABLE == 0);
 * in every other configuration the arguments are passed through unchanged. */
223 /* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully
224  * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE. */
225 #if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE)
226 #define UPB_FASTTABLE_INIT(...)
227 #else
228 #define UPB_FASTTABLE_INIT(...) __VA_ARGS__
229 #endif
230 
231 #undef UPB_FASTTABLE_SUPPORTED
232 
233 /* ASAN poisoning (for arena) *************************************************/
234 
/* When __SANITIZE_ADDRESS__ is defined, UPB_ASAN is 1 and the two macros
 * forward to the ASAN runtime entry points, which are declared here by hand
 * (with C linkage under C++).  Otherwise UPB_ASAN is 0 and the macros only
 * evaluate and discard their arguments.
 * NOTE(review): some Clang versions may report ASAN only through
 * __has_feature(address_sanitizer) rather than __SANITIZE_ADDRESS__ -- confirm
 * whether that case needs to be detected here as well. */
235 #if defined(__SANITIZE_ADDRESS__)
236 #define UPB_ASAN 1
237 #ifdef __cplusplus
238 extern "C" {
239 #endif
240 void __asan_poison_memory_region(void const volatile *addr, size_t size);
241 void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
242 #ifdef __cplusplus
243 }  /* extern "C" */
244 #endif
245 #define UPB_POISON_MEMORY_REGION(addr, size) \
246   __asan_poison_memory_region((addr), (size))
247 #define UPB_UNPOISON_MEMORY_REGION(addr, size) \
248   __asan_unpoison_memory_region((addr), (size))
249 #else
250 #define UPB_ASAN 0
251 #define UPB_POISON_MEMORY_REGION(addr, size) \
252   ((void)(addr), (void)(size))
253 #define UPB_UNPOISON_MEMORY_REGION(addr, size) \
254   ((void)(addr), (void)(size))
255 #endif
256 
257 /* Disable proto2 enum checking (TEMPORARY) ***********************************/
258 
/* UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 is always defined: 1 when the build
 * defines UPB_DISABLE_PROTO2_ENUM_CHECKING, 0 otherwise, so it can be tested
 * with a plain `#if` or `if`. */
259 #ifdef UPB_DISABLE_PROTO2_ENUM_CHECKING
260 #define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1
261 #else
262 #define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0
263 #endif
264 
265 /** upb/collections.c ************************************************************/
266 
267 #include <string.h>
268 
269 
/* Size in bytes that upb_Map_New() passes to _upb_Map_New() for a key or value
 * of each upb_CType, indexed by the upb_CType value.  Strings/bytes are
 * special-cased in maps and therefore use 0.
 *
 * The table is only ever read, so it is declared const like the size-log2
 * table used for arrays. */
static const char _upb_CTypeo_mapsize[12] = {
    0,
    1,             /* kUpb_CType_Bool */
    4,             /* kUpb_CType_Float */
    4,             /* kUpb_CType_Int32 */
    4,             /* kUpb_CType_UInt32 */
    4,             /* kUpb_CType_Enum */
    sizeof(void*), /* kUpb_CType_Message */
    8,             /* kUpb_CType_Double */
    8,             /* kUpb_CType_Int64 */
    8,             /* kUpb_CType_UInt64 */
    0,             /* kUpb_CType_String */
    0,             /* kUpb_CType_Bytes */
};
285 
/* Per-type value that upb_Array_New() looks up with its upb_CType argument and
 * forwards to _upb_Array_New().  Going by the name, the entries are log2 of
 * the element size: 0, 2 and 3 for the fixed-width scalars, and a value chosen
 * by UPB_SIZE() (32-bit vs. 64-bit build) for messages, strings and bytes.
 * The first entry (index 0) has no type named next to it. */
286 static const char _upb_CTypeo_sizelg2[12] = {
287     0,
288     0,              /* kUpb_CType_Bool */
289     2,              /* kUpb_CType_Float */
290     2,              /* kUpb_CType_Int32 */
291     2,              /* kUpb_CType_UInt32 */
292     2,              /* kUpb_CType_Enum */
293     UPB_SIZE(2, 3), /* kUpb_CType_Message */
294     3,              /* kUpb_CType_Double */
295     3,              /* kUpb_CType_Int64 */
296     3,              /* kUpb_CType_UInt64 */
297     UPB_SIZE(3, 4), /* kUpb_CType_String */
298     UPB_SIZE(3, 4), /* kUpb_CType_Bytes */
299 };
300 
301 /** upb_Array *****************************************************************/
302 
upb_Array_New(upb_Arena * a,upb_CType type)303 upb_Array* upb_Array_New(upb_Arena* a, upb_CType type) {
304   return _upb_Array_New(a, 4, _upb_CTypeo_sizelg2[type]);
305 }
306 
upb_Array_Size(const upb_Array * arr)307 size_t upb_Array_Size(const upb_Array* arr) { return arr->len; }
308 
upb_Array_Get(const upb_Array * arr,size_t i)309 upb_MessageValue upb_Array_Get(const upb_Array* arr, size_t i) {
310   upb_MessageValue ret;
311   const char* data = _upb_array_constptr(arr);
312   int lg2 = arr->data & 7;
313   UPB_ASSERT(i < arr->len);
314   memcpy(&ret, data + (i << lg2), 1 << lg2);
315   return ret;
316 }
317 
/* Overwrites element `i` of `arr` with the leading 1 << (arr->data & 7) bytes
 * of `val`.  `i` must be less than arr->len (checked with UPB_ASSERT). */
void upb_Array_Set(upb_Array* arr, size_t i, upb_MessageValue val) {
  char* base = _upb_array_ptr(arr);
  const int elem_lg2 = arr->data & 7;
  UPB_ASSERT(i < arr->len);
  char* slot = base + (i << elem_lg2);
  memcpy(slot, &val, 1 << elem_lg2);
}
324 
/* Grows `arr` by one element using `arena` and stores `val` in the new last
 * slot.  Returns false, leaving the value unstored, when the resize fails. */
bool upb_Array_Append(upb_Array* arr, upb_MessageValue val, upb_Arena* arena) {
  const size_t grown_len = arr->len + 1;
  if (upb_Array_Resize(arr, grown_len, arena)) {
    upb_Array_Set(arr, arr->len - 1, val);
    return true;
  }
  return false;
}
332 
/* Copies `count` elements from index `src_idx` to index `dst_idx` inside
 * `arr`.  memmove() is used, so the two ranges may overlap.  No bounds are
 * checked here. */
void upb_Array_Move(upb_Array* arr, size_t dst_idx, size_t src_idx,
                    size_t count) {
  char* base = _upb_array_ptr(arr);
  const int elem_lg2 = arr->data & 7;
  char* to = base + (dst_idx << elem_lg2);
  const char* from = base + (src_idx << elem_lg2);
  memmove(to, from, count << elem_lg2);
}
339 
/* Opens a gap of `count` elements at index `i`: the array is resized to
 * len + count and the old elements from `i` onward are shifted up by `count`.
 * The gap itself is not written by this function.  Returns false if the
 * resize fails.  Requires i <= arr->len and no overflow in len + count (both
 * checked with UPB_ASSERT). */
bool upb_Array_Insert(upb_Array* arr, size_t i, size_t count,
                      upb_Arena* arena) {
  UPB_ASSERT(i <= arr->len);
  UPB_ASSERT(count + arr->len >= count);
  const size_t old_len = arr->len;
  const bool resized = upb_Array_Resize(arr, old_len + count, arena);
  if (resized) {
    upb_Array_Move(arr, i + count, i, old_len - i);
  }
  return resized;
}
351 
352 /*
353  *              i        end      arr->len
354  * |------------|XXXXXXXX|--------|
355  */
upb_Array_Delete(upb_Array * arr,size_t i,size_t count)356 void upb_Array_Delete(upb_Array* arr, size_t i, size_t count) {
357   size_t end = i + count;
358   UPB_ASSERT(i <= end);
359   UPB_ASSERT(end <= arr->len);
360   upb_Array_Move(arr, i, end, arr->len - end);
361   arr->len -= count;
362 }
363 
/* Public entry point that forwards to _upb_Array_Resize() and returns its
 * result unchanged. */
bool upb_Array_Resize(upb_Array* arr, size_t size, upb_Arena* arena) {
  const bool ok = _upb_Array_Resize(arr, size, arena);
  return ok;
}
367 
368 /** upb_Map *******************************************************************/
369 
upb_Map_New(upb_Arena * a,upb_CType key_type,upb_CType value_type)370 upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type) {
371   return _upb_Map_New(a, _upb_CTypeo_mapsize[key_type],
372                       _upb_CTypeo_mapsize[value_type]);
373 }
374 
/* Returns whatever _upb_Map_Size() reports for `map`. */
size_t upb_Map_Size(const upb_Map* map) {
  const size_t count = _upb_Map_Size(map);
  return count;
}
376 
/* Looks up `key` through _upb_Map_Get(), using the key and value sizes
 * recorded in `map`, with `val` as the output argument.  Returns that helper's
 * result. */
bool upb_Map_Get(const upb_Map* map, upb_MessageValue key,
                 upb_MessageValue* val) {
  const bool found =
      _upb_Map_Get(map, &key, map->key_size, val, map->val_size);
  return found;
}
381 
/* Public entry point that forwards to _upb_Map_Clear(). */
void upb_Map_Clear(upb_Map* map) {
  _upb_Map_Clear(map);
}
383 
upb_Map_Insert(upb_Map * map,upb_MessageValue key,upb_MessageValue val,upb_Arena * arena)384 upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key,
385                                    upb_MessageValue val, upb_Arena* arena) {
386   return (upb_MapInsertStatus)_upb_Map_Insert(map, &key, map->key_size, &val,
387                                               map->val_size, arena);
388 }
389 
/* Deletes `key` through _upb_Map_Delete(), using the key size recorded in
 * `map`, and returns that helper's result. */
bool upb_Map_Delete(upb_Map* map, upb_MessageValue key) {
  const bool removed = _upb_Map_Delete(map, &key, map->key_size);
  return removed;
}
393 
/* Forwards to _upb_map_next() with the iterator position `*iter` and returns
 * its result. */
bool upb_MapIterator_Next(const upb_Map* map, size_t* iter) {
  const bool advanced = _upb_map_next(map, iter);
  return advanced;
}
397 
upb_MapIterator_Done(const upb_Map * map,size_t iter)398 bool upb_MapIterator_Done(const upb_Map* map, size_t iter) {
399   upb_strtable_iter i;
400   UPB_ASSERT(iter != kUpb_Map_Begin);
401   i.t = &map->table;
402   i.index = iter;
403   return upb_strtable_done(&i);
404 }
405 
406 /* Returns the key and value for this entry of the map. */
upb_MapIterator_Key(const upb_Map * map,size_t iter)407 upb_MessageValue upb_MapIterator_Key(const upb_Map* map, size_t iter) {
408   upb_strtable_iter i;
409   upb_MessageValue ret;
410   i.t = &map->table;
411   i.index = iter;
412   _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size);
413   return ret;
414 }
415 
upb_MapIterator_Value(const upb_Map * map,size_t iter)416 upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter) {
417   upb_strtable_iter i;
418   upb_MessageValue ret;
419   i.t = &map->table;
420   i.index = iter;
421   _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size);
422   return ret;
423 }
424 
425 /* void upb_MapIterator_SetValue(upb_Map *map, size_t iter, upb_MessageValue
426  * value); */
427 
428 /** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upb.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input
429  * file:
430  *
431  *     google/protobuf/descriptor.proto
432  *
433  * Do not edit -- your changes will be discarded when the file is
434  * regenerated. */
435 
436 #include <stddef.h>
437 
438 
/* upbc-generated tables for google.protobuf.FileDescriptorSet: one sub-table
 * reference (FileDescriptorProto), one field entry, and the upb_MiniTable
 * that points at both arrays.
 * NOTE(review): the meaning of the positional initializers comes from the
 * upb_MiniTable_Field / upb_MiniTable declarations, which are not shown here. */
439 static const upb_MiniTable_Sub google_protobuf_FileDescriptorSet_submsgs[1] = {
440   {.submsg = &google_protobuf_FileDescriptorProto_msginit},
441 };
442 
443 static const upb_MiniTable_Field google_protobuf_FileDescriptorSet__fields[1] = {
444   {1, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
445 };
446 
447 const upb_MiniTable google_protobuf_FileDescriptorSet_msginit = {
448   &google_protobuf_FileDescriptorSet_submsgs[0],
449   &google_protobuf_FileDescriptorSet__fields[0],
450   UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
451 };
452 
/* upbc-generated tables for google.protobuf.FileDescriptorProto: six
 * sub-table references, twelve field entries, and the upb_MiniTable that
 * points at both arrays.  Entries whose fourth initializer is a small integer
 * use it as an index into the _submsgs array above; the others carry
 * kUpb_NoSub.
 * NOTE(review): the meaning of the remaining positional initializers comes
 * from the struct declarations in the header, which are not shown here. */
453 static const upb_MiniTable_Sub google_protobuf_FileDescriptorProto_submsgs[6] = {
454   {.submsg = &google_protobuf_DescriptorProto_msginit},
455   {.submsg = &google_protobuf_EnumDescriptorProto_msginit},
456   {.submsg = &google_protobuf_ServiceDescriptorProto_msginit},
457   {.submsg = &google_protobuf_FieldDescriptorProto_msginit},
458   {.submsg = &google_protobuf_FileOptions_msginit},
459   {.submsg = &google_protobuf_SourceCodeInfo_msginit},
460 };
461 
462 static const upb_MiniTable_Field google_protobuf_FileDescriptorProto__fields[12] = {
463   {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
464   {2, UPB_SIZE(12, 24), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
465   {3, UPB_SIZE(20, 40), UPB_SIZE(0, 0), kUpb_NoSub, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
466   {4, UPB_SIZE(24, 48), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
467   {5, UPB_SIZE(28, 56), UPB_SIZE(0, 0), 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
468   {6, UPB_SIZE(32, 64), UPB_SIZE(0, 0), 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
469   {7, UPB_SIZE(36, 72), UPB_SIZE(0, 0), 3, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
470   {8, UPB_SIZE(40, 80), UPB_SIZE(3, 3), 4, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
471   {9, UPB_SIZE(44, 88), UPB_SIZE(4, 4), 5, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
472   {10, UPB_SIZE(48, 96), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
473   {11, UPB_SIZE(52, 104), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
474   {12, UPB_SIZE(56, 112), UPB_SIZE(5, 5), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
475 };
476 
477 const upb_MiniTable google_protobuf_FileDescriptorProto_msginit = {
478   &google_protobuf_FileDescriptorProto_submsgs[0],
479   &google_protobuf_FileDescriptorProto__fields[0],
480   UPB_SIZE(64, 128), 12, kUpb_ExtMode_NonExtendable, 12, 255, 0,
481 };
482 
/* upbc-generated tables for google.protobuf.DescriptorProto: eight sub-table
 * references (including a reference to DescriptorProto's own mini-table), ten
 * field entries, and the upb_MiniTable that points at both arrays.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
483 static const upb_MiniTable_Sub google_protobuf_DescriptorProto_submsgs[8] = {
484   {.submsg = &google_protobuf_FieldDescriptorProto_msginit},
485   {.submsg = &google_protobuf_DescriptorProto_msginit},
486   {.submsg = &google_protobuf_EnumDescriptorProto_msginit},
487   {.submsg = &google_protobuf_DescriptorProto_ExtensionRange_msginit},
488   {.submsg = &google_protobuf_FieldDescriptorProto_msginit},
489   {.submsg = &google_protobuf_MessageOptions_msginit},
490   {.submsg = &google_protobuf_OneofDescriptorProto_msginit},
491   {.submsg = &google_protobuf_DescriptorProto_ReservedRange_msginit},
492 };
493 
494 static const upb_MiniTable_Field google_protobuf_DescriptorProto__fields[10] = {
495   {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
496   {2, UPB_SIZE(12, 24), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
497   {3, UPB_SIZE(16, 32), UPB_SIZE(0, 0), 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
498   {4, UPB_SIZE(20, 40), UPB_SIZE(0, 0), 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
499   {5, UPB_SIZE(24, 48), UPB_SIZE(0, 0), 3, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
500   {6, UPB_SIZE(28, 56), UPB_SIZE(0, 0), 4, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
501   {7, UPB_SIZE(32, 64), UPB_SIZE(2, 2), 5, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
502   {8, UPB_SIZE(36, 72), UPB_SIZE(0, 0), 6, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
503   {9, UPB_SIZE(40, 80), UPB_SIZE(0, 0), 7, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
504   {10, UPB_SIZE(44, 88), UPB_SIZE(0, 0), kUpb_NoSub, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
505 };
506 
507 const upb_MiniTable google_protobuf_DescriptorProto_msginit = {
508   &google_protobuf_DescriptorProto_submsgs[0],
509   &google_protobuf_DescriptorProto__fields[0],
510   UPB_SIZE(48, 96), 10, kUpb_ExtMode_NonExtendable, 10, 255, 0,
511 };
512 
/* upbc-generated tables for google.protobuf.DescriptorProto.ExtensionRange:
 * one sub-table reference (ExtensionRangeOptions), three field entries, and
 * the upb_MiniTable that points at both arrays.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
513 static const upb_MiniTable_Sub google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
514   {.submsg = &google_protobuf_ExtensionRangeOptions_msginit},
515 };
516 
517 static const upb_MiniTable_Field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
518   {1, UPB_SIZE(4, 4), UPB_SIZE(1, 1), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
519   {2, UPB_SIZE(8, 8), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
520   {3, UPB_SIZE(12, 16), UPB_SIZE(3, 3), 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
521 };
522 
523 const upb_MiniTable google_protobuf_DescriptorProto_ExtensionRange_msginit = {
524   &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
525   &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
526   UPB_SIZE(16, 24), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
527 };
528 
/* upbc-generated tables for google.protobuf.DescriptorProto.ReservedRange:
 * two field entries and the upb_MiniTable that points at them.  There is no
 * sub-table array for this message, so the table's first member is NULL.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
529 static const upb_MiniTable_Field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
530   {1, UPB_SIZE(4, 4), UPB_SIZE(1, 1), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
531   {2, UPB_SIZE(8, 8), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
532 };
533 
534 const upb_MiniTable google_protobuf_DescriptorProto_ReservedRange_msginit = {
535   NULL,
536   &google_protobuf_DescriptorProto_ReservedRange__fields[0],
537   UPB_SIZE(16, 16), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
538 };
539 
/* upbc-generated tables for google.protobuf.ExtensionRangeOptions: one
 * sub-table reference (UninterpretedOption), one field entry (its first
 * initializer is 999), and the upb_MiniTable that points at both arrays.
 * Unlike the other tables in this part of the file, this one is marked
 * kUpb_ExtMode_Extendable.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
540 static const upb_MiniTable_Sub google_protobuf_ExtensionRangeOptions_submsgs[1] = {
541   {.submsg = &google_protobuf_UninterpretedOption_msginit},
542 };
543 
544 static const upb_MiniTable_Field google_protobuf_ExtensionRangeOptions__fields[1] = {
545   {999, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
546 };
547 
548 const upb_MiniTable google_protobuf_ExtensionRangeOptions_msginit = {
549   &google_protobuf_ExtensionRangeOptions_submsgs[0],
550   &google_protobuf_ExtensionRangeOptions__fields[0],
551   UPB_SIZE(8, 8), 1, kUpb_ExtMode_Extendable, 0, 255, 0,
552 };
553 
/* upbc-generated tables for google.protobuf.FieldDescriptorProto: three
 * sub-table references (two enum tables via .subenum -- Label and Type -- and
 * one message table, FieldOptions), eleven field entries, and the
 * upb_MiniTable that points at both arrays.  The last field entry starts with
 * 17 while the ones before it run 1..10.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
554 static const upb_MiniTable_Sub google_protobuf_FieldDescriptorProto_submsgs[3] = {
555   {.subenum = &google_protobuf_FieldDescriptorProto_Label_enuminit},
556   {.subenum = &google_protobuf_FieldDescriptorProto_Type_enuminit},
557   {.submsg = &google_protobuf_FieldOptions_msginit},
558 };
559 
560 static const upb_MiniTable_Field google_protobuf_FieldDescriptorProto__fields[11] = {
561   {1, UPB_SIZE(24, 24), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
562   {2, UPB_SIZE(32, 40), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
563   {3, UPB_SIZE(4, 4), UPB_SIZE(3, 3), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
564   {4, UPB_SIZE(8, 8), UPB_SIZE(4, 4), 0, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
565   {5, UPB_SIZE(12, 12), UPB_SIZE(5, 5), 1, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
566   {6, UPB_SIZE(40, 56), UPB_SIZE(6, 6), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
567   {7, UPB_SIZE(48, 72), UPB_SIZE(7, 7), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
568   {8, UPB_SIZE(56, 88), UPB_SIZE(8, 8), 2, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
569   {9, UPB_SIZE(16, 16), UPB_SIZE(9, 9), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
570   {10, UPB_SIZE(60, 96), UPB_SIZE(10, 10), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
571   {17, UPB_SIZE(20, 20), UPB_SIZE(11, 11), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
572 };
573 
574 const upb_MiniTable google_protobuf_FieldDescriptorProto_msginit = {
575   &google_protobuf_FieldDescriptorProto_submsgs[0],
576   &google_protobuf_FieldDescriptorProto__fields[0],
577   UPB_SIZE(72, 112), 11, kUpb_ExtMode_NonExtendable, 10, 255, 0,
578 };
579 
/* upbc-generated tables for google.protobuf.OneofDescriptorProto: one
 * sub-table reference (OneofOptions), two field entries, and the
 * upb_MiniTable that points at both arrays.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
580 static const upb_MiniTable_Sub google_protobuf_OneofDescriptorProto_submsgs[1] = {
581   {.submsg = &google_protobuf_OneofOptions_msginit},
582 };
583 
584 static const upb_MiniTable_Field google_protobuf_OneofDescriptorProto__fields[2] = {
585   {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
586   {2, UPB_SIZE(12, 24), UPB_SIZE(2, 2), 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
587 };
588 
589 const upb_MiniTable google_protobuf_OneofDescriptorProto_msginit = {
590   &google_protobuf_OneofDescriptorProto_submsgs[0],
591   &google_protobuf_OneofDescriptorProto__fields[0],
592   UPB_SIZE(16, 32), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
593 };
594 
/* upbc-generated tables for google.protobuf.EnumDescriptorProto: three
 * sub-table references (EnumValueDescriptorProto, EnumOptions,
 * EnumReservedRange), five field entries, and the upb_MiniTable that points at
 * both arrays.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
595 static const upb_MiniTable_Sub google_protobuf_EnumDescriptorProto_submsgs[3] = {
596   {.submsg = &google_protobuf_EnumValueDescriptorProto_msginit},
597   {.submsg = &google_protobuf_EnumOptions_msginit},
598   {.submsg = &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit},
599 };
600 
601 static const upb_MiniTable_Field google_protobuf_EnumDescriptorProto__fields[5] = {
602   {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
603   {2, UPB_SIZE(12, 24), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
604   {3, UPB_SIZE(16, 32), UPB_SIZE(2, 2), 1, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
605   {4, UPB_SIZE(20, 40), UPB_SIZE(0, 0), 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
606   {5, UPB_SIZE(24, 48), UPB_SIZE(0, 0), kUpb_NoSub, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
607 };
608 
609 const upb_MiniTable google_protobuf_EnumDescriptorProto_msginit = {
610   &google_protobuf_EnumDescriptorProto_submsgs[0],
611   &google_protobuf_EnumDescriptorProto__fields[0],
612   UPB_SIZE(32, 56), 5, kUpb_ExtMode_NonExtendable, 5, 255, 0,
613 };
614 
/* upbc-generated tables for
 * google.protobuf.EnumDescriptorProto.EnumReservedRange: two field entries and
 * the upb_MiniTable that points at them.  There is no sub-table array for this
 * message, so the table's first member is NULL.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
615 static const upb_MiniTable_Field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
616   {1, UPB_SIZE(4, 4), UPB_SIZE(1, 1), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
617   {2, UPB_SIZE(8, 8), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
618 };
619 
620 const upb_MiniTable google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
621   NULL,
622   &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
623   UPB_SIZE(16, 16), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
624 };
625 
/* upbc-generated tables for google.protobuf.EnumValueDescriptorProto: one
 * sub-table reference (EnumValueOptions), three field entries, and the
 * upb_MiniTable that points at both arrays.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
626 static const upb_MiniTable_Sub google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
627   {.submsg = &google_protobuf_EnumValueOptions_msginit},
628 };
629 
630 static const upb_MiniTable_Field google_protobuf_EnumValueDescriptorProto__fields[3] = {
631   {1, UPB_SIZE(8, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
632   {2, UPB_SIZE(4, 4), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
633   {3, UPB_SIZE(16, 24), UPB_SIZE(3, 3), 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
634 };
635 
636 const upb_MiniTable google_protobuf_EnumValueDescriptorProto_msginit = {
637   &google_protobuf_EnumValueDescriptorProto_submsgs[0],
638   &google_protobuf_EnumValueDescriptorProto__fields[0],
639   UPB_SIZE(24, 32), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
640 };
641 
/* upbc-generated tables for google.protobuf.ServiceDescriptorProto: two
 * sub-table references (MethodDescriptorProto, ServiceOptions), three field
 * entries, and the upb_MiniTable that points at both arrays.
 * NOTE(review): the meaning of the positional initializers comes from the
 * struct declarations in the header, which are not shown here. */
642 static const upb_MiniTable_Sub google_protobuf_ServiceDescriptorProto_submsgs[2] = {
643   {.submsg = &google_protobuf_MethodDescriptorProto_msginit},
644   {.submsg = &google_protobuf_ServiceOptions_msginit},
645 };
646 
647 static const upb_MiniTable_Field google_protobuf_ServiceDescriptorProto__fields[3] = {
648   {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
649   {2, UPB_SIZE(12, 24), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
650   {3, UPB_SIZE(16, 32), UPB_SIZE(2, 2), 1, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
651 };
652 
653 const upb_MiniTable google_protobuf_ServiceDescriptorProto_msginit = {
654   &google_protobuf_ServiceDescriptorProto_submsgs[0],
655   &google_protobuf_ServiceDescriptorProto__fields[0],
656   UPB_SIZE(24, 40), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
657 };
658 
659 static const upb_MiniTable_Sub google_protobuf_MethodDescriptorProto_submsgs[1] = {
660   {.submsg = &google_protobuf_MethodOptions_msginit},
661 };
662 
/* Field rows for MethodDescriptorProto, ordered by ascending first column
 * (1..6). Only row 4 references a sub-table (fourth column 0); every other row
 * uses kUpb_NoSub. Column meanings follow upb_MiniTable_Field, declared
 * outside this chunk. */
663 static const upb_MiniTable_Field google_protobuf_MethodDescriptorProto__fields[6] = {
664   {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
665   {2, UPB_SIZE(12, 24), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
666   {3, UPB_SIZE(20, 40), UPB_SIZE(3, 3), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
667   {4, UPB_SIZE(28, 56), UPB_SIZE(4, 4), 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
668   {5, UPB_SIZE(1, 1), UPB_SIZE(5, 5), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
669   {6, UPB_SIZE(2, 2), UPB_SIZE(6, 6), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
670 };
671 
/* MiniTable for MethodDescriptorProto: ties together its sub-table and field
 * arrays. The 6 following UPB_SIZE(...) equals the length of the __fields
 * array; the message is marked non-extendable. */
672 const upb_MiniTable google_protobuf_MethodDescriptorProto_msginit = {
673   &google_protobuf_MethodDescriptorProto_submsgs[0],
674   &google_protobuf_MethodDescriptorProto__fields[0],
675   UPB_SIZE(32, 64), 6, kUpb_ExtMode_NonExtendable, 6, 255, 0,
676 };
677 
/* Sub-tables of FileOptions: [0] the OptimizeMode enum table,
 * [1] the UninterpretedOption message table. */
678 static const upb_MiniTable_Sub google_protobuf_FileOptions_submsgs[2] = {
679   {.subenum = &google_protobuf_FileOptions_OptimizeMode_enuminit},
680   {.submsg = &google_protobuf_UninterpretedOption_msginit},
681 };
682 
/* Field rows for FileOptions, ordered by ascending first column (1 .. 999,
 * with gaps). Row 9 references sub-table 0 and row 999 references sub-table 1;
 * all other rows use kUpb_NoSub. Row 999 is the only kUpb_FieldMode_Array row.
 * Column meanings follow upb_MiniTable_Field, declared outside this chunk. */
683 static const upb_MiniTable_Field google_protobuf_FileOptions__fields[21] = {
684   {1, UPB_SIZE(20, 24), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
685   {8, UPB_SIZE(28, 40), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
686   {9, UPB_SIZE(4, 4), UPB_SIZE(3, 3), 0, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
687   {10, UPB_SIZE(8, 8), UPB_SIZE(4, 4), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
688   {11, UPB_SIZE(36, 56), UPB_SIZE(5, 5), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
689   {16, UPB_SIZE(9, 9), UPB_SIZE(6, 6), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
690   {17, UPB_SIZE(10, 10), UPB_SIZE(7, 7), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
691   {18, UPB_SIZE(11, 11), UPB_SIZE(8, 8), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
692   {20, UPB_SIZE(12, 12), UPB_SIZE(9, 9), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
693   {23, UPB_SIZE(13, 13), UPB_SIZE(10, 10), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
694   {27, UPB_SIZE(14, 14), UPB_SIZE(11, 11), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
695   {31, UPB_SIZE(15, 15), UPB_SIZE(12, 12), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
696   {36, UPB_SIZE(44, 72), UPB_SIZE(13, 13), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
697   {37, UPB_SIZE(52, 88), UPB_SIZE(14, 14), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
698   {39, UPB_SIZE(60, 104), UPB_SIZE(15, 15), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
699   {40, UPB_SIZE(68, 120), UPB_SIZE(16, 16), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
700   {41, UPB_SIZE(76, 136), UPB_SIZE(17, 17), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
701   {42, UPB_SIZE(16, 16), UPB_SIZE(18, 18), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
702   {44, UPB_SIZE(84, 152), UPB_SIZE(19, 19), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
703   {45, UPB_SIZE(92, 168), UPB_SIZE(20, 20), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
704   {999, UPB_SIZE(100, 184), UPB_SIZE(0, 0), 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
705 };
706 
/* MiniTable for FileOptions: ties together its sub-table and field arrays.
 * The 21 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked kUpb_ExtMode_Extendable. */
707 const upb_MiniTable google_protobuf_FileOptions_msginit = {
708   &google_protobuf_FileOptions_submsgs[0],
709   &google_protobuf_FileOptions__fields[0],
710   UPB_SIZE(104, 192), 21, kUpb_ExtMode_Extendable, 1, 255, 0,
711 };
712 
/* Sub-tables of MessageOptions: a single entry, UninterpretedOption. */
713 static const upb_MiniTable_Sub google_protobuf_MessageOptions_submsgs[1] = {
714   {.submsg = &google_protobuf_UninterpretedOption_msginit},
715 };
716 
/* Field rows for MessageOptions, ordered by ascending first column
 * (1, 2, 3, 7, 999). Only row 999 references a sub-table and uses
 * kUpb_FieldMode_Array. Column meanings follow upb_MiniTable_Field, declared
 * outside this chunk. */
717 static const upb_MiniTable_Field google_protobuf_MessageOptions__fields[5] = {
718   {1, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
719   {2, UPB_SIZE(2, 2), UPB_SIZE(2, 2), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
720   {3, UPB_SIZE(3, 3), UPB_SIZE(3, 3), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
721   {7, UPB_SIZE(4, 4), UPB_SIZE(4, 4), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
722   {999, UPB_SIZE(8, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
723 };
724 
/* MiniTable for MessageOptions: ties together its sub-table and field arrays.
 * The 5 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked kUpb_ExtMode_Extendable. */
725 const upb_MiniTable google_protobuf_MessageOptions_msginit = {
726   &google_protobuf_MessageOptions_submsgs[0],
727   &google_protobuf_MessageOptions__fields[0],
728   UPB_SIZE(16, 16), 5, kUpb_ExtMode_Extendable, 3, 255, 0,
729 };
730 
/* Sub-tables of FieldOptions: [0] the CType enum table, [1] the JSType enum
 * table, [2] the UninterpretedOption message table. */
731 static const upb_MiniTable_Sub google_protobuf_FieldOptions_submsgs[3] = {
732   {.subenum = &google_protobuf_FieldOptions_CType_enuminit},
733   {.subenum = &google_protobuf_FieldOptions_JSType_enuminit},
734   {.submsg = &google_protobuf_UninterpretedOption_msginit},
735 };
736 
/* Field rows for FieldOptions, ordered by ascending first column
 * (1, 2, 3, 5, 6, 10, 15, 999). Rows 1, 6 and 999 reference sub-tables 0, 1
 * and 2 respectively; the rest use kUpb_NoSub. Column meanings follow
 * upb_MiniTable_Field, declared outside this chunk. */
737 static const upb_MiniTable_Field google_protobuf_FieldOptions__fields[8] = {
738   {1, UPB_SIZE(4, 4), UPB_SIZE(1, 1), 0, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
739   {2, UPB_SIZE(8, 8), UPB_SIZE(2, 2), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
740   {3, UPB_SIZE(9, 9), UPB_SIZE(3, 3), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
741   {5, UPB_SIZE(10, 10), UPB_SIZE(4, 4), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
742   {6, UPB_SIZE(12, 12), UPB_SIZE(5, 5), 1, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
743   {10, UPB_SIZE(16, 16), UPB_SIZE(6, 6), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
744   {15, UPB_SIZE(17, 17), UPB_SIZE(7, 7), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
745   {999, UPB_SIZE(20, 24), UPB_SIZE(0, 0), 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
746 };
747 
/* MiniTable for FieldOptions: ties together its sub-table and field arrays.
 * The 8 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked kUpb_ExtMode_Extendable. */
748 const upb_MiniTable google_protobuf_FieldOptions_msginit = {
749   &google_protobuf_FieldOptions_submsgs[0],
750   &google_protobuf_FieldOptions__fields[0],
751   UPB_SIZE(24, 32), 8, kUpb_ExtMode_Extendable, 3, 255, 0,
752 };
753 
/* Sub-tables of OneofOptions: a single entry, UninterpretedOption. */
754 static const upb_MiniTable_Sub google_protobuf_OneofOptions_submsgs[1] = {
755   {.submsg = &google_protobuf_UninterpretedOption_msginit},
756 };
757 
/* Field rows for OneofOptions: a single kUpb_FieldMode_Array row (first column
 * 999) that references sub-table 0. Column meanings follow
 * upb_MiniTable_Field, declared outside this chunk. */
758 static const upb_MiniTable_Field google_protobuf_OneofOptions__fields[1] = {
759   {999, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
760 };
761 
/* MiniTable for OneofOptions: ties together its sub-table and field arrays.
 * The 1 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked kUpb_ExtMode_Extendable. */
762 const upb_MiniTable google_protobuf_OneofOptions_msginit = {
763   &google_protobuf_OneofOptions_submsgs[0],
764   &google_protobuf_OneofOptions__fields[0],
765   UPB_SIZE(8, 8), 1, kUpb_ExtMode_Extendable, 0, 255, 0,
766 };
767 
/* Sub-tables of EnumOptions: a single entry, UninterpretedOption. */
768 static const upb_MiniTable_Sub google_protobuf_EnumOptions_submsgs[1] = {
769   {.submsg = &google_protobuf_UninterpretedOption_msginit},
770 };
771 
/* Field rows for EnumOptions, ordered by ascending first column (2, 3, 999).
 * Only row 999 references a sub-table and uses kUpb_FieldMode_Array. Column
 * meanings follow upb_MiniTable_Field, declared outside this chunk. */
772 static const upb_MiniTable_Field google_protobuf_EnumOptions__fields[3] = {
773   {2, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
774   {3, UPB_SIZE(2, 2), UPB_SIZE(2, 2), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
775   {999, UPB_SIZE(4, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
776 };
777 
/* MiniTable for EnumOptions: ties together its sub-table and field arrays.
 * The 3 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked kUpb_ExtMode_Extendable. */
778 const upb_MiniTable google_protobuf_EnumOptions_msginit = {
779   &google_protobuf_EnumOptions_submsgs[0],
780   &google_protobuf_EnumOptions__fields[0],
781   UPB_SIZE(8, 16), 3, kUpb_ExtMode_Extendable, 0, 255, 0,
782 };
783 
/* Sub-tables of EnumValueOptions: a single entry, UninterpretedOption. */
784 static const upb_MiniTable_Sub google_protobuf_EnumValueOptions_submsgs[1] = {
785   {.submsg = &google_protobuf_UninterpretedOption_msginit},
786 };
787 
/* Field rows for EnumValueOptions, ordered by ascending first column (1, 999).
 * Only row 999 references a sub-table and uses kUpb_FieldMode_Array. Column
 * meanings follow upb_MiniTable_Field, declared outside this chunk. */
788 static const upb_MiniTable_Field google_protobuf_EnumValueOptions__fields[2] = {
789   {1, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
790   {999, UPB_SIZE(4, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
791 };
792 
/* MiniTable for EnumValueOptions: ties together its sub-table and field
 * arrays. The 2 following UPB_SIZE(...) equals the length of the __fields
 * array; the message is marked kUpb_ExtMode_Extendable. */
793 const upb_MiniTable google_protobuf_EnumValueOptions_msginit = {
794   &google_protobuf_EnumValueOptions_submsgs[0],
795   &google_protobuf_EnumValueOptions__fields[0],
796   UPB_SIZE(8, 16), 2, kUpb_ExtMode_Extendable, 1, 255, 0,
797 };
798 
/* Sub-tables of ServiceOptions: a single entry, UninterpretedOption. */
799 static const upb_MiniTable_Sub google_protobuf_ServiceOptions_submsgs[1] = {
800   {.submsg = &google_protobuf_UninterpretedOption_msginit},
801 };
802 
/* Field rows for ServiceOptions, ordered by ascending first column (33, 999).
 * Only row 999 references a sub-table and uses kUpb_FieldMode_Array. Column
 * meanings follow upb_MiniTable_Field, declared outside this chunk. */
803 static const upb_MiniTable_Field google_protobuf_ServiceOptions__fields[2] = {
804   {33, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
805   {999, UPB_SIZE(4, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
806 };
807 
/* MiniTable for ServiceOptions: ties together its sub-table and field arrays.
 * The 2 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked kUpb_ExtMode_Extendable. */
808 const upb_MiniTable google_protobuf_ServiceOptions_msginit = {
809   &google_protobuf_ServiceOptions_submsgs[0],
810   &google_protobuf_ServiceOptions__fields[0],
811   UPB_SIZE(8, 16), 2, kUpb_ExtMode_Extendable, 0, 255, 0,
812 };
813 
/* Sub-tables of MethodOptions: [0] the IdempotencyLevel enum table,
 * [1] the UninterpretedOption message table. */
814 static const upb_MiniTable_Sub google_protobuf_MethodOptions_submsgs[2] = {
815   {.subenum = &google_protobuf_MethodOptions_IdempotencyLevel_enuminit},
816   {.submsg = &google_protobuf_UninterpretedOption_msginit},
817 };
818 
/* Field rows for MethodOptions, ordered by ascending first column
 * (33, 34, 999). Row 34 references sub-table 0 and row 999 references
 * sub-table 1. Column meanings follow upb_MiniTable_Field, declared outside
 * this chunk. */
819 static const upb_MiniTable_Field google_protobuf_MethodOptions__fields[3] = {
820   {33, UPB_SIZE(1, 1), UPB_SIZE(1, 1), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
821   {34, UPB_SIZE(4, 4), UPB_SIZE(2, 2), 0, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
822   {999, UPB_SIZE(8, 8), UPB_SIZE(0, 0), 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
823 };
824 
/* MiniTable for MethodOptions: ties together its sub-table and field arrays.
 * The 3 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked kUpb_ExtMode_Extendable. */
825 const upb_MiniTable google_protobuf_MethodOptions_msginit = {
826   &google_protobuf_MethodOptions_submsgs[0],
827   &google_protobuf_MethodOptions__fields[0],
828   UPB_SIZE(16, 16), 3, kUpb_ExtMode_Extendable, 0, 255, 0,
829 };
830 
/* Sub-tables of UninterpretedOption: a single entry, its nested NamePart
 * message. */
831 static const upb_MiniTable_Sub google_protobuf_UninterpretedOption_submsgs[1] = {
832   {.submsg = &google_protobuf_UninterpretedOption_NamePart_msginit},
833 };
834 
/* Field rows for UninterpretedOption, ordered by ascending first column
 * (2..8). Row 2 is the only kUpb_FieldMode_Array row and the only one that
 * references a sub-table; rows 4-6 use the 8-byte representation. Column
 * meanings follow upb_MiniTable_Field, declared outside this chunk. */
835 static const upb_MiniTable_Field google_protobuf_UninterpretedOption__fields[7] = {
836   {2, UPB_SIZE(4, 8), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
837   {3, UPB_SIZE(8, 16), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
838   {4, UPB_SIZE(32, 64), UPB_SIZE(2, 2), kUpb_NoSub, 4, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
839   {5, UPB_SIZE(40, 72), UPB_SIZE(3, 3), kUpb_NoSub, 3, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
840   {6, UPB_SIZE(48, 80), UPB_SIZE(4, 4), kUpb_NoSub, 1, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
841   {7, UPB_SIZE(16, 32), UPB_SIZE(5, 5), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
842   {8, UPB_SIZE(24, 48), UPB_SIZE(6, 6), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
843 };
844 
/* MiniTable for UninterpretedOption: ties together its sub-table and field
 * arrays. The 7 following UPB_SIZE(...) equals the length of the __fields
 * array; the message is marked non-extendable. */
845 const upb_MiniTable google_protobuf_UninterpretedOption_msginit = {
846   &google_protobuf_UninterpretedOption_submsgs[0],
847   &google_protobuf_UninterpretedOption__fields[0],
848   UPB_SIZE(56, 88), 7, kUpb_ExtMode_NonExtendable, 0, 255, 0,
849 };
850 
/* Field rows for UninterpretedOption.NamePart, ordered by ascending first
 * column (1, 2). Neither row references a sub-table. Column meanings follow
 * upb_MiniTable_Field, declared outside this chunk. */
851 static const upb_MiniTable_Field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
852   {1, UPB_SIZE(4, 8), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
853   {2, UPB_SIZE(1, 1), UPB_SIZE(2, 2), kUpb_NoSub, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
854 };
855 
/* MiniTable for UninterpretedOption.NamePart. It has no sub-tables (NULL) and
 * two field rows. It is the only table in this group whose last positional
 * value is non-zero (2) -- presumably the required-field count that
 * fastdecode_dispatch() consults via required_count; confirm against the
 * upb_MiniTable declaration. */
856 const upb_MiniTable google_protobuf_UninterpretedOption_NamePart_msginit = {
857   NULL,
858   &google_protobuf_UninterpretedOption_NamePart__fields[0],
859   UPB_SIZE(16, 24), 2, kUpb_ExtMode_NonExtendable, 2, 255, 2,
860 };
861 
/* Sub-tables of SourceCodeInfo: a single entry, its nested Location message. */
862 static const upb_MiniTable_Sub google_protobuf_SourceCodeInfo_submsgs[1] = {
863   {.submsg = &google_protobuf_SourceCodeInfo_Location_msginit},
864 };
865 
/* Field rows for SourceCodeInfo: a single kUpb_FieldMode_Array row (first
 * column 1) that references sub-table 0. Column meanings follow
 * upb_MiniTable_Field, declared outside this chunk. */
866 static const upb_MiniTable_Field google_protobuf_SourceCodeInfo__fields[1] = {
867   {1, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
868 };
869 
/* MiniTable for SourceCodeInfo: ties together its sub-table and field arrays.
 * The 1 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked non-extendable. */
870 const upb_MiniTable google_protobuf_SourceCodeInfo_msginit = {
871   &google_protobuf_SourceCodeInfo_submsgs[0],
872   &google_protobuf_SourceCodeInfo__fields[0],
873   UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
874 };
875 
/* Field rows for SourceCodeInfo.Location, ordered by ascending first column
 * (1, 2, 3, 4, 6). Rows 1 and 2 are arrays flagged kUpb_LabelFlags_IsPacked;
 * row 6 is an array without that flag. No row references a sub-table. Column
 * meanings follow upb_MiniTable_Field, declared outside this chunk. */
876 static const upb_MiniTable_Field google_protobuf_SourceCodeInfo_Location__fields[5] = {
877   {1, UPB_SIZE(4, 8), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
878   {2, UPB_SIZE(8, 16), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
879   {3, UPB_SIZE(12, 24), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
880   {4, UPB_SIZE(20, 40), UPB_SIZE(2, 2), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
881   {6, UPB_SIZE(28, 56), UPB_SIZE(0, 0), kUpb_NoSub, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
882 };
883 
/* MiniTable for SourceCodeInfo.Location. It has no sub-tables (NULL); the 5
 * following UPB_SIZE(...) equals the length of the __fields array; the message
 * is marked non-extendable. */
884 const upb_MiniTable google_protobuf_SourceCodeInfo_Location_msginit = {
885   NULL,
886   &google_protobuf_SourceCodeInfo_Location__fields[0],
887   UPB_SIZE(32, 64), 5, kUpb_ExtMode_NonExtendable, 4, 255, 0,
888 };
889 
/* Sub-tables of GeneratedCodeInfo: a single entry, its nested Annotation
 * message. */
890 static const upb_MiniTable_Sub google_protobuf_GeneratedCodeInfo_submsgs[1] = {
891   {.submsg = &google_protobuf_GeneratedCodeInfo_Annotation_msginit},
892 };
893 
/* Field rows for GeneratedCodeInfo: a single kUpb_FieldMode_Array row (first
 * column 1) that references sub-table 0. Column meanings follow
 * upb_MiniTable_Field, declared outside this chunk. */
894 static const upb_MiniTable_Field google_protobuf_GeneratedCodeInfo__fields[1] = {
895   {1, UPB_SIZE(0, 0), UPB_SIZE(0, 0), 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
896 };
897 
/* MiniTable for GeneratedCodeInfo: ties together its sub-table and field
 * arrays. The 1 following UPB_SIZE(...) equals the length of the __fields
 * array; the message is marked non-extendable. */
898 const upb_MiniTable google_protobuf_GeneratedCodeInfo_msginit = {
899   &google_protobuf_GeneratedCodeInfo_submsgs[0],
900   &google_protobuf_GeneratedCodeInfo__fields[0],
901   UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
902 };
903 
/* Field rows for GeneratedCodeInfo.Annotation, ordered by ascending first
 * column (1..4). Row 1 is an array flagged kUpb_LabelFlags_IsPacked; no row
 * references a sub-table. Column meanings follow upb_MiniTable_Field, declared
 * outside this chunk. */
904 static const upb_MiniTable_Field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
905   {1, UPB_SIZE(12, 16), UPB_SIZE(0, 0), kUpb_NoSub, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
906   {2, UPB_SIZE(16, 24), UPB_SIZE(1, 1), kUpb_NoSub, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
907   {3, UPB_SIZE(4, 4), UPB_SIZE(2, 2), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
908   {4, UPB_SIZE(8, 8), UPB_SIZE(3, 3), kUpb_NoSub, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
909 };
910 
/* MiniTable for GeneratedCodeInfo.Annotation. It has no sub-tables (NULL); the
 * 4 following UPB_SIZE(...) equals the length of the __fields array; the
 * message is marked non-extendable. */
911 const upb_MiniTable google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
912   NULL,
913   &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
914   UPB_SIZE(24, 40), 4, kUpb_ExtMode_NonExtendable, 4, 255, 0,
915 };
916 
/* Pointers to all 27 message MiniTables belonging to descriptor.proto. The
 * array is handed to google_protobuf_descriptor_proto_upb_file_layout, which
 * records the same count (27). */
917 static const upb_MiniTable *messages_layout[27] = {
918   &google_protobuf_FileDescriptorSet_msginit,
919   &google_protobuf_FileDescriptorProto_msginit,
920   &google_protobuf_DescriptorProto_msginit,
921   &google_protobuf_DescriptorProto_ExtensionRange_msginit,
922   &google_protobuf_DescriptorProto_ReservedRange_msginit,
923   &google_protobuf_ExtensionRangeOptions_msginit,
924   &google_protobuf_FieldDescriptorProto_msginit,
925   &google_protobuf_OneofDescriptorProto_msginit,
926   &google_protobuf_EnumDescriptorProto_msginit,
927   &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
928   &google_protobuf_EnumValueDescriptorProto_msginit,
929   &google_protobuf_ServiceDescriptorProto_msginit,
930   &google_protobuf_MethodDescriptorProto_msginit,
931   &google_protobuf_FileOptions_msginit,
932   &google_protobuf_MessageOptions_msginit,
933   &google_protobuf_FieldOptions_msginit,
934   &google_protobuf_OneofOptions_msginit,
935   &google_protobuf_EnumOptions_msginit,
936   &google_protobuf_EnumValueOptions_msginit,
937   &google_protobuf_ServiceOptions_msginit,
938   &google_protobuf_MethodOptions_msginit,
939   &google_protobuf_UninterpretedOption_msginit,
940   &google_protobuf_UninterpretedOption_NamePart_msginit,
941   &google_protobuf_SourceCodeInfo_msginit,
942   &google_protobuf_SourceCodeInfo_Location_msginit,
943   &google_protobuf_GeneratedCodeInfo_msginit,
944   &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
945 };
946 
/* Enum table for FieldDescriptorProto.Type. The constant 0x7fffe has bits 1
 * through 18 set -- presumably a bitmask of the valid enum numbers; confirm
 * against the upb_MiniTable_Enum declaration. */
947 const upb_MiniTable_Enum google_protobuf_FieldDescriptorProto_Type_enuminit = {
948     NULL,
949     0x7fffeULL,
950     0,
951 };
952 
/* Enum table for FieldDescriptorProto.Label. The constant 0xe has bits 1
 * through 3 set -- presumably a bitmask of the valid enum numbers; confirm
 * against the upb_MiniTable_Enum declaration. */
953 const upb_MiniTable_Enum google_protobuf_FieldDescriptorProto_Label_enuminit = {
954     NULL,
955     0xeULL,
956     0,
957 };
958 
/* Enum table for FileOptions.OptimizeMode. The constant 0xe has bits 1
 * through 3 set -- presumably a bitmask of the valid enum numbers; confirm
 * against the upb_MiniTable_Enum declaration. */
959 const upb_MiniTable_Enum google_protobuf_FileOptions_OptimizeMode_enuminit = {
960     NULL,
961     0xeULL,
962     0,
963 };
964 
/* Enum table for FieldOptions.CType. The constant 0x7 has bits 0 through 2
 * set -- presumably a bitmask of the valid enum numbers; confirm against the
 * upb_MiniTable_Enum declaration. */
965 const upb_MiniTable_Enum google_protobuf_FieldOptions_CType_enuminit = {
966     NULL,
967     0x7ULL,
968     0,
969 };
970 
/* Enum table for FieldOptions.JSType. The constant 0x7 has bits 0 through 2
 * set -- presumably a bitmask of the valid enum numbers; confirm against the
 * upb_MiniTable_Enum declaration. */
971 const upb_MiniTable_Enum google_protobuf_FieldOptions_JSType_enuminit = {
972     NULL,
973     0x7ULL,
974     0,
975 };
976 
/* Enum table for MethodOptions.IdempotencyLevel. The constant 0x7 has bits 0
 * through 2 set -- presumably a bitmask of the valid enum numbers; confirm
 * against the upb_MiniTable_Enum declaration. */
977 const upb_MiniTable_Enum google_protobuf_MethodOptions_IdempotencyLevel_enuminit = {
978     NULL,
979     0x7ULL,
980     0,
981 };
982 
/* Pointers to all 6 enum tables belonging to descriptor.proto. The array is
 * handed to google_protobuf_descriptor_proto_upb_file_layout, which records
 * the same count (6). */
983 static const upb_MiniTable_Enum *enums_layout[6] = {
984   &google_protobuf_FieldDescriptorProto_Type_enuminit,
985   &google_protobuf_FieldDescriptorProto_Label_enuminit,
986   &google_protobuf_FileOptions_OptimizeMode_enuminit,
987   &google_protobuf_FieldOptions_CType_enuminit,
988   &google_protobuf_FieldOptions_JSType_enuminit,
989   &google_protobuf_MethodOptions_IdempotencyLevel_enuminit,
990 };
991 
/* File-level layout for descriptor.proto: the message table array, the enum
 * table array, and a NULL third pointer, followed by three counts. 27 and 6
 * equal the lengths of messages_layout and enums_layout; the NULL/0 pair is
 * presumably the (empty) extension list -- confirm against the
 * upb_MiniTable_File declaration. */
992 const upb_MiniTable_File google_protobuf_descriptor_proto_upb_file_layout = {
993   messages_layout,
994   enums_layout,
995   NULL,
996   27,
997   6,
998   0,
999 };
1000 
1001 
1002 
1003 /** upb/decode_fast.c ************************************************************/
1004 // Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64.
1005 // Also the table size grows by 2x.
1006 //
1007 // Could potentially be ported to other 64-bit archs that pass at least six
1008 // arguments in registers and have 8 unused high bits in pointers.
1009 //
1010 // The overall design is to create specialized functions for every possible
1011 // field type (eg. oneof boolean field with a 1 byte tag) and then dispatch
1012 // to the specialized function as quickly as possible.
1013 
1014 
1015 
1016 /* Must be last. */
1017 
1018 #if UPB_FASTTABLE
1019 
1020 // The standard set of arguments passed to each parsing function.
1021 // Thanks to x86-64 calling conventions, these will stay in registers.
//
// As used by the functions below: `d` is the decoder state, `ptr` the current
// input position, `msg` the message being filled, `table` an integer that
// decode_totablep() turns back into a upb_MiniTable pointer, `hasbits` the
// has-bits accumulated so far (OR-ed into the first 32 bits of `msg` when
// synced), and `data` per-call scratch (loaded tag bytes, field data, or an
// overrun count).
1022 #define UPB_PARSE_PARAMS                                             \
1023   upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \
1024       uint64_t hasbits, uint64_t data
1025 
// Forwards the six UPB_PARSE_PARAMS parameters, unchanged and in order, to
// another parsing function (typically in a UPB_MUSTTAIL return).
1026 #define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data
1027 
// Bails out of a specialized parser by returning the result of
// fastdecode_generic(). The message argument `m` is only used by the
// commented-out debugging aids. Note that the expansion is a complete
// `return ...;` statement and that it passes 0, not the current `data`, as the
// last argument.
1028 #define RETURN_GENERIC(m)                                 \
1029   /* Uncomment either of these for debugging purposes. */ \
1030   /* fprintf(stderr, m); */                               \
1031   /*__builtin_trap(); */                                  \
1032   return fastdecode_generic(d, ptr, msg, table, hasbits, 0);
1033 
// Field cardinality that a specialized parser is instantiated for.
// fastdecode_getfield() switches on this value to decide how the destination
// for the decoded value is located; it handles CARD_s, CARD_o and CARD_r and
// treats any other value as unreachable.
1034 typedef enum {
1035   CARD_s = 0, /* Singular (optional, non-repeated) */
1036   CARD_o = 1, /* Oneof */
1037   CARD_r = 2, /* Repeated */
1038   CARD_p = 3  /* Packed Repeated */
1039 } upb_card;
1040 
1041 UPB_NOINLINE
fastdecode_isdonefallback(UPB_PARSE_PARAMS)1042 static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) {
1043   int overrun = data;
1044   int status;
1045   ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
1046   if (ptr == NULL) {
1047     return fastdecode_err(d, status);
1048   }
1049   data = fastdecode_loadtag(ptr);
1050   UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);
1051 }
1052 
1053 UPB_FORCEINLINE
fastdecode_dispatch(UPB_PARSE_PARAMS)1054 static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) {
1055   if (UPB_UNLIKELY(ptr >= d->limit_ptr)) {
1056     int overrun = ptr - d->end;
1057     if (UPB_LIKELY(overrun == d->limit)) {
1058       // Parse is finished.
1059       *(uint32_t*)msg |= hasbits;  // Sync hasbits.
1060       const upb_MiniTable* l = decode_totablep(table);
1061       return UPB_UNLIKELY(l->required_count)
1062                  ? decode_checkrequired(d, ptr, msg, l)
1063                  : ptr;
1064     } else {
1065       data = overrun;
1066       UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS);
1067     }
1068   }
1069 
1070   // Read two bytes of tag data (for a one-byte tag, the high byte is junk).
1071   data = fastdecode_loadtag(ptr);
1072   UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);
1073 }
1074 
1075 UPB_FORCEINLINE
fastdecode_checktag(uint16_t data,int tagbytes)1076 static bool fastdecode_checktag(uint16_t data, int tagbytes) {
1077   if (tagbytes == 1) {
1078     return (data & 0xff) == 0;
1079   } else {
1080     return data == 0;
1081   }
1082 }
1083 
1084 UPB_FORCEINLINE
fastdecode_longsize(const char * ptr,int * size)1085 static const char* fastdecode_longsize(const char* ptr, int* size) {
1086   int i;
1087   UPB_ASSERT(*size & 0x80);
1088   *size &= 0xff;
1089   for (i = 0; i < 3; i++) {
1090     ptr++;
1091     size_t byte = (uint8_t)ptr[-1];
1092     *size += (byte - 1) << (7 + 7 * i);
1093     if (UPB_LIKELY((byte & 0x80) == 0)) return ptr;
1094   }
1095   ptr++;
1096   size_t byte = (uint8_t)ptr[-1];
1097   // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected
1098   // for a 32 bit varint.
1099   if (UPB_UNLIKELY(byte >= 8)) return NULL;
1100   *size += (byte - 1) << 28;
1101   return ptr;
1102 }
1103 
1104 UPB_FORCEINLINE
fastdecode_boundscheck(const char * ptr,size_t len,const char * end)1105 static bool fastdecode_boundscheck(const char* ptr, size_t len,
1106                                    const char* end) {
1107   uintptr_t uptr = (uintptr_t)ptr;
1108   uintptr_t uend = (uintptr_t)end + 16;
1109   uintptr_t res = uptr + len;
1110   return res < uptr || res > uend;
1111 }
1112 
1113 UPB_FORCEINLINE
fastdecode_boundscheck2(const char * ptr,size_t len,const char * end)1114 static bool fastdecode_boundscheck2(const char* ptr, size_t len,
1115                                     const char* end) {
1116   // This is one extra branch compared to the more normal:
1117   //   return (size_t)(end - ptr) < size;
1118   // However it is one less computation if we are just about to use "ptr + len":
1119   //   https://godbolt.org/z/35YGPz
1120   // In microbenchmarks this shows an overall 4% improvement.
1121   uintptr_t uptr = (uintptr_t)ptr;
1122   uintptr_t uend = (uintptr_t)end;
1123   uintptr_t res = uptr + len;
1124   return res < uptr || res > uend;
1125 }
1126 
// Callback type used by fastdecode_delimited(): invoked with the decoder, the
// input pointer positioned after the length prefix, and the caller-supplied
// `ctx`; its return value becomes the new input pointer.
1127 typedef const char* fastdecode_delimfunc(upb_Decoder* d, const char* ptr,
1128                                          void* ctx);
1129 
1130 UPB_FORCEINLINE
fastdecode_delimited(upb_Decoder * d,const char * ptr,fastdecode_delimfunc * func,void * ctx)1131 static const char* fastdecode_delimited(upb_Decoder* d, const char* ptr,
1132                                         fastdecode_delimfunc* func, void* ctx) {
1133   ptr++;
1134   int len = (int8_t)ptr[-1];
1135   if (fastdecode_boundscheck2(ptr, len, d->limit_ptr)) {
1136     // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer.
1137     // If it exceeds the buffer limit, limit/limit_ptr will change during
1138     // sub-message parsing, so we need to preserve delta, not limit.
1139     if (UPB_UNLIKELY(len & 0x80)) {
1140       // Size varint >1 byte (length >= 128).
1141       ptr = fastdecode_longsize(ptr, &len);
1142       if (!ptr) {
1143         // Corrupt wire format: size exceeded INT_MAX.
1144         return NULL;
1145       }
1146     }
1147     if (ptr - d->end + (int)len > d->limit) {
1148       // Corrupt wire format: invalid limit.
1149       return NULL;
1150     }
1151     int delta = decode_pushlimit(d, ptr, len);
1152     ptr = func(d, ptr, ctx);
1153     decode_poplimit(d, ptr, delta);
1154   } else {
1155     // Fast case: Sub-message is <128 bytes and fits in the current buffer.
1156     // This means we can preserve limit/limit_ptr verbatim.
1157     const char* saved_limit_ptr = d->limit_ptr;
1158     int saved_limit = d->limit;
1159     d->limit_ptr = ptr + len;
1160     d->limit = d->limit_ptr - d->end;
1161     UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
1162     ptr = func(d, ptr, ctx);
1163     d->limit_ptr = saved_limit_ptr;
1164     d->limit = saved_limit;
1165     UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
1166   }
1167   return ptr;
1168 }
1169 
1170 /* singular, oneof, repeated field handling ***********************************/
1171 
// Cursor state for appending to a repeated field: `arr` is the upb_Array being
// filled and `end` points one past its currently allocated element storage
// (see fastdecode_getfield() and fastdecode_resizearr(), which set both).
1172 typedef struct {
1173   upb_Array* arr;
1174   void* end;
1175 } fastdecode_arr;
1176 
// What fastdecode_nextrepeated() found after an element: the decoder reported
// done (ATLIMIT), the next tag matches the current field (SAMEFIELD), or the
// next tag belongs to something else (OTHERFIELD).
1177 typedef enum {
1178   FD_NEXT_ATLIMIT,
1179   FD_NEXT_SAMEFIELD,
1180   FD_NEXT_OTHERFIELD
1181 } fastdecode_next;
1182 
// Result of fastdecode_nextrepeated(): `dst` is the destination advanced past
// the element just written, `next` classifies what follows, and `tag` holds
// the freshly loaded tag bytes (left unset when `next` is FD_NEXT_ATLIMIT).
1183 typedef struct {
1184   void* dst;
1185   fastdecode_next next;
1186   uint32_t tag;
1187 } fastdecode_nextret;
1188 
1189 UPB_FORCEINLINE
fastdecode_resizearr(upb_Decoder * d,void * dst,fastdecode_arr * farr,int valbytes)1190 static void* fastdecode_resizearr(upb_Decoder* d, void* dst,
1191                                   fastdecode_arr* farr, int valbytes) {
1192   if (UPB_UNLIKELY(dst == farr->end)) {
1193     size_t old_size = farr->arr->size;
1194     size_t old_bytes = old_size * valbytes;
1195     size_t new_size = old_size * 2;
1196     size_t new_bytes = new_size * valbytes;
1197     char* old_ptr = _upb_array_ptr(farr->arr);
1198     char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes);
1199     uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
1200     farr->arr->size = new_size;
1201     farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2);
1202     dst = (void*)(new_ptr + (old_size * valbytes));
1203     farr->end = (void*)(new_ptr + (new_size * valbytes));
1204   }
1205   return dst;
1206 }
1207 
1208 UPB_FORCEINLINE
fastdecode_tagmatch(uint32_t tag,uint64_t data,int tagbytes)1209 static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) {
1210   if (tagbytes == 1) {
1211     return (uint8_t)tag == (uint8_t)data;
1212   } else {
1213     return (uint16_t)tag == (uint16_t)data;
1214   }
1215 }
1216 
1217 UPB_FORCEINLINE
fastdecode_commitarr(void * dst,fastdecode_arr * farr,int valbytes)1218 static void fastdecode_commitarr(void* dst, fastdecode_arr* farr,
1219                                  int valbytes) {
1220   farr->arr->len =
1221       (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes;
1222 }
1223 
1224 UPB_FORCEINLINE
fastdecode_nextrepeated(upb_Decoder * d,void * dst,const char ** ptr,fastdecode_arr * farr,uint64_t data,int tagbytes,int valbytes)1225 static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst,
1226                                                   const char** ptr,
1227                                                   fastdecode_arr* farr,
1228                                                   uint64_t data, int tagbytes,
1229                                                   int valbytes) {
1230   fastdecode_nextret ret;
1231   dst = (char*)dst + valbytes;
1232 
1233   if (UPB_LIKELY(!decode_isdone(d, ptr))) {
1234     ret.tag = fastdecode_loadtag(*ptr);
1235     if (fastdecode_tagmatch(ret.tag, data, tagbytes)) {
1236       ret.next = FD_NEXT_SAMEFIELD;
1237     } else {
1238       fastdecode_commitarr(dst, farr, valbytes);
1239       ret.next = FD_NEXT_OTHERFIELD;
1240     }
1241   } else {
1242     fastdecode_commitarr(dst, farr, valbytes);
1243     ret.next = FD_NEXT_ATLIMIT;
1244   }
1245 
1246   ret.dst = dst;
1247   return ret;
1248 }
1249 
1250 UPB_FORCEINLINE
fastdecode_fieldmem(upb_Message * msg,uint64_t data)1251 static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) {
1252   size_t ofs = data >> 48;
1253   return (char*)msg + ofs;
1254 }
1255 
1256 UPB_FORCEINLINE
fastdecode_getfield(upb_Decoder * d,const char * ptr,upb_Message * msg,uint64_t * data,uint64_t * hasbits,fastdecode_arr * farr,int valbytes,upb_card card)1257 static void* fastdecode_getfield(upb_Decoder* d, const char* ptr,
1258                                  upb_Message* msg, uint64_t* data,
1259                                  uint64_t* hasbits, fastdecode_arr* farr,
1260                                  int valbytes, upb_card card) {
1261   switch (card) {
1262     case CARD_s: {
1263       uint8_t hasbit_index = *data >> 24;
1264       // Set hasbit and return pointer to scalar field.
1265       *hasbits |= 1ull << hasbit_index;
1266       return fastdecode_fieldmem(msg, *data);
1267     }
1268     case CARD_o: {
1269       uint16_t case_ofs = *data >> 32;
1270       uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t);
1271       uint8_t field_number = *data >> 24;
1272       *oneof_case = field_number;
1273       return fastdecode_fieldmem(msg, *data);
1274     }
1275     case CARD_r: {
1276       // Get pointer to upb_Array and allocate/expand if necessary.
1277       uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
1278       upb_Array** arr_p = fastdecode_fieldmem(msg, *data);
1279       char* begin;
1280       *(uint32_t*)msg |= *hasbits;
1281       *hasbits = 0;
1282       if (UPB_LIKELY(!*arr_p)) {
1283         farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2);
1284         *arr_p = farr->arr;
1285       } else {
1286         farr->arr = *arr_p;
1287       }
1288       begin = _upb_array_ptr(farr->arr);
1289       farr->end = begin + (farr->arr->size * valbytes);
1290       *data = fastdecode_loadtag(ptr);
1291       return begin + (farr->arr->len * valbytes);
1292     }
1293     default:
1294       UPB_UNREACHABLE();
1295   }
1296 }
1297 
1298 UPB_FORCEINLINE
fastdecode_flippacked(uint64_t * data,int tagbytes)1299 static bool fastdecode_flippacked(uint64_t* data, int tagbytes) {
1300   *data ^= (0x2 ^ 0x0);  // Patch data to match packed wiretype.
1301   return fastdecode_checktag(*data, tagbytes);
1302 }
1303 
// Tag guard placed at the top of a specialized parser. If the tag in `data`
// does not match: for CARD_r, flip the wire-type bits and, when the tag then
// matches, tail-call `func` with the patched `data`; in every other case fall
// back to the generic decoder via RETURN_GENERIC. Relies on the enclosing
// function's UPB_PARSE_PARAMS variables (notably `data`) being in scope.
1304 #define FASTDECODE_CHECKPACKED(tagbytes, card, func)                \
1305   if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {         \
1306     if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \
1307       UPB_MUSTTAIL return func(UPB_PARSE_ARGS);                     \
1308     }                                                               \
1309     RETURN_GENERIC("packed check tag mismatch\n");                  \
1310   }
1311 
1312 /* varint fields **************************************************************/
1313 
1314 UPB_FORCEINLINE
fastdecode_munge(uint64_t val,int valbytes,bool zigzag)1315 static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) {
1316   if (valbytes == 1) {
1317     return val != 0;
1318   } else if (zigzag) {
1319     if (valbytes == 4) {
1320       uint32_t n = val;
1321       return (n >> 1) ^ -(int32_t)(n & 1);
1322     } else if (valbytes == 8) {
1323       return (val >> 1) ^ -(int64_t)(val & 1);
1324     }
1325     UPB_UNREACHABLE();
1326   }
1327   return val;
1328 }
1329 
1330 UPB_FORCEINLINE
fastdecode_varint64(const char * ptr,uint64_t * val)1331 static const char* fastdecode_varint64(const char* ptr, uint64_t* val) {
1332   ptr++;
1333   *val = (uint8_t)ptr[-1];
1334   if (UPB_UNLIKELY(*val & 0x80)) {
1335     int i;
1336     for (i = 0; i < 8; i++) {
1337       ptr++;
1338       uint64_t byte = (uint8_t)ptr[-1];
1339       *val += (byte - 1) << (7 + 7 * i);
1340       if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
1341     }
1342     ptr++;
1343     uint64_t byte = (uint8_t)ptr[-1];
1344     if (byte > 1) {
1345       return NULL;
1346     }
1347     *val += (byte - 1) << 63;
1348   }
1349 done:
1350   UPB_ASSUME(ptr != NULL);
1351   return ptr;
1352 }
1353 
/* Parser body for a varint field encoded one value per tag. `card` is a
 * CARD_* constant; CARD_r selects the repeated-field paths.
 *
 * Flow once expanded into the enclosing parser function:
 *   1. FASTDECODE_CHECKPACKED gates on the tag; for CARD_r a tag that only
 *      matches after flipping is tail-called into `packed`.
 *   2. fastdecode_getfield() supplies the destination. For CARD_r a NULL
 *      destination goes to RETURN_GENERIC.
 *   3. At `again`: for CARD_r, dst is refreshed through
 *      fastdecode_resizearr(). The tag bytes are skipped,
 *      fastdecode_varint64() decodes the value (NULL result =>
 *      kUpb_DecodeStatus_Malformed), fastdecode_munge() applies the
 *      bool/zigzag transform, and the first `valbytes` bytes of the 64-bit
 *      result are memcpy'd to dst.
 *   4. For CARD_r, fastdecode_nextrepeated() selects between looping on the
 *      same field, tail-calling fastdecode_tagdispatch with the new tag, or
 *      returning ptr at the limit. Other cardinalities tail-call
 *      fastdecode_dispatch.
 *
 * NOTE(review): copying the first `valbytes` bytes of a uint64_t yields the
 * low-order bytes only on a little-endian host -- confirm that assumption. */
1354 #define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
1355                                   valbytes, card, zigzag, packed)              \
1356   uint64_t val;                                                                \
1357   void* dst;                                                                   \
1358   fastdecode_arr farr;                                                         \
1359                                                                                \
1360   FASTDECODE_CHECKPACKED(tagbytes, card, packed);                              \
1361                                                                                \
1362   dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes,     \
1363                             card);                                             \
1364   if (card == CARD_r) {                                                        \
1365     if (UPB_UNLIKELY(!dst)) {                                                  \
1366       RETURN_GENERIC("need array resize\n");                                   \
1367     }                                                                          \
1368   }                                                                            \
1369                                                                                \
1370   again:                                                                       \
1371   if (card == CARD_r) {                                                        \
1372     dst = fastdecode_resizearr(d, dst, &farr, valbytes);                       \
1373   }                                                                            \
1374                                                                                \
1375   ptr += tagbytes;                                                             \
1376   ptr = fastdecode_varint64(ptr, &val);                                        \
1377   if (ptr == NULL) return fastdecode_err(d, kUpb_DecodeStatus_Malformed);      \
1378   val = fastdecode_munge(val, valbytes, zigzag);                               \
1379   memcpy(dst, &val, valbytes);                                                 \
1380                                                                                \
1381   if (card == CARD_r) {                                                        \
1382     fastdecode_nextret ret = fastdecode_nextrepeated(                          \
1383         d, dst, &ptr, &farr, data, tagbytes, valbytes);                        \
1384     switch (ret.next) {                                                        \
1385       case FD_NEXT_SAMEFIELD:                                                  \
1386         dst = ret.dst;                                                         \
1387         goto again;                                                            \
1388       case FD_NEXT_OTHERFIELD:                                                 \
1389         data = ret.tag;                                                        \
1390         UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);            \
1391       case FD_NEXT_ATLIMIT:                                                    \
1392         return ptr;                                                            \
1393     }                                                                          \
1394   }                                                                            \
1395                                                                                \
1396   UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1397 
/* State passed to fastdecode_topackedvarint() through its void* ctx argument.
 * FASTDECODE_PACKEDVARINT initializes it positionally as {valbytes, zigzag},
 * so the first two members must stay in this order. */
1398 typedef struct {
1399   uint8_t valbytes;  /* Bytes written per decoded element. */
1400   bool zigzag;  /* Forwarded to fastdecode_munge() for every element. */
1401   void* dst;  /* Where the first decoded element is written. */
1402   fastdecode_arr farr;  /* Passed to fastdecode_resizearr()/_commitarr(). */
1403 } fastdecode_varintdata;
1404 
1405 UPB_FORCEINLINE
fastdecode_topackedvarint(upb_Decoder * d,const char * ptr,void * ctx)1406 static const char* fastdecode_topackedvarint(upb_Decoder* d, const char* ptr,
1407                                              void* ctx) {
1408   fastdecode_varintdata* data = ctx;
1409   void* dst = data->dst;
1410   uint64_t val;
1411 
1412   while (!decode_isdone(d, &ptr)) {
1413     dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes);
1414     ptr = fastdecode_varint64(ptr, &val);
1415     if (ptr == NULL) return NULL;
1416     val = fastdecode_munge(val, data->valbytes, data->zigzag);
1417     memcpy(dst, &val, data->valbytes);
1418     dst = (char*)dst + data->valbytes;
1419   }
1420 
1421   fastdecode_commitarr(dst, &data->farr, data->valbytes);
1422   return ptr;
1423 }
1424 
/* Parser body for a packed (length-delimited) repeated varint field.
 *
 * Flow once expanded into the enclosing parser function:
 *   1. A fastdecode_varintdata context is created with valbytes and zigzag
 *      filled in positionally.
 *   2. FASTDECODE_CHECKPACKED gates on the tag with CARD_r, so a tag that
 *      only matches after flipping is tail-called into `unpacked`.
 *   3. fastdecode_getfield() (always CARD_r here) supplies the write position
 *      and fills ctx.farr; a NULL result goes to RETURN_GENERIC.
 *   4. After skipping the tag, fastdecode_delimited() runs
 *      fastdecode_topackedvarint over the payload. A NULL result is reported
 *      as kUpb_DecodeStatus_Malformed.
 *   5. Control tail-calls fastdecode_dispatch with the data argument set
 *      to 0. */
1425 #define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
1426                                 valbytes, zigzag, unpacked)                  \
1427   fastdecode_varintdata ctx = {valbytes, zigzag};                            \
1428                                                                              \
1429   FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked);                        \
1430                                                                              \
1431   ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr,     \
1432                                 valbytes, CARD_r);                           \
1433   if (UPB_UNLIKELY(!ctx.dst)) {                                              \
1434     RETURN_GENERIC("need array resize\n");                                   \
1435   }                                                                          \
1436                                                                              \
1437   ptr += tagbytes;                                                           \
1438   ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx);      \
1439                                                                              \
1440   if (UPB_UNLIKELY(ptr == NULL)) {                                           \
1441     return fastdecode_err(d, kUpb_DecodeStatus_Malformed);                   \
1442   }                                                                          \
1443                                                                              \
1444   UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0);
1445 
/* Entry macro for all varint parsers: selects the packed body when `card` is
 * CARD_p and the unpacked body otherwise. Both bodies are expanded inside
 * their own braces, so the locals each one declares do not collide.
 *
 * `unpacked` is forwarded to the packed body and `packed` to the unpacked
 * body: each is the parser to tail-call when the tag turns out to use the
 * other encoding. */
1446 #define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes,     \
1447                           valbytes, card, zigzag, unpacked, packed)        \
1448   if (card == CARD_p) {                                                    \
1449     FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes,   \
1450                             valbytes, zigzag, unpacked);                   \
1451   } else {                                                                 \
1452     FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
1453                               valbytes, card, zigzag, packed);             \
1454   }
1455 
/* Zigzag flag per type letter; F below resolves it via type##_ZZ.
 * Only the `z` kinds are zigzag-decoded. */
1456 #define z_ZZ true
1457 #define b_ZZ false
1458 #define v_ZZ false
1459 
1460 /* Generate all combinations:
1461  * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */
1462 
/* Defines one parser named upb_p<card><type><valbytes>_<tagbytes>bt.
 * The upb_pr* sibling is passed as FASTDECODE_VARINT's `unpacked` argument
 * and the upb_pp* sibling as its `packed` argument. */
1463 #define F(card, type, valbytes, tagbytes)                                      \
1464   UPB_NOINLINE                                                                 \
1465   const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
1466     FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes,   \
1467                       CARD_##card, type##_ZZ,                                  \
1468                       upb_pr##type##valbytes##_##tagbytes##bt,                 \
1469                       upb_pp##type##valbytes##_##tagbytes##bt);                \
1470   }
1471 
/* Expands F for each of the five varint kinds at one tag size. */
1472 #define TYPES(card, tagbytes) \
1473   F(card, b, 1, tagbytes)     \
1474   F(card, v, 4, tagbytes)     \
1475   F(card, v, 8, tagbytes)     \
1476   F(card, z, 4, tagbytes)     \
1477   F(card, z, 8, tagbytes)
1478 
/* Expands TYPES for 1-byte and 2-byte tags. */
1479 #define TAGBYTES(card) \
1480   TYPES(card, 1)       \
1481   TYPES(card, 2)
1482 
/* Emit the varint parsers for each cardinality letter (s, o, r, p). */
1483 TAGBYTES(s)
1484 TAGBYTES(o)
1485 TAGBYTES(r)
1486 TAGBYTES(p)
1487 
/* Remove the section-local helper macros.
 * NOTE(review): o_ONEOF, s_ONEOF and r_ONEOF are not defined in the visible
 * part of this section; these three #undefs may be leftovers -- confirm
 * before removing them. */
1488 #undef z_ZZ
1489 #undef b_ZZ
1490 #undef v_ZZ
1491 #undef o_ONEOF
1492 #undef s_ONEOF
1493 #undef r_ONEOF
1494 #undef F
1495 #undef TYPES
1496 #undef TAGBYTES
1497 #undef FASTDECODE_UNPACKEDVARINT
1498 #undef FASTDECODE_PACKEDVARINT
1499 #undef FASTDECODE_VARINT
1500 
1501 /* fixed fields ***************************************************************/
1502 
/* Parser body for a fixed-width field encoded one value per tag. `card` is a
 * CARD_* constant; CARD_r selects the repeated-field paths.
 *
 * Flow once expanded into the enclosing parser function:
 *   1. FASTDECODE_CHECKPACKED gates on the tag; for CARD_r a tag that only
 *      matches after flipping is tail-called into `packed`.
 *   2. fastdecode_getfield() supplies the destination. For CARD_r a NULL
 *      destination goes to RETURN_GENERIC.
 *   3. At `again`: for CARD_r, dst is refreshed through
 *      fastdecode_resizearr(). The tag bytes are skipped and `valbytes` raw
 *      bytes are memcpy'd from the input to dst with no transformation.
 *   4. For CARD_r, fastdecode_nextrepeated() selects between looping on the
 *      same field, tail-calling fastdecode_tagdispatch with the new tag, or
 *      returning ptr at the limit. Other cardinalities tail-call
 *      fastdecode_dispatch.
 *
 * NOTE(review): no bounds check on the `valbytes` read is visible in this
 * macro; presumably the input buffer guarantees enough readable bytes past
 * the tag -- confirm against the decoder's buffer setup. */
1503 #define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
1504                                  valbytes, card, packed)                      \
1505   void* dst;                                                                  \
1506   fastdecode_arr farr;                                                        \
1507                                                                               \
1508   FASTDECODE_CHECKPACKED(tagbytes, card, packed)                              \
1509                                                                               \
1510   dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes,    \
1511                             card);                                            \
1512   if (card == CARD_r) {                                                       \
1513     if (UPB_UNLIKELY(!dst)) {                                                 \
1514       RETURN_GENERIC("couldn't allocate array in arena\n");                   \
1515     }                                                                         \
1516   }                                                                           \
1517                                                                               \
1518   again:                                                                      \
1519   if (card == CARD_r) {                                                       \
1520     dst = fastdecode_resizearr(d, dst, &farr, valbytes);                      \
1521   }                                                                           \
1522                                                                               \
1523   ptr += tagbytes;                                                            \
1524   memcpy(dst, ptr, valbytes);                                                 \
1525   ptr += valbytes;                                                            \
1526                                                                               \
1527   if (card == CARD_r) {                                                       \
1528     fastdecode_nextret ret = fastdecode_nextrepeated(                         \
1529         d, dst, &ptr, &farr, data, tagbytes, valbytes);                       \
1530     switch (ret.next) {                                                       \
1531       case FD_NEXT_SAMEFIELD:                                                 \
1532         dst = ret.dst;                                                        \
1533         goto again;                                                           \
1534       case FD_NEXT_OTHERFIELD:                                                \
1535         data = ret.tag;                                                       \
1536         UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);           \
1537       case FD_NEXT_ATLIMIT:                                                   \
1538         return ptr;                                                           \
1539     }                                                                         \
1540   }                                                                           \
1541                                                                               \
1542   UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1543 
/* Parser body for a packed (length-delimited) repeated fixed-width field.
 *
 * Flow once expanded into the enclosing parser function:
 *   1. FASTDECODE_CHECKPACKED gates on the tag with CARD_r, so a tag that
 *      only matches after flipping is tail-called into `unpacked`.
 *   2. The payload length is read: one byte, extended through
 *      fastdecode_longsize() when its high bit is set.
 *   3. The length must pass fastdecode_boundscheck() against d->limit_ptr
 *      and be a multiple of `valbytes`; otherwise the parse fails with
 *      kUpb_DecodeStatus_Malformed.
 *   4. The field's array is created if absent, or grown if present, and the
 *      payload is memcpy'd in as raw element bytes.
 *   5. Control tail-calls fastdecode_dispatch.
 *
 * Elements are appended after any the array already holds, matching the
 * CARD_r path of fastdecode_getfield(), so a field split across several
 * packed chunks keeps every chunk. Allocation failures are reported as
 * kUpb_DecodeStatus_OutOfMemory, and a failed resize aborts the parse before
 * anything is copied.
 *
 * NOTE(review): if fastdecode_longsize() can signal failure by returning
 * NULL, confirm that fastdecode_boundscheck() rejects a NULL ptr. */
#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                               valbytes, unpacked)                          \
  FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked)                        \
                                                                            \
  ptr += tagbytes;                                                          \
  int size = (uint8_t)ptr[0];                                               \
  ptr++;                                                                    \
  if (size & 0x80) {                                                        \
    ptr = fastdecode_longsize(ptr, &size);                                  \
  }                                                                         \
                                                                            \
  if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) ||       \
                   (size % valbytes) != 0)) {                               \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed);                  \
  }                                                                         \
                                                                            \
  upb_Array** arr_p = fastdecode_fieldmem(msg, data);                       \
  upb_Array* arr = *arr_p;                                                  \
  uint8_t elem_size_lg2 = __builtin_ctz(valbytes);                          \
  int elems = size / valbytes;                                              \
  size_t old_len = 0;                                                       \
                                                                            \
  if (UPB_LIKELY(!arr)) {                                                   \
    *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2);         \
    if (UPB_UNLIKELY(!arr)) {                                               \
      return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory);              \
    }                                                                       \
  } else {                                                                  \
    /* Keep what earlier occurrences of the field already stored. */        \
    old_len = arr->len;                                                     \
    if (UPB_UNLIKELY(                                                       \
            !_upb_Array_Resize(arr, old_len + elems, &d->arena))) {         \
      return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory);              \
    }                                                                       \
  }                                                                         \
                                                                            \
  /* Read the storage pointer only after the resize call above. */          \
  char* dst = (char*)_upb_array_ptr(arr) + old_len * valbytes;              \
  memcpy(dst, ptr, size);                                                   \
  arr->len = old_len + elems;                                               \
                                                                            \
  ptr += size;                                                              \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1580 
/* Entry macro for all fixed-width parsers: selects the packed body when
 * `card` is CARD_p and the unpacked body otherwise. Both bodies are expanded
 * inside their own braces, so the locals each one declares do not collide.
 *
 * `unpacked` is forwarded to the packed body and `packed` to the unpacked
 * body: each is the parser to tail-call when the tag turns out to use the
 * other encoding. */
1581 #define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes,     \
1582                          valbytes, card, unpacked, packed)                \
1583   if (card == CARD_p) {                                                   \
1584     FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes,   \
1585                            valbytes, unpacked);                           \
1586   } else {                                                                \
1587     FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
1588                              valbytes, card, packed);                     \
1589   }
1590 
/* Generate all combinations:
 * {s,o,r,p} x {f4,f8} x {1bt,2bt} */

/* Defines one parser named upb_p<card>f<valbytes>_<tagbytes>bt.
 *
 * FASTDECODE_FIXED's last two parameters are (unpacked, packed), so the
 * upb_prf* sibling goes first and the upb_ppf* sibling second -- the same
 * order the varint generator uses. With the order reversed, a parser whose
 * tag only matched after flipping would tail-call itself rather than the
 * parser for the other encoding.
 *
 * NOTE(review): fastdecode_flippacked() toggles wire types 0 and 2 only, so
 * for fixed-width fields (wire types 1 and 5) the flip does not appear able
 * to match a well-formed alternate encoding -- confirm whether the packed/
 * unpacked fast switch is meant to work for fixed fields at all. */
#define F(card, valbytes, tagbytes)                                         \
  UPB_NOINLINE                                                              \
  const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
    FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
                     CARD_##card, upb_prf##valbytes##_##tagbytes##bt,       \
                     upb_ppf##valbytes##_##tagbytes##bt);                   \
  }
1601 
/* Expands F for the 4-byte and 8-byte fixed widths at one tag size. */
1602 #define TYPES(card, tagbytes) \
1603   F(card, 4, tagbytes)        \
1604   F(card, 8, tagbytes)
1605 
/* Expands TYPES for 1-byte and 2-byte tags. */
1606 #define TAGBYTES(card) \
1607   TYPES(card, 1)       \
1608   TYPES(card, 2)
1609 
/* Emit the fixed-width parsers for each cardinality letter (s, o, r, p). */
1610 TAGBYTES(s)
1611 TAGBYTES(o)
1612 TAGBYTES(r)
1613 TAGBYTES(p)
1614 
/* Remove the section-local helper macros.
 * NOTE(review): FASTDECODE_FIXED is not undefined here, unlike
 * FASTDECODE_VARINT in the varint section -- confirm whether that is
 * intentional. */
1615 #undef F
1616 #undef TYPES
1617 #undef TAGBYTES
1618 #undef FASTDECODE_UNPACKEDFIXED
1619 #undef FASTDECODE_PACKEDFIXED
1620 
1621 /* string fields **************************************************************/
1622 
/* Function type for a string-copy routine: takes the decoder, input pointer,
 * message, mini-table, hasbits and the destination string view, and returns
 * a const char*.
 * NOTE(review): note that `table` is a const upb_MiniTable* here, while the
 * parser functions in this section take it as intptr_t -- confirm which
 * callers, if any, still use this type. */
1623 typedef const char* fastdecode_copystr_func(struct upb_Decoder* d,
1624                                             const char* ptr, upb_Message* msg,
1625                                             const upb_MiniTable* table,
1626                                             uint64_t hasbits,
1627                                             upb_StringView* dst);
1628 
1629 UPB_NOINLINE
fastdecode_verifyutf8(upb_Decoder * d,const char * ptr,upb_Message * msg,intptr_t table,uint64_t hasbits,uint64_t data)1630 static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr,
1631                                          upb_Message* msg, intptr_t table,
1632                                          uint64_t hasbits, uint64_t data) {
1633   upb_StringView* dst = (upb_StringView*)data;
1634   if (!decode_verifyutf8_inl(dst->data, dst->size)) {
1635     return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8);
1636   }
1637   UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1638 }
1639 
/* Slow-path body for a string or bytes value; `ptr` must point at the first
 * byte of the length prefix and `dst` at the destination upb_StringView.
 *
 * Flow once expanded into the enclosing function:
 *   1. Read the length: one byte, extended through fastdecode_longsize()
 *      when its high bit is set.
 *   2. If fastdecode_boundscheck() against d->limit_ptr fails, dst->size is
 *      reset to 0 and the parse fails with kUpb_DecodeStatus_Malformed.
 *   3. With kUpb_DecodeOption_AliasString set, dst points straight into the
 *      input. Otherwise the bytes are copied into memory obtained from
 *      upb_Arena_Malloc(); a NULL allocation fails the parse with
 *      kUpb_DecodeStatus_OutOfMemory.
 *   4. ptr is advanced past the payload. When `validate_utf8` is true the
 *      address of dst is stored in the enclosing function's `data` and
 *      control tail-calls fastdecode_verifyutf8; otherwise it tail-calls
 *      fastdecode_dispatch.
 *
 * The `char* data` declared inside the copy branch shadows the enclosing
 * function's `data` only within that branch; the later assignment targets
 * the outer variable. For the same reason the `dst` argument must not be an
 * expression that mentions `data`. */
1640 #define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \
1641   int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */               \
1642   ptr++;                                                                       \
1643   if (size & 0x80) {                                                           \
1644     ptr = fastdecode_longsize(ptr, &size);                                     \
1645   }                                                                            \
1646                                                                                \
1647   if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) {         \
1648     dst->size = 0;                                                             \
1649     return fastdecode_err(d, kUpb_DecodeStatus_Malformed);                     \
1650   }                                                                            \
1651                                                                                \
1652   if (d->options & kUpb_DecodeOption_AliasString) {                            \
1653     dst->data = ptr;                                                           \
1654     dst->size = size;                                                          \
1655   } else {                                                                     \
1656     char* data = upb_Arena_Malloc(&d->arena, size);                            \
1657     if (!data) {                                                               \
1658       return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory);                 \
1659     }                                                                          \
1660     memcpy(data, ptr, size);                                                   \
1661     dst->data = data;                                                          \
1662     dst->size = size;                                                          \
1663   }                                                                            \
1664                                                                                \
1665   ptr += size;                                                                 \
1666   if (validate_utf8) {                                                         \
1667     data = (uint64_t)dst;                                                      \
1668     UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS);                 \
1669   } else {                                                                     \
1670     UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);                   \
1671   }
1672 
1673 UPB_NOINLINE
fastdecode_longstring_utf8(struct upb_Decoder * d,const char * ptr,upb_Message * msg,intptr_t table,uint64_t hasbits,uint64_t data)1674 static const char* fastdecode_longstring_utf8(struct upb_Decoder* d,
1675                                               const char* ptr, upb_Message* msg,
1676                                               intptr_t table, uint64_t hasbits,
1677                                               uint64_t data) {
1678   upb_StringView* dst = (upb_StringView*)data;
1679   FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true);
1680 }
1681 
1682 UPB_NOINLINE
fastdecode_longstring_noutf8(struct upb_Decoder * d,const char * ptr,upb_Message * msg,intptr_t table,uint64_t hasbits,uint64_t data)1683 static const char* fastdecode_longstring_noutf8(
1684     struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table,
1685     uint64_t hasbits, uint64_t data) {
1686   upb_StringView* dst = (upb_StringView*)data;
1687   FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false);
1688 }
1689 
1690 UPB_FORCEINLINE
fastdecode_docopy(upb_Decoder * d,const char * ptr,uint32_t size,int copy,char * data,upb_StringView * dst)1691 static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
1692                               int copy, char* data, upb_StringView* dst) {
1693   d->arena.head.ptr += copy;
1694   dst->data = data;
1695   UPB_UNPOISON_MEMORY_REGION(data, copy);
1696   memcpy(data, ptr, copy);
1697   UPB_POISON_MEMORY_REGION(data + size, copy - size);
1698 }
1699 
/* Parser body for string/bytes fields when strings are copied into the arena
 * (asserts that kUpb_DecodeOption_AliasString is off and that the tag already
 * matches).
 *
 * Flow once expanded into the enclosing parser function:
 *   1. fastdecode_getfield() supplies the destination upb_StringView; for
 *      CARD_r it is refreshed through fastdecode_resizearr() at `again`.
 *   2. The single byte after the tag is taken as the size and stored in
 *      dst->size; ptr is moved past the tag and that byte.
 *   3. The copy strategy is chosen by size, using fixed-width copies:
 *        size <= 15 - tagbytes : 16 bytes are copied starting at the tag, and
 *                                dst->data points past the copied tag and
 *                                size byte; needs 16 bytes left in the arena.
 *        size <= 32 / <= 64 / < 128 : fastdecode_docopy() with a 32/64/128
 *                                byte block; needs that much in `common_has`.
 *        anything else, or not enough room : jump to `longstr`.
 *      `common_has` is the smaller of the arena space and
 *      (d->end - ptr) + 16.
 *   4. ptr is advanced past the payload. For CARD_r, UTF-8 is verified inline
 *      when requested and fastdecode_nextrepeated() picks the next step.
 *      Otherwise control tail-calls fastdecode_verifyutf8 (when validating)
 *      or fastdecode_dispatch.
 *   5. `longstr`: for CARD_r the array is committed as if dst were filled
 *      (dst + 1); ptr is stepped back onto the size byte and control
 *      tail-calls the matching fastdecode_longstring_* function with dst
 *      passed in the data argument.
 *
 * NOTE(review): the fixed-width copies read past the string payload;
 * presumably the input buffer guarantees 16 readable bytes beyond d->end, as
 * the "+ 16" in `common_has` suggests -- confirm. */
1700 #define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes,    \
1701                               card, validate_utf8)                            \
1702   upb_StringView* dst;                                                        \
1703   fastdecode_arr farr;                                                        \
1704   int64_t size;                                                               \
1705   size_t arena_has;                                                           \
1706   size_t common_has;                                                          \
1707   char* buf;                                                                  \
1708                                                                               \
1709   UPB_ASSERT((d->options & kUpb_DecodeOption_AliasString) == 0);              \
1710   UPB_ASSERT(fastdecode_checktag(data, tagbytes));                            \
1711                                                                               \
1712   dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,              \
1713                             sizeof(upb_StringView), card);                    \
1714                                                                               \
1715   again:                                                                      \
1716   if (card == CARD_r) {                                                       \
1717     dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView));        \
1718   }                                                                           \
1719                                                                               \
1720   size = (uint8_t)ptr[tagbytes];                                              \
1721   ptr += tagbytes + 1;                                                        \
1722   dst->size = size;                                                           \
1723                                                                               \
1724   buf = d->arena.head.ptr;                                                    \
1725   arena_has = _upb_ArenaHas(&d->arena);                                       \
1726   common_has = UPB_MIN(arena_has, (d->end - ptr) + 16);                       \
1727                                                                               \
1728   if (UPB_LIKELY(size <= 15 - tagbytes)) {                                    \
1729     if (arena_has < 16) goto longstr;                                         \
1730     d->arena.head.ptr += 16;                                                  \
1731     memcpy(buf, ptr - tagbytes - 1, 16);                                      \
1732     dst->data = buf + tagbytes + 1;                                           \
1733   } else if (UPB_LIKELY(size <= 32)) {                                        \
1734     if (UPB_UNLIKELY(common_has < 32)) goto longstr;                          \
1735     fastdecode_docopy(d, ptr, size, 32, buf, dst);                            \
1736   } else if (UPB_LIKELY(size <= 64)) {                                        \
1737     if (UPB_UNLIKELY(common_has < 64)) goto longstr;                          \
1738     fastdecode_docopy(d, ptr, size, 64, buf, dst);                            \
1739   } else if (UPB_LIKELY(size < 128)) {                                        \
1740     if (UPB_UNLIKELY(common_has < 128)) goto longstr;                         \
1741     fastdecode_docopy(d, ptr, size, 128, buf, dst);                           \
1742   } else {                                                                    \
1743     goto longstr;                                                             \
1744   }                                                                           \
1745                                                                               \
1746   ptr += size;                                                                \
1747                                                                               \
1748   if (card == CARD_r) {                                                       \
1749     if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) {      \
1750       return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8);                    \
1751     }                                                                         \
1752     fastdecode_nextret ret = fastdecode_nextrepeated(                         \
1753         d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView));         \
1754     switch (ret.next) {                                                       \
1755       case FD_NEXT_SAMEFIELD:                                                 \
1756         dst = ret.dst;                                                        \
1757         goto again;                                                           \
1758       case FD_NEXT_OTHERFIELD:                                                \
1759         data = ret.tag;                                                       \
1760         UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);           \
1761       case FD_NEXT_ATLIMIT:                                                   \
1762         return ptr;                                                           \
1763     }                                                                         \
1764   }                                                                           \
1765                                                                               \
1766   if (card != CARD_r && validate_utf8) {                                      \
1767     data = (uint64_t)dst;                                                     \
1768     UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS);                \
1769   }                                                                           \
1770                                                                               \
1771   UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);                    \
1772                                                                               \
1773   longstr:                                                                    \
1774   if (card == CARD_r) {                                                       \
1775     fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView));             \
1776   }                                                                           \
1777   ptr--;                                                                      \
1778   if (validate_utf8) {                                                        \
1779     UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table,        \
1780                                                    hasbits, (uint64_t)dst);   \
1781   } else {                                                                    \
1782     UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table,      \
1783                                                      hasbits, (uint64_t)dst); \
1784   }
1785 
/* FASTDECODE_STRING: function body of the fast-path parser for one
 * string/bytes field that aliases the input buffer instead of copying.
 *
 * Macro parameters:
 *   d, ptr, msg, table, hasbits, data - the standard fast-parser arguments.
 *   tagbytes      - number of tag bytes preceding the length byte.
 *   card          - cardinality selector; CARD_r enables the repeated loop.
 *   copyfunc      - parser to tail-call when aliasing is not enabled.
 *   validate_utf8 - when true, the payload is checked for valid UTF-8.
 *
 * Flow, in order:
 *   1. Falls back through RETURN_GENERIC() if fastdecode_checktag() fails.
 *   2. Tail-calls copyfunc when kUpb_DecodeOption_AliasString is not set in
 *      d->options.
 *   3. Reads the length as the single signed byte after the tag and points
 *      dst->data straight at the input bytes.
 *   4. If fastdecode_boundscheck() reports a problem, backs ptr up by one byte
 *      and hands over to fastdecode_longstring_utf8()/_noutf8(), passing dst
 *      in the data argument.
 *   5. For CARD_r, validates UTF-8 inline (kUpb_DecodeStatus_BadUtf8 on
 *      failure) and asks fastdecode_nextrepeated() what follows: the same
 *      field (loop via `again`, unless aliasing has been switched off in the
 *      meantime, in which case the array is committed and the tag is
 *      re-dispatched), another field (re-dispatch on its tag), or the limit
 *      (return ptr).
 *   6. Otherwise tail-calls fastdecode_verifyutf8() (with data = dst) when
 *      validate_utf8 is set, or fastdecode_dispatch() when it is not. */
1786 #define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card,   \
1787                           copyfunc, validate_utf8)                             \
1788   upb_StringView* dst;                                                         \
1789   fastdecode_arr farr;                                                         \
1790   int64_t size;                                                                \
1791                                                                                \
1792   if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {                    \
1793     RETURN_GENERIC("string field tag mismatch\n");                             \
1794   }                                                                            \
1795                                                                                \
1796   if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) {       \
1797     UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS);                              \
1798   }                                                                            \
1799                                                                                \
1800   dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,               \
1801                             sizeof(upb_StringView), card);                     \
1802                                                                                \
1803   again:                                                                       \
1804   if (card == CARD_r) {                                                        \
1805     dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView));         \
1806   }                                                                            \
1807                                                                                \
1808   size = (int8_t)ptr[tagbytes];                                                \
1809   ptr += tagbytes + 1;                                                         \
1810   dst->data = ptr;                                                             \
1811   dst->size = size;                                                            \
1812                                                                                \
1813   if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) {               \
1814     ptr--;                                                                     \
1815     if (validate_utf8) {                                                       \
1816       return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits,           \
1817                                         (uint64_t)dst);                        \
1818     } else {                                                                   \
1819       return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits,         \
1820                                           (uint64_t)dst);                      \
1821     }                                                                          \
1822   }                                                                            \
1823                                                                                \
1824   ptr += size;                                                                 \
1825                                                                                \
1826   if (card == CARD_r) {                                                        \
1827     if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) {       \
1828       return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8);                     \
1829     }                                                                          \
1830     fastdecode_nextret ret = fastdecode_nextrepeated(                          \
1831         d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView));          \
1832     switch (ret.next) {                                                        \
1833       case FD_NEXT_SAMEFIELD:                                                  \
1834         dst = ret.dst;                                                         \
1835         if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \
1836           /* Buffer flipped and we can't alias any more. Bounce to */          \
1837           /* copyfunc(), but via dispatch since we need to reload table */     \
1838           /* data also. */                                                     \
1839           fastdecode_commitarr(dst, &farr, sizeof(upb_StringView));            \
1840           data = ret.tag;                                                      \
1841           UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);          \
1842         }                                                                      \
1843         goto again;                                                            \
1844       case FD_NEXT_OTHERFIELD:                                                 \
1845         data = ret.tag;                                                        \
1846         UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);            \
1847       case FD_NEXT_ATLIMIT:                                                    \
1848         return ptr;                                                            \
1849     }                                                                          \
1850   }                                                                            \
1851                                                                                \
1852   if (card != CARD_r && validate_utf8) {                                       \
1853     data = (uint64_t)dst;                                                      \
1854     UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS);                 \
1855   }                                                                            \
1856                                                                                \
1857   UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
1858 
1859 /* Generate all combinations:
1860  * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */
1861 
/* Instantiation of the string/bytes fast parsers.
 *
 * s_VALIDATE / b_VALIDATE supply the validate_utf8 argument for the `s` and
 * `b` type letters respectively (true for `s`, false for `b`).
 *
 * F(card, tagbytes, type) defines two functions:
 *   upb_c<card><type>_<tagbytes>bt - UPB_NOINLINE, built from
 *                                    FASTDECODE_COPYSTRING.
 *   upb_p<card><type>_<tagbytes>bt - built from FASTDECODE_STRING, with the
 *                                    matching upb_c... function as copyfunc.
 *
 * UTF8() expands F for both type letters, and TAGBYTES() expands UTF8 for
 * 1- and 2-byte tags; TAGBYTES is then invoked once per cardinality letter.
 *
 * NOTE(review): the UTF8 helper macro is not #undef'd alongside the other
 * helpers below, so it stays defined past this section - confirm whether
 * that is intended. */
1862 #define s_VALIDATE true
1863 #define b_VALIDATE false
1864 
1865 #define F(card, tagbytes, type)                                        \
1866   UPB_NOINLINE                                                         \
1867   const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) {   \
1868     FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
1869                           CARD_##card, type##_VALIDATE);               \
1870   }                                                                    \
1871   const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) {   \
1872     FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes,     \
1873                       CARD_##card, upb_c##card##type##_##tagbytes##bt, \
1874                       type##_VALIDATE);                                \
1875   }
1876 
1877 #define UTF8(card, tagbytes) \
1878   F(card, tagbytes, s)       \
1879   F(card, tagbytes, b)
1880 
1881 #define TAGBYTES(card) \
1882   UTF8(card, 1)        \
1883   UTF8(card, 2)
1884 
1885 TAGBYTES(s)
TAGBYTES(o)1886 TAGBYTES(o)
1887 TAGBYTES(r)
1888 
1889 #undef s_VALIDATE
1890 #undef b_VALIDATE
1891 #undef F
1892 #undef TAGBYTES
1893 #undef FASTDECODE_LONGSTRING
1894 #undef FASTDECODE_COPYSTRING
1895 #undef FASTDECODE_STRING
1896 
1897 /* message fields *************************************************************/
1898 
1899 UPB_INLINE
1900 upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l,
1901                                 int msg_ceil_bytes) {
1902   size_t size = l->size + sizeof(upb_Message_Internal);
1903   char* msg_data;
1904   if (UPB_LIKELY(msg_ceil_bytes > 0 &&
1905                  _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) {
1906     UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
1907     msg_data = d->arena.head.ptr;
1908     d->arena.head.ptr += size;
1909     UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
1910     memset(msg_data, 0, msg_ceil_bytes);
1911     UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
1912   } else {
1913     msg_data = (char*)upb_Arena_Malloc(&d->arena, size);
1914     memset(msg_data, 0, size);
1915   }
1916   return msg_data + sizeof(upb_Message_Internal);
1917 }
1918 
/* Context handed to fastdecode_tosubmsg() through its void* argument. */
1919 typedef struct {
  /* Table value forwarded to fastdecode_dispatch() for the submessage. */
1920   intptr_t table;
  /* Submessage that the nested parse writes into. */
1921   upb_Message* msg;
1922 } fastdecode_submsgdata;
1923 
1924 UPB_FORCEINLINE
fastdecode_tosubmsg(upb_Decoder * d,const char * ptr,void * ctx)1925 static const char* fastdecode_tosubmsg(upb_Decoder* d, const char* ptr,
1926                                        void* ctx) {
1927   fastdecode_submsgdata* submsg = ctx;
1928   ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0);
1929   UPB_ASSUME(ptr != NULL);
1930   return ptr;
1931 }
1932 
/* FASTDECODE_SUBMSG: function body of the fast-path parser for one
 * submessage field.
 *
 * Macro parameters:
 *   d, ptr, msg, table, hasbits, data - the standard fast-parser arguments.
 *   tagbytes       - number of tag bytes preceding the length.
 *   msg_ceil_bytes - size ceiling forwarded to decode_newmsg_ceil().
 *   card           - cardinality selector (CARD_s / CARD_o / CARD_r).
 *
 * Flow, in order:
 *   1. Falls back through RETURN_GENERIC() if fastdecode_checktag() fails.
 *   2. Consumes one level of d->depth; reaching zero is reported as
 *      kUpb_DecodeStatus_MaxDepthExceeded.
 *   3. Looks up the sub-table from bits 16..23 of `data`.  If that table has
 *      no fast tables (table_mask == (uint8_t)-1) the depth level taken in
 *      step 2 is given back before bailing out through RETURN_GENERIC(), so
 *      that abandoning the fast path does not leak recursion budget.
 *   4. For CARD_s, flushes `hasbits` into the message before recursing.
 *   5. Reuses the existing submessage for non-repeated fields when one is
 *      present, otherwise creates one with decode_newmsg_ceil().
 *   6. Parses the delimited body via fastdecode_delimited(); a NULL result or
 *      a pending end-group is reported as kUpb_DecodeStatus_Malformed.
 *   7. For CARD_r, loops while fastdecode_nextrepeated() reports the same
 *      field.  Every non-error exit restores d->depth. */
#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes,    \
                          msg_ceil_bytes, card)                           \
                                                                          \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {               \
    RETURN_GENERIC("submessage field tag mismatch\n");                    \
  }                                                                       \
                                                                          \
  if (--d->depth == 0) {                                                  \
    return fastdecode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);         \
  }                                                                       \
                                                                          \
  upb_Message** dst;                                                      \
  uint32_t submsg_idx = (data >> 16) & 0xff;                              \
  const upb_MiniTable* tablep = decode_totablep(table);                   \
  const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg;       \
  fastdecode_submsgdata submsg = {decode_totable(subtablep)};             \
  fastdecode_arr farr;                                                    \
                                                                          \
  if (subtablep->table_mask == (uint8_t)-1) {                             \
    /* Leaving the fast path: give back the depth level taken above, */   \
    /* exactly as every other non-error exit of this macro does. */       \
    d->depth++;                                                           \
    RETURN_GENERIC("submessage doesn't have fast tables.");               \
  }                                                                       \
                                                                          \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,          \
                            sizeof(upb_Message*), card);                  \
                                                                          \
  if (card == CARD_s) {                                                   \
    *(uint32_t*)msg |= hasbits;                                           \
    hasbits = 0;                                                          \
  }                                                                       \
                                                                          \
  again:                                                                  \
  if (card == CARD_r) {                                                   \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*));      \
  }                                                                       \
                                                                          \
  submsg.msg = *dst;                                                      \
                                                                          \
  if (card == CARD_r || UPB_LIKELY(!submsg.msg)) {                        \
    *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \
  }                                                                       \
                                                                          \
  ptr += tagbytes;                                                        \
  ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg);       \
                                                                          \
  if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) {      \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed);                \
  }                                                                       \
                                                                          \
  if (card == CARD_r) {                                                   \
    fastdecode_nextret ret = fastdecode_nextrepeated(                     \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*));       \
    switch (ret.next) {                                                   \
      case FD_NEXT_SAMEFIELD:                                             \
        dst = ret.dst;                                                    \
        goto again;                                                       \
      case FD_NEXT_OTHERFIELD:                                            \
        d->depth++;                                                       \
        data = ret.tag;                                                   \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);       \
      case FD_NEXT_ATLIMIT:                                               \
        d->depth++;                                                       \
        return ptr;                                                       \
    }                                                                     \
  }                                                                       \
                                                                          \
  d->depth++;                                                             \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
2000 
/* Instantiation of the submessage fast parsers.
 *
 * F(card, tagbytes, size_ceil, ceil_arg) defines
 *   upb_p<card>m_<tagbytes>bt_max<size_ceil>b
 * from FASTDECODE_SUBMSG, passing ceil_arg as msg_ceil_bytes.
 *
 * SIZES() expands F for the ceilings 64, 128, 192 and 256, plus a "max"
 * variant that passes -1; a non-positive ceiling makes decode_newmsg_ceil()
 * take its upb_Arena_Malloc() branch.  TAGBYTES() expands SIZES for 1- and
 * 2-byte tags and is invoked once per cardinality letter. */
2001 #define F(card, tagbytes, size_ceil, ceil_arg)                               \
2002   const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b(               \
2003       UPB_PARSE_PARAMS) {                                                    \
2004     FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \
2005                       CARD_##card);                                          \
2006   }
2007 
2008 #define SIZES(card, tagbytes) \
2009   F(card, tagbytes, 64, 64)   \
2010   F(card, tagbytes, 128, 128) \
2011   F(card, tagbytes, 192, 192) \
2012   F(card, tagbytes, 256, 256) \
2013   F(card, tagbytes, max, -1)
2014 
2015 #define TAGBYTES(card) \
2016   SIZES(card, 1)       \
2017   SIZES(card, 2)
2018 
2019 TAGBYTES(s)
2020 TAGBYTES(o)
2021 TAGBYTES(r)
2022 
2023 #undef TAGBYTES
2024 #undef SIZES
2025 #undef F
2026 #undef FASTDECODE_SUBMSG
2027 
2028 #endif /* UPB_FASTTABLE */
2029 
2030 /** upb/json_decode.c ************************************************************/
2031 
2032 #include <errno.h>
2033 #include <float.h>
2034 #include <inttypes.h>
2035 #include <limits.h>
2036 #include <math.h>
2037 #include <setjmp.h>
2038 #include <stdlib.h>
2039 #include <string.h>
2040 
2041 
2042 /* Special header, must be included last. */
2043 
/* State of one JSON parse. */
2044 typedef struct {
  /* Read cursor and one-past-the-end of the text being parsed. */
2045   const char *ptr, *end;
2046   upb_Arena* arena; /* TODO: should we have a tmp arena for tmp data? */
2047   const upb_DefPool* symtab;
  /* Remaining nesting budget: jsondec_push() decrements it and fails once it
   * drops below zero; jsondec_arrend()/jsondec_objend() increment it. */
2048   int depth;
  /* Receives the message formatted by jsondec_err()/jsondec_errf(). */
2049   upb_Status* status;
  /* Jump target used by jsondec_err()/jsondec_errf() to abort the parse. */
2050   jmp_buf err;
  /* Line counter and the position recorded at the most recent '\n' (see
   * jsondec_skipws()); together they give the line:column in error text. */
2051   int line;
2052   const char* line_begin;
  /* Set by jsondec_push() and cleared by the first jsondec_seqnext() call;
   * while set, no ',' is required before the next element. */
2053   bool is_first;
2054   int options;
2055   const upb_FieldDef* debug_field;
2056 } jsondec;
2057 
/* Kinds of JSON value, as classified from the first character by
 * jsondec_rawpeek(). */
2058 enum { JD_OBJECT, JD_ARRAY, JD_STRING, JD_NUMBER, JD_TRUE, JD_FALSE, JD_NULL };
2059 
2060 /* Forward declarations of mutually-recursive functions. */
2061 static void jsondec_wellknown(jsondec* d, upb_Message* msg,
2062                               const upb_MessageDef* m);
2063 static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f);
2064 static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
2065                                    const upb_MessageDef* m);
2066 static void jsondec_object(jsondec* d, upb_Message* msg,
2067                            const upb_MessageDef* m);
2068 
/* Returns true iff `str` holds exactly the bytes of the NUL-terminated
 * literal `lit` (same length, same contents). */
static bool jsondec_streql(upb_StringView str, const char* lit) {
  if (str.size != strlen(lit)) return false;
  return memcmp(str.data, lit, str.size) == 0;
}
2072 
jsondec_isnullvalue(const upb_FieldDef * f)2073 static bool jsondec_isnullvalue(const upb_FieldDef* f) {
2074   return upb_FieldDef_CType(f) == kUpb_CType_Enum &&
2075          strcmp(upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f)),
2076                 "google.protobuf.NullValue") == 0;
2077 }
2078 
jsondec_isvalue(const upb_FieldDef * f)2079 static bool jsondec_isvalue(const upb_FieldDef* f) {
2080   return (upb_FieldDef_CType(f) == kUpb_CType_Message &&
2081           upb_MessageDef_WellKnownType(upb_FieldDef_MessageSubDef(f)) ==
2082               kUpb_WellKnown_Value) ||
2083          jsondec_isnullvalue(f);
2084 }
2085 
jsondec_err(jsondec * d,const char * msg)2086 UPB_NORETURN static void jsondec_err(jsondec* d, const char* msg) {
2087   upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: %s", d->line,
2088                             (int)(d->ptr - d->line_begin), msg);
2089   UPB_LONGJMP(d->err, 1);
2090 }
2091 
2092 UPB_PRINTF(2, 3)
jsondec_errf(jsondec * d,const char * fmt,...)2093 UPB_NORETURN static void jsondec_errf(jsondec* d, const char* fmt, ...) {
2094   va_list argp;
2095   upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: ", d->line,
2096                             (int)(d->ptr - d->line_begin));
2097   va_start(argp, fmt);
2098   upb_Status_VAppendErrorFormat(d->status, fmt, argp);
2099   va_end(argp);
2100   UPB_LONGJMP(d->err, 1);
2101 }
2102 
/* Advances past spaces, tabs, carriage returns and newlines, updating the
 * line bookkeeping at every newline.  Returns with d->ptr on the first
 * non-whitespace character; hitting the end of input first is an error. */
static void jsondec_skipws(jsondec* d) {
  for (; d->ptr != d->end; d->ptr++) {
    const char c = *d->ptr;

    if (c == '\n') {
      d->line++;
      d->line_begin = d->ptr;
    } else if (c != '\r' && c != '\t' && c != ' ') {
      return;
    }
  }
  jsondec_err(d, "Unexpected EOF");
}
2121 
/* Consumes one character if the next input character equals `ch`.  Returns
 * whether it did; at end of input nothing is consumed. */
static bool jsondec_tryparsech(jsondec* d, char ch) {
  if (d->ptr != d->end && *d->ptr == ch) {
    d->ptr++;
    return true;
  }
  return false;
}
2127 
/* Consumes the exact text `lit` from the input, or fails the parse if the
 * remaining input is shorter than `lit` or does not start with it. */
static void jsondec_parselit(jsondec* d, const char* lit) {
  const size_t remaining = d->end - d->ptr;
  const size_t lit_len = strlen(lit);
  const bool matches =
      remaining >= lit_len && memcmp(d->ptr, lit, lit_len) == 0;

  if (!matches) {
    jsondec_errf(d, "Expected: '%s'", lit);
  }
  d->ptr += lit_len;
}
2136 
/* Skips whitespace, then requires and consumes the character `ch`. */
static void jsondec_wsch(jsondec* d, char ch) {
  jsondec_skipws(d);
  if (jsondec_tryparsech(d, ch)) return;
  jsondec_errf(d, "Expected: '%c'", ch);
}
2143 
/* Consumes the literal `true`; anything else fails the parse. */
static void jsondec_true(jsondec* d) {
  jsondec_parselit(d, "true");
}
/* Consumes the literal `false`; anything else fails the parse. */
static void jsondec_false(jsondec* d) {
  jsondec_parselit(d, "false");
}
/* Consumes the literal `null`; anything else fails the parse. */
static void jsondec_null(jsondec* d) {
  jsondec_parselit(d, "null");
}
2147 
/* Consumes the ':' that separates an object key from its value, allowing
 * whitespace in front of it. */
static void jsondec_entrysep(jsondec* d) {
  static const char separator[] = ":";

  jsondec_skipws(d);
  jsondec_parselit(d, separator);
}
2152 
/* Classifies the JSON value that starts at d->ptr by looking at its first
 * character only; nothing is consumed.  Reads *d->ptr unconditionally, so the
 * caller must ensure the cursor is not at the end of input.  A character that
 * cannot start a value fails the parse. */
static int jsondec_rawpeek(jsondec* d) {
  const char c = *d->ptr;

  if (c == '{') return JD_OBJECT;
  if (c == '[') return JD_ARRAY;
  if (c == '"') return JD_STRING;
  if (c == '-' || (c >= '0' && c <= '9')) return JD_NUMBER;
  if (c == 't') return JD_TRUE;
  if (c == 'f') return JD_FALSE;
  if (c == 'n') return JD_NULL;

  jsondec_errf(d, "Unexpected character: '%c'", *d->ptr);
}
2183 
2184 /* JSON object/array **********************************************************/
2185 
2186 /* These are used like so:
2187  *
2188  * jsondec_objstart(d);
2189  * while (jsondec_objnext(d)) {
2190  *   ...
2191  * }
2192  * jsondec_objend(d) */
2193 
/* Skips leading whitespace and reports the kind (JD_*) of the value that
 * follows, without consuming it. */
static int jsondec_peek(jsondec* d) {
  int kind;

  jsondec_skipws(d);
  kind = jsondec_rawpeek(d);
  return kind;
}
2198 
/* Enters one level of nesting: spends one unit of the depth budget (failing
 * the parse once it goes negative) and marks the new sequence as not having
 * produced an element yet. */
static void jsondec_push(jsondec* d) {
  d->depth -= 1;
  if (d->depth < 0) {
    jsondec_err(d, "Recursion limit exceeded");
  }
  d->is_first = true;
}
2205 
/* Advances to the next element of an array/object.  Returns false (consuming
 * only whitespace) when the next character is `end_ch`; otherwise requires a
 * ',' before every element except the first and returns true. */
static bool jsondec_seqnext(jsondec* d, char end_ch) {
  const bool needs_comma = !d->is_first;

  d->is_first = false;
  jsondec_skipws(d);

  if (*d->ptr == end_ch) return false;
  if (needs_comma) jsondec_parselit(d, ",");
  return true;
}
2214 
/* Opens an array: enters one nesting level, then consumes '[' (leading
 * whitespace allowed). */
static void jsondec_arrstart(jsondec* d) {
  const char open_ch = '[';

  jsondec_push(d);
  jsondec_wsch(d, open_ch);
}
2219 
/* Closes an array: returns the nesting level taken by jsondec_arrstart(),
 * then consumes ']' (leading whitespace allowed). */
static void jsondec_arrend(jsondec* d) {
  const char close_ch = ']';

  d->depth += 1;
  jsondec_wsch(d, close_ch);
}
2224 
/* Returns true if another array element follows, false at the closing ']'. */
static bool jsondec_arrnext(jsondec* d) {
  return jsondec_seqnext(d, ']');
}
2226 
/* Opens an object: enters one nesting level, then consumes '{' (leading
 * whitespace allowed). */
static void jsondec_objstart(jsondec* d) {
  const char open_ch = '{';

  jsondec_push(d);
  jsondec_wsch(d, open_ch);
}
2231 
/* Closes an object: returns the nesting level taken by jsondec_objstart(),
 * then consumes '}' (leading whitespace allowed). */
static void jsondec_objend(jsondec* d) {
  const char close_ch = '}';

  d->depth += 1;
  jsondec_wsch(d, close_ch);
}
2236 
/* Returns true if another object entry follows, false at the closing '}'.
 * When an entry follows, it must begin with a string key. */
static bool jsondec_objnext(jsondec* d) {
  const bool has_entry = jsondec_seqnext(d, '}');

  if (has_entry && jsondec_peek(d) != JD_STRING) {
    jsondec_err(d, "Object must start with string");
  }
  return has_entry;
}
2244 
2245 /* JSON number ****************************************************************/
2246 
/* Consumes a (possibly empty) run of ASCII digits.  Returns true iff at least
 * one digit was consumed. */
static bool jsondec_tryskipdigits(jsondec* d) {
  const char* const first = d->ptr;

  while (d->ptr < d->end && *d->ptr >= '0' && *d->ptr <= '9') {
    d->ptr++;
  }

  return first != d->ptr;
}
2259 
/* Consumes a run of one or more ASCII digits; an empty run fails the parse. */
static void jsondec_skipdigits(jsondec* d) {
  if (jsondec_tryskipdigits(d)) return;
  jsondec_err(d, "Expected one or more digits");
}
2265 
/* Parses a JSON number starting at d->ptr and returns it as a double.
 *
 * The caller must have established that the cursor is on the first character
 * of a number (see the assert).  The text is first validated against JSON
 * number syntax (optional '-', integer part without leading zeros, optional
 * fraction, optional exponent), which also advances d->ptr past the number.
 * The validated span is then converted with strtod().
 *
 * strtod() needs a NUL-terminated string, and the input span is not
 * guaranteed to be followed by a terminator, so the span is copied into a
 * NUL-terminated scratch buffer first (on the stack when short, otherwise in
 * the arena).  This keeps strtod() from reading past d->end.
 *
 * Fails the parse on malformed syntax, on allocation failure, or when the
 * value falls outside [-DBL_MAX, DBL_MAX]. */
static double jsondec_number(jsondec* d) {
  const char* start = d->ptr;

  assert(jsondec_rawpeek(d) == JD_NUMBER);

  /* Skip over the syntax of a number, as specified by JSON. */
  if (*d->ptr == '-') d->ptr++;

  if (jsondec_tryparsech(d, '0')) {
    if (jsondec_tryskipdigits(d)) {
      jsondec_err(d, "number cannot have leading zero");
    }
  } else {
    jsondec_skipdigits(d);
  }

  if (d->ptr == d->end) goto parse;
  if (jsondec_tryparsech(d, '.')) {
    jsondec_skipdigits(d);
  }
  if (d->ptr == d->end) goto parse;

  if (*d->ptr == 'e' || *d->ptr == 'E') {
    d->ptr++;
    if (d->ptr == d->end) {
      jsondec_err(d, "Unexpected EOF in number");
    }
    if (*d->ptr == '+' || *d->ptr == '-') {
      d->ptr++;
    }
    jsondec_skipdigits(d);
  }

parse:
  /* Having verified the syntax of a JSON number, use strtod() to parse
   * (strtod() accepts a superset of JSON syntax). */
  errno = 0;
  {
    char stackbuf[64];
    char* text = stackbuf;
    const size_t len = (size_t)(d->ptr - start);
    char* end;
    double val;

    /* Numbers too long for the stack buffer get arena storage instead, so
     * every syntactically valid number is still accepted. */
    if (len >= sizeof(stackbuf)) {
      text = (char*)upb_Arena_Malloc(d->arena, len + 1);
      if (!text) jsondec_err(d, "Out of memory");
    }
    memcpy(text, start, len);
    text[len] = '\0';

    val = strtod(text, &end);
    assert(end == text + len);

    /* Currently the min/max-val conformance tests fail if we check this.  Does
     * this mean the conformance tests are wrong or strtod() is wrong, or
     * something else?  Investigate further. */
    /*
    if (errno == ERANGE) {
      jsondec_err(d, "Number out of range");
    }
    */

    if (val > DBL_MAX || val < -DBL_MAX) {
      jsondec_err(d, "Number out of range");
    }

    return val;
  }
}
2324 
2325 /* JSON string ****************************************************************/
2326 
/* Consumes the character following a backslash in a JSON string and returns
 * the character it stands for.  Handles the single-character escapes only
 * (the \u form is not handled here); any other character fails the parse.
 * Reads *d->ptr unconditionally, so the caller must ensure the cursor is not
 * at the end of input. */
static char jsondec_escape(jsondec* d) {
  const char code = *d->ptr++;

  switch (code) {
    case '"':
    case '\\':
    case '/':
      /* These escapes stand for themselves. */
      return code;
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    default:
      jsondec_err(d, "Invalid escape char");
  }
}
2349 
/* Consumes exactly four hex digits (either case) and returns their value as a
 * 16-bit quantity.  Fails the parse if fewer than four characters remain or
 * if any of them is not a hex digit. */
static uint32_t jsondec_codepoint(jsondec* d) {
  uint32_t value = 0;
  int i;

  if (d->end - d->ptr < 4) {
    jsondec_err(d, "EOF inside string");
  }

  for (i = 0; i < 4; i++) {
    const char c = *d->ptr++;
    uint32_t nibble = 0;

    if (c >= '0' && c <= '9') {
      nibble = (uint32_t)(c - '0');
    } else if (c >= 'a' && c <= 'f') {
      nibble = (uint32_t)(c - 'a' + 10);
    } else if (c >= 'A' && c <= 'F') {
      nibble = (uint32_t)(c - 'A' + 10);
    } else {
      jsondec_err(d, "Invalid hex digit");
    }
    value = (value << 4) | nibble;
  }

  return value;
}
2375 
2376 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
jsondec_unicode(jsondec * d,char * out)2377 static size_t jsondec_unicode(jsondec* d, char* out) {
2378   uint32_t cp = jsondec_codepoint(d);
2379   if (cp >= 0xd800 && cp <= 0xdbff) {
2380     /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
2381     uint32_t high = cp;
2382     uint32_t low;
2383     jsondec_parselit(d, "\\u");
2384     low = jsondec_codepoint(d);
2385     if (low < 0xdc00 || low > 0xdfff) {
2386       jsondec_err(d, "Invalid low surrogate");
2387     }
2388     cp = (high & 0x3ff) << 10;
2389     cp |= (low & 0x3ff);
2390     cp += 0x10000;
2391   } else if (cp >= 0xdc00 && cp <= 0xdfff) {
2392     jsondec_err(d, "Unpaired low surrogate");
2393   }
2394 
2395   /* Write to UTF-8 */
2396   if (cp <= 0x7f) {
2397     out[0] = cp;
2398     return 1;
2399   } else if (cp <= 0x07FF) {
2400     out[0] = ((cp >> 6) & 0x1F) | 0xC0;
2401     out[1] = ((cp >> 0) & 0x3F) | 0x80;
2402     return 2;
2403   } else if (cp <= 0xFFFF) {
2404     out[0] = ((cp >> 12) & 0x0F) | 0xE0;
2405     out[1] = ((cp >> 6) & 0x3F) | 0x80;
2406     out[2] = ((cp >> 0) & 0x3F) | 0x80;
2407     return 3;
2408   } else if (cp < 0x10FFFF) {
2409     out[0] = ((cp >> 18) & 0x07) | 0xF0;
2410     out[1] = ((cp >> 12) & 0x3f) | 0x80;
2411     out[2] = ((cp >> 6) & 0x3f) | 0x80;
2412     out[3] = ((cp >> 0) & 0x3f) | 0x80;
2413     return 4;
2414   } else {
2415     jsondec_err(d, "Invalid codepoint");
2416   }
2417 }
2418 
/* Grows the output buffer used while building a string.
 *
 * `*buf` is the start of the buffer, `*end` the write position and `*buf_end`
 * one past the capacity.  The capacity is doubled (with a minimum of 8 bytes)
 * through upb_Arena_Realloc(), and all three pointers are updated to refer to
 * the new storage with the write offset preserved.  Allocation failure fails
 * the parse. */
static void jsondec_resize(jsondec* d, char** buf, char** end, char** buf_end) {
  const size_t used = *end - *buf;
  const size_t old_capacity = *buf_end - *buf;
  const size_t new_capacity = UPB_MAX(8, 2 * old_capacity);

  *buf = upb_Arena_Realloc(d->arena, *buf, used, new_capacity);
  if (*buf == NULL) {
    jsondec_err(d, "Out of memory");
  }

  *buf_end = *buf + new_capacity;
  *end = *buf + used;
}
2430 
jsondec_string(jsondec * d)2431 static upb_StringView jsondec_string(jsondec* d) {
2432   char* buf = NULL;
2433   char* end = NULL;
2434   char* buf_end = NULL;
2435 
2436   jsondec_skipws(d);
2437 
2438   if (*d->ptr++ != '"') {
2439     jsondec_err(d, "Expected string");
2440   }
2441 
2442   while (d->ptr < d->end) {
2443     char ch = *d->ptr++;
2444 
2445     if (end == buf_end) {
2446       jsondec_resize(d, &buf, &end, &buf_end);
2447     }
2448 
2449     switch (ch) {
2450       case '"': {
2451         upb_StringView ret;
2452         ret.data = buf;
2453         ret.size = end - buf;
2454         *end = '\0'; /* Needed for possible strtod(). */
2455         return ret;
2456       }
2457       case '\\':
2458         if (d->ptr == d->end) goto eof;
2459         if (*d->ptr == 'u') {
2460           d->ptr++;
2461           if (buf_end - end < 4) {
2462             /* Allow space for maximum-sized code point (4 bytes). */
2463             jsondec_resize(d, &buf, &end, &buf_end);
2464           }
2465           end += jsondec_unicode(d, end);
2466         } else {
2467           *end++ = jsondec_escape(d);
2468         }
2469         break;
2470       default:
2471         if ((unsigned char)*d->ptr < 0x20) {
2472           jsondec_err(d, "Invalid char in JSON string");
2473         }
2474         *end++ = ch;
2475         break;
2476     }
2477   }
2478 
2479 eof:
2480   jsondec_err(d, "EOF inside string");
2481 }
2482 
/* Parses and discards one complete JSON value of any kind, recursing into
 * arrays and objects. */
static void jsondec_skipval(jsondec* d) {
  const int kind = jsondec_peek(d);

  switch (kind) {
    case JD_NUMBER:
      jsondec_number(d);
      break;
    case JD_STRING:
      jsondec_string(d);
      break;
    case JD_NULL:
      jsondec_null(d);
      break;
    case JD_FALSE:
      jsondec_false(d);
      break;
    case JD_TRUE:
      jsondec_true(d);
      break;
    case JD_ARRAY:
      for (jsondec_arrstart(d); jsondec_arrnext(d);) {
        jsondec_skipval(d);
      }
      jsondec_arrend(d);
      break;
    case JD_OBJECT:
      for (jsondec_objstart(d); jsondec_objnext(d);) {
        /* Each entry is: key string, ':', value. */
        jsondec_string(d);
        jsondec_entrysep(d);
        jsondec_skipval(d);
      }
      jsondec_objend(d);
      break;
  }
}
2518 
2519 /* Base64 decoding for bytes fields. ******************************************/
2520 
/* Maps one base64 character to its 6-bit value.
 *
 * Both the standard alphabet ('+', '/') and the URL-safe alphabet ('-', '_')
 * are accepted.  Any other character, including '=' and every byte with the
 * high bit set, yields (unsigned int)-1, i.e. a value with all bits set, so
 * callers that OR shifted results together can detect junk from the sign bit.
 *
 * The argument is converted through unsigned char before indexing so that
 * bytes >= 0x80 stay inside the 256-entry table even where plain `char` is
 * signed. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Table includes the normal base64 chars plus the URL-safe variant. */
  static const signed char table[256] = {
      /* 0x00 */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x10 */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x20: '+' = 62, '-' = 62, '/' = 63 */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
      /* 0x30: '0'..'9' = 52..61 */
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
      /* 0x40: 'A'..'O' = 0..14 */
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
      /* 0x50: 'P'..'Z' = 15..25, '_' = 63 */
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
      /* 0x60: 'a'..'o' = 26..40 */
      -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      /* 0x70: 'p'..'z' = 41..51 */
      41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
      /* 0x80 - 0xff: never valid */
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};

  /* Sign-extend return value so high bit will be set on any unexpected char. */
  return table[(unsigned char)ch];
}
2565 
/* Decodes the trailing, unpadded remainder of a base64 string into `out`.
 * Two input chars produce one byte and three produce two bytes.  Any other
 * remainder length, or a char the lookup table rejects, is reported through
 * jsondec_err() as "Corrupt base64".  Returns `out` advanced past the bytes
 * that were written. */
static char* jsondec_partialbase64(jsondec* d, const char* ptr, const char* end,
                                   char* out) {
  int32_t val = -1; /* Stays negative for unsupported remainder lengths. */

  /* The table lookup yields -1 for unexpected chars.  Left-shifting a negative
   * signed value is undefined behavior, so the sextets are combined as
   * uint32_t.  A bad char still leaves the top bit set, which the sign test
   * below detects. */
  switch (end - ptr) {
    case 2: {
      uint32_t bits = ((uint32_t)jsondec_base64_tablelookup(ptr[0]) << 18) |
                      ((uint32_t)jsondec_base64_tablelookup(ptr[1]) << 12);
      val = (int32_t)bits;
      out[0] = val >> 16;
      out += 1;
      break;
    }
    case 3: {
      uint32_t bits = ((uint32_t)jsondec_base64_tablelookup(ptr[0]) << 18) |
                      ((uint32_t)jsondec_base64_tablelookup(ptr[1]) << 12) |
                      ((uint32_t)jsondec_base64_tablelookup(ptr[2]) << 6);
      val = (int32_t)bits;
      out[0] = val >> 16;
      out[1] = (val >> 8) & 0xff;
      out += 2;
      break;
    }
  }

  if (val < 0) {
    jsondec_err(d, "Corrupt base64");
  }

  return out;
}
2593 
/* Decodes the base64 text in `str` in place and returns the number of decoded
 * bytes.  Full groups of four chars are decoded in the loop; a final group
 * that is padded with '=' or is shorter than four chars is handed to
 * jsondec_partialbase64(), which also reports corrupt input. */
static size_t jsondec_base64(jsondec* d, upb_StringView str) {
  /* We decode in place. This is safe because this is a new buffer (not
   * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */
  char* out = (char*)str.data;
  const char* ptr = str.data;
  const char* end = ptr + str.size;
  const char* end4 = ptr + (str.size & -4); /* Round down to multiple of 4. */

  for (; ptr < end4; ptr += 4, out += 3) {
    /* The lookup yields -1 for unexpected chars.  Combine the sextets as
     * uint32_t because left-shifting a negative signed value is undefined
     * behavior; the top bit is still set whenever any char was rejected. */
    uint32_t bits = ((uint32_t)jsondec_base64_tablelookup(ptr[0]) << 18) |
                    ((uint32_t)jsondec_base64_tablelookup(ptr[1]) << 12) |
                    ((uint32_t)jsondec_base64_tablelookup(ptr[2]) << 6) |
                    ((uint32_t)jsondec_base64_tablelookup(ptr[3]) << 0);
    int32_t val = (int32_t)bits;

    if (val < 0) {
      /* Junk chars or padding. Remove trailing padding, if any. */
      if (end - ptr == 4 && ptr[3] == '=') {
        if (ptr[2] == '=') {
          end -= 2;
        } else {
          end -= 1;
        }
      }
      break;
    }

    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out[2] = val & 0xff;
  }

  if (ptr < end) {
    /* Process remaining chars. We do not require padding. */
    out = jsondec_partialbase64(d, ptr, end, out);
  }

  return out - str.data;
}
2632 
2633 /* Low-level integer parsing **************************************************/
2634 
2635 /* We use these hand-written routines instead of strto[u]l() because the "long
2636  * long" variants aren't in c89. Also our version allows setting a ptr limit. */
2637 
/* Consumes a run of decimal digits from [ptr, end) and stores the accumulated
 * value in *val.  Scanning stops at the first non-digit or at `end`; the
 * position where it stopped is returned.  A value that would not fit in
 * uint64_t is reported through jsondec_err() as "Integer overflow". */
static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
                                       const char* end, uint64_t* val) {
  uint64_t acc = 0;

  for (; ptr < end; ptr++) {
    /* Unsigned subtraction maps every non-digit char to a value >= 10. */
    unsigned digit = *ptr - '0';
    if (digit >= 10) break;
    if (acc > UINT64_MAX / 10 || acc * 10 > UINT64_MAX - digit) {
      jsondec_err(d, "Integer overflow");
    }
    acc = acc * 10 + digit;
  }

  *val = acc;
  return ptr;
}
2655 
/* Signed counterpart of jsondec_buftouint64(): accepts an optional leading
 * '-' followed by decimal digits, stores the result in *val and returns the
 * position where scanning stopped.  A magnitude outside the int64_t range is
 * reported through jsondec_err() as "Integer overflow". */
static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
                                      const char* end, int64_t* val) {
  uint64_t magnitude;
  bool negative = (ptr != end && *ptr == '-');

  if (negative) {
    ptr++;
  }

  ptr = jsondec_buftouint64(d, ptr, end, &magnitude);

  /* A negative number may reach one further from zero than a positive one. */
  if (magnitude > (uint64_t)INT64_MAX + negative) {
    jsondec_err(d, "Integer overflow");
  }

  *val = negative ? -magnitude : magnitude;
  return ptr;
}
2674 
/* Parses all of `str` as an unsigned decimal integer.  If parsing stops before
 * the end of the string, jsondec_err() is called with "Non-number characters
 * in quoted integer". */
static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
  uint64_t parsed;
  const char* limit = str.data + str.size;
  const char* stop = jsondec_buftouint64(d, str.data, limit, &parsed);

  if (stop != limit) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
2683 
/* Parses all of `str` as a signed decimal integer.  If parsing stops before
 * the end of the string, jsondec_err() is called with "Non-number characters
 * in quoted integer". */
static int64_t jsondec_strtoint64(jsondec* d, upb_StringView str) {
  int64_t parsed;
  const char* limit = str.data + str.size;
  const char* stop = jsondec_buftoint64(d, str.data, limit, &parsed);

  if (stop != limit) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
2692 
2693 /* Primitive value types ******************************************************/
2694 
2695 /* Parse INT32 or INT64 value. */
jsondec_int(jsondec * d,const upb_FieldDef * f)2696 static upb_MessageValue jsondec_int(jsondec* d, const upb_FieldDef* f) {
2697   upb_MessageValue val;
2698 
2699   switch (jsondec_peek(d)) {
2700     case JD_NUMBER: {
2701       double dbl = jsondec_number(d);
2702       if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
2703         jsondec_err(d, "JSON number is out of range.");
2704       }
2705       val.int64_val = dbl; /* must be guarded, overflow here is UB */
2706       if (val.int64_val != dbl) {
2707         jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
2708                      val.int64_val);
2709       }
2710       break;
2711     }
2712     case JD_STRING: {
2713       upb_StringView str = jsondec_string(d);
2714       val.int64_val = jsondec_strtoint64(d, str);
2715       break;
2716     }
2717     default:
2718       jsondec_err(d, "Expected number or string");
2719   }
2720 
2721   if (upb_FieldDef_CType(f) == kUpb_CType_Int32 ||
2722       upb_FieldDef_CType(f) == kUpb_CType_Enum) {
2723     if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
2724       jsondec_err(d, "Integer out of range.");
2725     }
2726     val.int32_val = (int32_t)val.int64_val;
2727   }
2728 
2729   return val;
2730 }
2731 
2732 /* Parse UINT32 or UINT64 value. */
jsondec_uint(jsondec * d,const upb_FieldDef * f)2733 static upb_MessageValue jsondec_uint(jsondec* d, const upb_FieldDef* f) {
2734   upb_MessageValue val = {0};
2735 
2736   switch (jsondec_peek(d)) {
2737     case JD_NUMBER: {
2738       double dbl = jsondec_number(d);
2739       if (dbl > 18446744073709549568.0 || dbl < 0) {
2740         jsondec_err(d, "JSON number is out of range.");
2741       }
2742       val.uint64_val = dbl; /* must be guarded, overflow here is UB */
2743       if (val.uint64_val != dbl) {
2744         jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
2745                      val.uint64_val);
2746       }
2747       break;
2748     }
2749     case JD_STRING: {
2750       upb_StringView str = jsondec_string(d);
2751       val.uint64_val = jsondec_strtouint64(d, str);
2752       break;
2753     }
2754     default:
2755       jsondec_err(d, "Expected number or string");
2756   }
2757 
2758   if (upb_FieldDef_CType(f) == kUpb_CType_UInt32) {
2759     if (val.uint64_val > UINT32_MAX) {
2760       jsondec_err(d, "Integer out of range.");
2761     }
2762     val.uint32_val = (uint32_t)val.uint64_val;
2763   }
2764 
2765   return val;
2766 }
2767 
2768 /* Parse DOUBLE or FLOAT value. */
jsondec_double(jsondec * d,const upb_FieldDef * f)2769 static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) {
2770   upb_StringView str;
2771   upb_MessageValue val = {0};
2772 
2773   switch (jsondec_peek(d)) {
2774     case JD_NUMBER:
2775       val.double_val = jsondec_number(d);
2776       break;
2777     case JD_STRING:
2778       str = jsondec_string(d);
2779       if (jsondec_streql(str, "NaN")) {
2780         val.double_val = NAN;
2781       } else if (jsondec_streql(str, "Infinity")) {
2782         val.double_val = INFINITY;
2783       } else if (jsondec_streql(str, "-Infinity")) {
2784         val.double_val = -INFINITY;
2785       } else {
2786         val.double_val = strtod(str.data, NULL);
2787       }
2788       break;
2789     default:
2790       jsondec_err(d, "Expected number or string");
2791   }
2792 
2793   if (upb_FieldDef_CType(f) == kUpb_CType_Float) {
2794     if (val.double_val != INFINITY && val.double_val != -INFINITY &&
2795         (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
2796       jsondec_err(d, "Float out of range");
2797     }
2798     val.float_val = val.double_val;
2799   }
2800 
2801   return val;
2802 }
2803 
2804 /* Parse STRING or BYTES value. */
jsondec_strfield(jsondec * d,const upb_FieldDef * f)2805 static upb_MessageValue jsondec_strfield(jsondec* d, const upb_FieldDef* f) {
2806   upb_MessageValue val;
2807   val.str_val = jsondec_string(d);
2808   if (upb_FieldDef_CType(f) == kUpb_CType_Bytes) {
2809     val.str_val.size = jsondec_base64(d, val.str_val);
2810   }
2811   return val;
2812 }
2813 
jsondec_enum(jsondec * d,const upb_FieldDef * f)2814 static upb_MessageValue jsondec_enum(jsondec* d, const upb_FieldDef* f) {
2815   switch (jsondec_peek(d)) {
2816     case JD_STRING: {
2817       upb_StringView str = jsondec_string(d);
2818       const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
2819       const upb_EnumValueDef* ev =
2820           upb_EnumDef_FindValueByNameWithSize(e, str.data, str.size);
2821       upb_MessageValue val;
2822       if (ev) {
2823         val.int32_val = upb_EnumValueDef_Number(ev);
2824       } else {
2825         if (d->options & upb_JsonDecode_IgnoreUnknown) {
2826           val.int32_val = 0;
2827         } else {
2828           jsondec_errf(d, "Unknown enumerator: '" UPB_STRINGVIEW_FORMAT "'",
2829                        UPB_STRINGVIEW_ARGS(str));
2830         }
2831       }
2832       return val;
2833     }
2834     case JD_NULL: {
2835       if (jsondec_isnullvalue(f)) {
2836         upb_MessageValue val;
2837         jsondec_null(d);
2838         val.int32_val = 0;
2839         return val;
2840       }
2841     }
2842       /* Fallthrough. */
2843     default:
2844       return jsondec_int(d, f);
2845   }
2846 }
2847 
jsondec_bool(jsondec * d,const upb_FieldDef * f)2848 static upb_MessageValue jsondec_bool(jsondec* d, const upb_FieldDef* f) {
2849   bool is_map_key = upb_FieldDef_Number(f) == 1 &&
2850                     upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f));
2851   upb_MessageValue val;
2852 
2853   if (is_map_key) {
2854     upb_StringView str = jsondec_string(d);
2855     if (jsondec_streql(str, "true")) {
2856       val.bool_val = true;
2857     } else if (jsondec_streql(str, "false")) {
2858       val.bool_val = false;
2859     } else {
2860       jsondec_err(d, "Invalid boolean map key");
2861     }
2862   } else {
2863     switch (jsondec_peek(d)) {
2864       case JD_TRUE:
2865         val.bool_val = true;
2866         jsondec_true(d);
2867         break;
2868       case JD_FALSE:
2869         val.bool_val = false;
2870         jsondec_false(d);
2871         break;
2872       default:
2873         jsondec_err(d, "Expected true or false");
2874     }
2875   }
2876 
2877   return val;
2878 }
2879 
2880 /* Composite types (array/message/map) ****************************************/
2881 
/* Parses a JSON array into the repeated field `f` of `msg`, appending one
 * element per array entry. */
static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Array* dest = upb_Message_Mutable(msg, f, d->arena).array;

  jsondec_arrstart(d);
  for (;;) {
    upb_MessageValue item;
    if (!jsondec_arrnext(d)) break;
    item = jsondec_value(d, f);
    upb_Array_Append(dest, item, d->arena);
  }
  jsondec_arrend(d);
}
2892 
/* Parses a JSON object into the map field `f` of `msg`.  Keys and values are
 * decoded according to fields 1 and 2 of the map-entry message type. */
static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Map* dest = upb_Message_Mutable(msg, f, d->arena).map;
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_field = upb_MessageDef_FindFieldByNumber(entry_m, 1);
  const upb_FieldDef* val_field = upb_MessageDef_FindFieldByNumber(entry_m, 2);

  jsondec_objstart(d);
  for (;;) {
    upb_MessageValue k;
    upb_MessageValue v;
    if (!jsondec_objnext(d)) break;
    k = jsondec_value(d, key_field);
    jsondec_entrysep(d);
    v = jsondec_value(d, val_field);
    upb_Map_Set(dest, k, v, d->arena);
  }
  jsondec_objend(d);
}
2909 
/* Decodes the next JSON value into `msg`.  Well-known types go through
 * jsondec_wellknown(); all other message types are parsed as a plain object. */
static void jsondec_tomsg(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    jsondec_wellknown(d, msg, m);
    return;
  }
  jsondec_object(d, msg, m);
}
2918 
jsondec_msg(jsondec * d,const upb_FieldDef * f)2919 static upb_MessageValue jsondec_msg(jsondec* d, const upb_FieldDef* f) {
2920   const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
2921   upb_Message* msg = upb_Message_New(m, d->arena);
2922   upb_MessageValue val;
2923 
2924   jsondec_tomsg(d, msg, m);
2925   val.msg_val = msg;
2926   return val;
2927 }
2928 
/* Parses one `"name": value` member of a JSON object into `msg`.
 *
 * A name of the form "[x]" is looked up as an extension named x, which must
 * extend `m`; any other name is looked up by JSON name in `m`.  Unknown names
 * are skipped when upb_JsonDecode_IgnoreUnknown is set and are an error
 * otherwise.  A JSON null leaves the field untouched unless
 * jsondec_isvalue(f) holds.  Setting a second member of an already-populated
 * real oneof is an error.  d->debug_field points at the field while its value
 * is being parsed and is put back afterwards. */
static void jsondec_field(jsondec* d, upb_Message* msg,
                          const upb_MessageDef* m) {
  const upb_FieldDef* field;
  const upb_FieldDef* outer_debug_field;
  upb_StringView key = jsondec_string(d);
  bool bracketed;

  jsondec_entrysep(d);

  bracketed = key.size >= 2 && key.data[0] == '[' &&
              key.data[key.size - 1] == ']';

  if (!bracketed) {
    field = upb_MessageDef_FindByJsonNameWithSize(m, key.data, key.size);
  } else {
    /* Strip the surrounding brackets before the extension lookup. */
    field = upb_DefPool_FindExtensionByNameWithSize(d->symtab, key.data + 1,
                                                    key.size - 2);
    if (field && upb_FieldDef_ContainingType(field) != m) {
      jsondec_errf(
          d, "Extension %s extends message %s, but was seen in message %s",
          upb_FieldDef_FullName(field),
          upb_MessageDef_FullName(upb_FieldDef_ContainingType(field)),
          upb_MessageDef_FullName(m));
    }
  }

  if (!field) {
    if (!(d->options & upb_JsonDecode_IgnoreUnknown)) {
      jsondec_errf(d, "No such field: " UPB_STRINGVIEW_FORMAT,
                   UPB_STRINGVIEW_ARGS(key));
    }
    jsondec_skipval(d);
    return;
  }

  if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(field)) {
    /* JSON "null" indicates a default value, so no need to set anything. */
    jsondec_null(d);
    return;
  }

  if (upb_FieldDef_RealContainingOneof(field) &&
      upb_Message_WhichOneof(msg, upb_FieldDef_ContainingOneof(field))) {
    jsondec_err(d, "More than one field for this oneof.");
  }

  outer_debug_field = d->debug_field;
  d->debug_field = field;

  if (upb_FieldDef_IsMap(field)) {
    jsondec_map(d, msg, field);
  } else if (upb_FieldDef_IsRepeated(field)) {
    jsondec_array(d, msg, field);
  } else if (upb_FieldDef_IsSubMessage(field)) {
    /* Decode into the (possibly pre-existing) sub-message in place. */
    upb_Message* child = upb_Message_Mutable(msg, field, d->arena).msg;
    const upb_MessageDef* child_m = upb_FieldDef_MessageSubDef(field);
    jsondec_tomsg(d, child, child_m);
  } else {
    upb_MessageValue scalar = jsondec_value(d, field);
    upb_Message_Set(msg, field, scalar, d->arena);
  }

  d->debug_field = outer_debug_field;
}
2991 
/* Parses a JSON object into `msg`, handing each member to jsondec_field(). */
static void jsondec_object(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  jsondec_objstart(d);
  for (;;) {
    if (!jsondec_objnext(d)) break;
    jsondec_field(d, msg, m);
  }
  jsondec_objend(d);
}
3000 
jsondec_value(jsondec * d,const upb_FieldDef * f)3001 static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f) {
3002   switch (upb_FieldDef_CType(f)) {
3003     case kUpb_CType_Bool:
3004       return jsondec_bool(d, f);
3005     case kUpb_CType_Float:
3006     case kUpb_CType_Double:
3007       return jsondec_double(d, f);
3008     case kUpb_CType_UInt32:
3009     case kUpb_CType_UInt64:
3010       return jsondec_uint(d, f);
3011     case kUpb_CType_Int32:
3012     case kUpb_CType_Int64:
3013       return jsondec_int(d, f);
3014     case kUpb_CType_String:
3015     case kUpb_CType_Bytes:
3016       return jsondec_strfield(d, f);
3017     case kUpb_CType_Enum:
3018       return jsondec_enum(d, f);
3019     case kUpb_CType_Message:
3020       return jsondec_msg(d, f);
3021     default:
3022       UPB_UNREACHABLE();
3023   }
3024 }
3025 
3026 /* Well-known types ***********************************************************/
3027 
/* Reads exactly `digits` decimal digits at *ptr, optionally followed by the
 * literal text `after` (may be NULL), and returns the number.  On success
 * *ptr is advanced past the digits and the separator; any mismatch is
 * reported through jsondec_err() as "Malformed timestamp".  No bounds are
 * checked here, so the caller must ensure enough input is available. */
static int jsondec_tsdigits(jsondec* d, const char** ptr, size_t digits,
                            const char* after) {
  uint64_t number;
  const char* first = *ptr;
  const char* stop = first + digits;
  size_t sep_len = after ? strlen(after) : 0;

  UPB_ASSERT(digits <= 9); /* int can't overflow. */

  if (jsondec_buftouint64(d, first, stop, &number) != stop ||
      (sep_len && memcmp(stop, after, sep_len) != 0)) {
    jsondec_err(d, "Malformed timestamp");
  }

  UPB_ASSERT(number < INT_MAX);

  *ptr = stop + sep_len;
  return (int)number;
}
3047 
/* Parses an optional fractional-seconds suffix (".ddd", at most nine digits)
 * at *ptr and returns it scaled to nanoseconds.  When *ptr does not point at
 * a '.', nothing is consumed and 0 is returned; otherwise *ptr is advanced
 * past the digits.  More than nine digits is reported through jsondec_err(). */
static int jsondec_nanos(jsondec* d, const char** ptr, const char* end) {
  const char* dot = *ptr;
  uint64_t frac = 0;

  if (dot != end && *dot == '.') {
    const char* frac_begin = dot + 1;
    const char* frac_end = jsondec_buftouint64(d, frac_begin, end, &frac);
    int ndigits = (int)(frac_end - frac_begin);
    int pad;

    if (ndigits > 9) {
      jsondec_err(d, "Too many digits for partial seconds");
    }

    /* Scale up to nine digits, e.g. ".5" -> 500000000. */
    for (pad = 9 - ndigits; pad > 0; pad--) {
      frac *= 10;
    }
    *ptr = frac_end;
  }

  UPB_ASSERT(frac < INT_MAX);

  return (int)frac;
}
3067 
/* jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0.
 *
 * Returns the number of days between 1970-01-01 and the given calendar date
 * (negative for earlier dates).  The computation is done in uint32_t on a
 * March-based year, so January and February count as months 10 and 11 of the
 * previous year. */
int jsondec_epochdays(int y, int m, int d) {
  uint32_t month = (uint32_t)m - 3u;    /* March-based month. */
  uint32_t year = (uint32_t)y + 4800u;  /* Before min year, multiple of 400. */
  uint32_t days;

  if (month > (uint32_t)m) {
    /* The subtraction wrapped: borrow twelve months from the year. */
    month += 12u;
    year -= 1u;
  }

  days = year * 365u;
  days += year / 4u - year / 100u + year / 400u; /* Leap days. */
  days += (month * 62719u + 769u) / 2048u;       /* Days before this month. */
  days += (uint32_t)(d - 1);
  days -= 2472632u;                              /* Rebase onto 1970-01-01. */
  return (int)days;
}
3079 
/* Converts a calendar date and time of day into seconds relative to
 * 1970-01-01T00:00:00, using jsondec_epochdays() for the date part. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  int64_t total = (int64_t)jsondec_epochdays(y, m, d) * 86400;
  total += h * 3600;
  total += min * 60;
  total += s;
  return total;
}
3083 
/* Parses a timestamp string of the form
 * "YYYY-MM-DDTHH:MM:SS[.fraction](Z|+HH:MM|-HH:MM)" and stores the result in
 * fields 1 (seconds) and 2 (nanos) of `msg`.  A UTC offset is folded into the
 * seconds value.  Malformed input and values below -62135596800 seconds are
 * reported through jsondec_err(). */
static void jsondec_timestamp(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  upb_MessageValue seconds;
  upb_MessageValue nanos;
  upb_StringView text = jsondec_string(d);
  const char* cur = text.data;
  const char* limit = cur + text.size;
  int year, mon, day, hour, min, sec;
  char zone;

  /* The fixed-width date/time part plus a zone char need at least 20 chars. */
  if (text.size < 20) goto malformed;

  /* 1972-01-01T01:00:00 */
  year = jsondec_tsdigits(d, &cur, 4, "-");
  mon = jsondec_tsdigits(d, &cur, 2, "-");
  day = jsondec_tsdigits(d, &cur, 2, "T");
  hour = jsondec_tsdigits(d, &cur, 2, ":");
  min = jsondec_tsdigits(d, &cur, 2, ":");
  sec = jsondec_tsdigits(d, &cur, 2, NULL);
  seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);

  nanos.int32_val = jsondec_nanos(d, &cur, limit);

  /* [+-]08:00 or Z */
  if (cur == limit) goto malformed;

  zone = *cur++;
  if (zone == 'Z') {
    if (cur != limit) goto malformed;
  } else if (zone == '+' || zone == '-') {
    int ofs_hour;
    int ofs_secs;
    if ((limit - cur) != 5) goto malformed;
    ofs_hour = jsondec_tsdigits(d, &cur, 2, ":");
    ofs_secs = jsondec_tsdigits(d, &cur, 2, NULL);
    ofs_secs = ((ofs_hour * 60) + ofs_secs) * 60;
    /* A zone ahead of UTC ('+') means the UTC instant is earlier. */
    seconds.int64_val += (zone == '-') ? ofs_secs : -ofs_secs;
  } else {
    goto malformed;
  }

  if (seconds.int64_val < -62135596800) {
    jsondec_err(d, "Timestamp out of range");
  }

  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds,
                  d->arena);
  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena);
  return;

malformed:
  jsondec_err(d, "Malformed timestamp");
}
3147 
/* Parses a duration string such as "3s", "3.000000001s" or "-0.5s" and stores
 * the result in fields 1 (seconds) and 2 (nanos) of `msg`.  For negative
 * durations the nanos value is negated so both parts carry the same sign.
 * Malformed input and values beyond +/-(3652500 * 86400) seconds are reported
 * through jsondec_err(). */
static void jsondec_duration(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  upb_MessageValue seconds;
  upb_MessageValue nanos;
  upb_StringView str = jsondec_string(d);
  const char* ptr = str.data;
  const char* end = ptr + str.size;
  const int64_t max = (uint64_t)3652500 * 86400;
  /* Remember the sign separately: "-0.5s" parses to zero whole seconds, so
   * the sign cannot be recovered from the seconds value afterwards. */
  const bool neg = (ptr != end && *ptr == '-');

  /* "3.000000001s", "3s", etc. */
  ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val);
  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  if (end - ptr != 1 || *ptr != 's') {
    jsondec_err(d, "Malformed duration");
  }

  if (seconds.int64_val < -max || seconds.int64_val > max) {
    jsondec_err(d, "Duration out of range");
  }

  if (neg) {
    nanos.int32_val = -nanos.int32_val;
  }

  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds,
                  d->arena);
  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena);
}
3177 
/* Parses a JSON array into a ListValue message.  Each element becomes a new
 * message of the element type of field 1; it is appended to the list first
 * and then filled in by jsondec_wellknownvalue(). */
static void jsondec_listvalue(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* list_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* elem_m = upb_FieldDef_MessageSubDef(list_f);
  upb_Array* list = upb_Message_Mutable(msg, list_f, d->arena).array;

  jsondec_arrstart(d);
  for (;;) {
    upb_Message* elem;
    upb_MessageValue boxed;
    if (!jsondec_arrnext(d)) break;
    elem = upb_Message_New(elem_m, d->arena);
    boxed.msg_val = elem;
    upb_Array_Append(list, boxed, d->arena);
    jsondec_wellknownvalue(d, elem, elem_m);
  }
  jsondec_arrend(d);
}
3194 
/* Parses a JSON object into a Struct message.  Each member's key becomes a
 * map key in field 1; the map value is a new message of the entry's value
 * type (entry field 2), inserted first and then filled in by
 * jsondec_wellknownvalue(). */
static void jsondec_struct(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
  const upb_FieldDef* map_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* entry_def = upb_FieldDef_MessageSubDef(map_f);
  const upb_FieldDef* entry_val_f =
      upb_MessageDef_FindFieldByNumber(entry_def, 2);
  const upb_MessageDef* elem_m = upb_FieldDef_MessageSubDef(entry_val_f);
  upb_Map* map = upb_Message_Mutable(msg, map_f, d->arena).map;

  jsondec_objstart(d);
  for (;;) {
    upb_MessageValue k;
    upb_MessageValue v;
    upb_Message* elem;
    if (!jsondec_objnext(d)) break;
    elem = upb_Message_New(elem_m, d->arena);
    k.str_val = jsondec_string(d);
    v.msg_val = elem;
    upb_Map_Set(map, k, v, d->arena);
    jsondec_entrysep(d);
    jsondec_wellknownvalue(d, elem, elem_m);
  }
  jsondec_objend(d);
}
3215 
/* Parses any JSON value into a Value message.  The kind of the next JSON
 * token selects which field of `m` is populated: null -> 1, number -> 2,
 * string -> 3, true/false -> 4, object -> 5 (parsed as a Struct) and
 * array -> 6 (parsed as a ListValue). */
static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
                                   const upb_MessageDef* m) {
  const upb_FieldDef* field;
  upb_MessageValue scalar;
  upb_Message* child;

  switch (jsondec_peek(d)) {
    /* Composite kinds return directly, because upb_Message_Mutable() is
     * enough to attach the sub-message. */
    case JD_ARRAY:
      /* ListValue list_value = 6; */
      field = upb_MessageDef_FindFieldByNumber(m, 6);
      child = upb_Message_Mutable(msg, field, d->arena).msg;
      jsondec_listvalue(d, child, upb_FieldDef_MessageSubDef(field));
      return;
    case JD_OBJECT:
      /* Struct struct_value = 5; */
      field = upb_MessageDef_FindFieldByNumber(m, 5);
      child = upb_Message_Mutable(msg, field, d->arena).msg;
      jsondec_struct(d, child, upb_FieldDef_MessageSubDef(field));
      return;

    /* Scalar kinds fill in `scalar` and are stored after the switch. */
    case JD_NULL:
      /* NullValue null_value = 1; */
      field = upb_MessageDef_FindFieldByNumber(m, 1);
      scalar.int32_val = 0;
      jsondec_null(d);
      break;
    case JD_NUMBER:
      /* double number_value = 2; */
      field = upb_MessageDef_FindFieldByNumber(m, 2);
      scalar.double_val = jsondec_number(d);
      break;
    case JD_STRING:
      /* string string_value = 3; */
      field = upb_MessageDef_FindFieldByNumber(m, 3);
      scalar.str_val = jsondec_string(d);
      break;
    case JD_TRUE:
      /* bool bool_value = 4; */
      field = upb_MessageDef_FindFieldByNumber(m, 4);
      scalar.bool_val = true;
      jsondec_true(d);
      break;
    case JD_FALSE:
      /* bool bool_value = 4; */
      field = upb_MessageDef_FindFieldByNumber(m, 4);
      scalar.bool_val = false;
      jsondec_false(d);
      break;

    default:
      UPB_UNREACHABLE();
  }

  upb_Message_Set(msg, field, scalar, d->arena);
}
3270 
jsondec_mask(jsondec * d,const char * buf,const char * end)3271 static upb_StringView jsondec_mask(jsondec* d, const char* buf,
3272                                    const char* end) {
3273   /* FieldMask fields grow due to inserted '_' characters, so we can't do the
3274    * transform in place. */
3275   const char* ptr = buf;
3276   upb_StringView ret;
3277   char* out;
3278 
3279   ret.size = end - ptr;
3280   while (ptr < end) {
3281     ret.size += (*ptr >= 'A' && *ptr <= 'Z');
3282     ptr++;
3283   }
3284 
3285   out = upb_Arena_Malloc(d->arena, ret.size);
3286   ptr = buf;
3287   ret.data = out;
3288 
3289   while (ptr < end) {
3290     char ch = *ptr++;
3291     if (ch >= 'A' && ch <= 'Z') {
3292       *out++ = '_';
3293       *out++ = ch + 32;
3294     } else if (ch == '_') {
3295       jsondec_err(d, "field mask may not contain '_'");
3296     } else {
3297       *out++ = ch;
3298     }
3299   }
3300 
3301   return ret;
3302 }
3303 
/* Parses a FieldMask from a JSON string of comma-separated paths.  Each path
 * is converted by jsondec_mask() and appended to repeated field 1. */
static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  /* repeated string paths = 1; */
  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_Array* paths = upb_Message_Mutable(msg, paths_f, d->arena).array;
  upb_StringView text = jsondec_string(d);
  const char* cur = text.data;
  const char* limit = cur + text.size;
  upb_MessageValue path;

  while (cur < limit) {
    const char* comma = memchr(cur, ',', limit - cur);
    /* The last path has no trailing comma and runs to the end. */
    const char* path_end = comma ? comma : limit;

    path.str_val = jsondec_mask(d, cur, path_end);
    cur = comma ? comma + 1 : limit;
    upb_Array_Append(paths, path, d->arena);
  }
}
3326 
/* Parses one member of an Any object (other than "@type") into the payload
 * message `msg` of type `m`. */
static void jsondec_anyfield(jsondec* d, upb_Message* msg,
                             const upb_MessageDef* m) {
  if (upb_MessageDef_WellKnownType(m) != kUpb_WellKnown_Unspecified) {
    /* For well-known types: {"@type": "[well-known type]", "value": <X>}
     * where <X> is whatever encoding the WKT normally uses. */
    upb_StringView key = jsondec_string(d);
    jsondec_entrysep(d);
    if (!jsondec_streql(key, "value")) {
      jsondec_err(d, "Key for well-known type must be 'value'");
    }
    jsondec_wellknown(d, msg, m);
    return;
  }

  /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
   * where f1, f2, etc. are the normal fields of this type. */
  jsondec_field(d, msg, m);
}
3344 
/* Parses the "@type" string of an Any, stores it in field 1 of `msg` and
 * returns the message definition named after the last '/' in the URL.  A URL
 * with no '/', with nothing before the '/', or naming a type that is not in
 * the decoder's symbol table is reported through jsondec_err(). */
static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
                                             const upb_MessageDef* m) {
  const upb_FieldDef* url_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* resolved;
  upb_StringView url = jsondec_string(d);
  const char* limit = url.data + url.size;
  const char* slash = limit;
  upb_MessageValue stored;

  stored.str_val = url;
  upb_Message_Set(msg, url_f, stored, d->arena);

  /* Find message name after the last '/' */
  while (slash > url.data) {
    slash--;
    if (*slash == '/') break;
  }

  if (slash == url.data || slash == limit) {
    jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  }

  slash++; /* Step over the '/' onto the type name. */
  resolved = upb_DefPool_FindMessageByNameWithSize(d->symtab, slash,
                                                   limit - slash);

  if (!resolved) {
    jsondec_err(d, "Type was not found");
  }

  return resolved;
}
3374 
/* Parses a JSON object into an Any message.
 *
 * The object is scanned for "@type", which selects the payload message type.
 * Members that appear before "@type" are skipped during the scan and then
 * re-parsed from a temporary copy of that input span once the type is known.
 * Members after "@type" are parsed directly.  The payload is finally
 * serialized with upb_Encode() and stored in field 2 of `msg`. */
static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
  /* string type_url = 1;
   * bytes value = 2; */
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  const upb_MessageDef* payload_m = NULL;
  upb_Message* payload;
  const char* early_begin = NULL; /* Start of first member before "@type". */
  const char* early_end = NULL;   /* The ',' just before the "@type" member. */
  upb_MessageValue encoded;

  jsondec_objstart(d);

  /* Scan looking for "@type", which is not necessarily first. */
  for (;;) {
    const char* member_start;
    upb_StringView key;

    if (payload_m || !jsondec_objnext(d)) break;

    member_start = d->ptr;
    key = jsondec_string(d);
    jsondec_entrysep(d);

    if (!jsondec_streql(key, "@type")) {
      if (!early_begin) early_begin = member_start;
      jsondec_skipval(d);
      continue;
    }

    payload_m = jsondec_typeurl(d, msg, m);
    if (early_begin) {
      /* Walk back from the "@type" key to the comma that precedes it. */
      early_end = member_start;
      while (*early_end != ',') early_end--;
    }
  }

  if (!payload_m) {
    jsondec_err(d, "Any object didn't contain a '@type' field");
  }

  payload = upb_Message_New(payload_m, d->arena);

  if (early_begin) {
    /* Copy the skipped members, replace the trailing ',' position with '}',
     * and temporarily point the decoder at the copy. */
    size_t span = early_end - early_begin + 1;
    char* scratch = upb_Arena_Malloc(d->arena, span);
    const char* resume_ptr = d->ptr;
    const char* resume_end = d->end;

    memcpy(scratch, early_begin, span - 1);
    scratch[span - 1] = '}';

    d->ptr = scratch;
    d->end = scratch + span;
    d->is_first = true;
    while (jsondec_objnext(d)) {
      jsondec_anyfield(d, payload, payload_m);
    }

    d->ptr = resume_ptr;
    d->end = resume_end;
  }

  /* Members that follow "@type" in the original input. */
  while (jsondec_objnext(d)) {
    jsondec_anyfield(d, payload, payload_m);
  }

  jsondec_objend(d);

  encoded.str_val.data =
      upb_Encode(payload, upb_MessageDef_MiniTable(payload_m), 0, d->arena,
                 &encoded.str_val.size);
  upb_Message_Set(msg, value_f, encoded, d->arena);
}
3437 
/* Decodes a wrapper well-known type: the JSON value is parsed as the type of
 * field number 1 and stored into that field of `msg`. */
static void jsondec_wrapper(jsondec* d, upb_Message* msg,
                            const upb_MessageDef* m) {
  const upb_FieldDef* payload_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_MessageValue parsed = jsondec_value(d, payload_f);

  upb_Message_Set(msg, payload_f, parsed, d->arena);
}
3444 
/* Dispatches decoding of a well-known type to its dedicated parser, chosen by
 * upb_MessageDef_WellKnownType(m).  Any other value is treated as
 * unreachable. */
static void jsondec_wellknown(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  switch (upb_MessageDef_WellKnownType(m)) {
    case kUpb_WellKnown_Any:
      jsondec_any(d, msg, m);
      return;
    case kUpb_WellKnown_FieldMask:
      jsondec_fieldmask(d, msg, m);
      return;
    case kUpb_WellKnown_Duration:
      jsondec_duration(d, msg, m);
      return;
    case kUpb_WellKnown_Timestamp:
      jsondec_timestamp(d, msg, m);
      return;
    case kUpb_WellKnown_Value:
      jsondec_wellknownvalue(d, msg, m);
      return;
    case kUpb_WellKnown_ListValue:
      jsondec_listvalue(d, msg, m);
      return;
    case kUpb_WellKnown_Struct:
      jsondec_struct(d, msg, m);
      return;

    /* All scalar wrapper types share a single parser. */
    case kUpb_WellKnown_DoubleValue:
    case kUpb_WellKnown_FloatValue:
    case kUpb_WellKnown_Int64Value:
    case kUpb_WellKnown_UInt64Value:
    case kUpb_WellKnown_Int32Value:
    case kUpb_WellKnown_UInt32Value:
    case kUpb_WellKnown_StringValue:
    case kUpb_WellKnown_BytesValue:
    case kUpb_WellKnown_BoolValue:
      jsondec_wrapper(d, msg, m);
      return;

    default:
      UPB_UNREACHABLE();
  }
}
3484 
/* Parses `size` bytes of JSON text at `buf` into `msg`, a message of type `m`.
 *
 * Empty input is accepted and leaves `msg` untouched.  Returns true on
 * success; returns false when the parser bails out through the jump buffer
 * in the decoder state. */
bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg,
                    const upb_MessageDef* m, const upb_DefPool* symtab,
                    int options, upb_Arena* arena, upb_Status* status) {
  jsondec d;

  if (size == 0) return true;

  /* Input window. */
  d.ptr = buf;
  d.end = buf + size;

  /* Caller-supplied context. */
  d.arena = arena;
  d.symtab = symtab;
  d.status = status;
  d.options = options;

  /* Parser bookkeeping. */
  d.depth = 64;
  d.line = 1;
  d.line_begin = d.ptr;
  d.debug_field = NULL;
  d.is_first = false;

  /* Parse errors land here via the jump buffer. */
  if (UPB_SETJMP(d.err)) return false;

  jsondec_tomsg(&d, msg, m);
  return true;
}
3509 
3510 /** upb/json_encode.c ************************************************************/
3511 
3512 #include <ctype.h>
3513 #include <float.h>
3514 #include <inttypes.h>
3515 #include <math.h>
3516 #include <setjmp.h>
3517 #include <stdarg.h>
3518 #include <stdio.h>
3519 #include <string.h>
3520 
3521 
3522 /* Must be last. */
3523 
3524 typedef struct {
3525   char *buf, *ptr, *end;
3526   size_t overflow;
3527   int indent_depth;
3528   int options;
3529   const upb_DefPool* ext_pool;
3530   jmp_buf err;
3531   upb_Status* status;
3532   upb_Arena* arena;
3533 } jsonenc;
3534 
3535 static void jsonenc_msg(jsonenc* e, const upb_Message* msg,
3536                         const upb_MessageDef* m);
3537 static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
3538                            const upb_FieldDef* f);
3539 static void jsonenc_msgfield(jsonenc* e, const upb_Message* msg,
3540                              const upb_MessageDef* m);
3541 static void jsonenc_msgfields(jsonenc* e, const upb_Message* msg,
3542                               const upb_MessageDef* m, bool first);
3543 static void jsonenc_value(jsonenc* e, const upb_Message* msg,
3544                           const upb_MessageDef* m);
3545 
jsonenc_err(jsonenc * e,const char * msg)3546 UPB_NORETURN static void jsonenc_err(jsonenc* e, const char* msg) {
3547   upb_Status_SetErrorMessage(e->status, msg);
3548   longjmp(e->err, 1);
3549 }
3550 
3551 UPB_PRINTF(2, 3)
jsonenc_errf(jsonenc * e,const char * fmt,...)3552 UPB_NORETURN static void jsonenc_errf(jsonenc* e, const char* fmt, ...) {
3553   va_list argp;
3554   va_start(argp, fmt);
3555   upb_Status_VSetErrorFormat(e->status, fmt, argp);
3556   va_end(argp);
3557   longjmp(e->err, 1);
3558 }
3559 
jsonenc_arena(jsonenc * e)3560 static upb_Arena* jsonenc_arena(jsonenc* e) {
3561   /* Create lazily, since it's only needed for Any */
3562   if (!e->arena) {
3563     e->arena = upb_Arena_New();
3564   }
3565   return e->arena;
3566 }
3567 
/* Appends `len` bytes from `data` to the output.  Whatever does not fit in
 * the remaining buffer space is dropped and counted in e->overflow. */
static void jsonenc_putbytes(jsonenc* e, const void* data, size_t len) {
  const size_t room = e->end - e->ptr;

  if (UPB_LIKELY(room >= len)) {
    /* Common case: everything fits. */
    memcpy(e->ptr, data, len);
    e->ptr += len;
    return;
  }

  /* Copy the part that fits, if any, and account for the rest. */
  if (room != 0) {
    memcpy(e->ptr, data, room);
    e->ptr += room;
  }
  e->overflow += (len - room);
}
3581 
/* Appends the NUL-terminated string `str` (without its terminator). */
static void jsonenc_putstr(jsonenc* e, const char* str) {
  const size_t len = strlen(str);

  jsonenc_putbytes(e, str, len);
}
3585 
3586 UPB_PRINTF(2, 3)
jsonenc_printf(jsonenc * e,const char * fmt,...)3587 static void jsonenc_printf(jsonenc* e, const char* fmt, ...) {
3588   size_t n;
3589   size_t have = e->end - e->ptr;
3590   va_list args;
3591 
3592   va_start(args, fmt);
3593   n = _upb_vsnprintf(e->ptr, have, fmt, args);
3594   va_end(args);
3595 
3596   if (UPB_LIKELY(have > n)) {
3597     e->ptr += n;
3598   } else {
3599     e->ptr = UPB_PTRADD(e->ptr, have);
3600     e->overflow += (n - have);
3601   }
3602 }
3603 
/* Emits the fractional-seconds suffix (".xxx", ".xxxxxx" or ".xxxxxxxxx")
 * for `nanos`.  Nothing is emitted when nanos is zero; values outside
 * [0, 1000000000) raise an encoding error. */
static void jsonenc_nanos(jsonenc* e, int32_t nanos) {
  int digits;

  if (nanos == 0) return;
  if (nanos < 0 || nanos >= 1000000000) {
    jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos");
  }

  /* Strip trailing groups of three zero digits, shortening the output from
   * nine digits to six or three. */
  for (digits = 9; nanos % 1000 == 0; digits -= 3) {
    nanos /= 1000;
  }

  jsonenc_printf(e, ".%.*" PRId32, digits, nanos);
}
3619 
/* Encodes a Timestamp message (seconds = field 1, nanos = field 2) as a
 * quoted "YYYY-MM-DDTHH:MM:SS[.fraction]Z" string.  Values outside the range
 * 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z raise an encoding error. */
static void jsonenc_timestamp(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
  int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val;
  int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val;
  int L, N, I, J, K;
  int year, month, day;
  int hour, min, sec;

  if (seconds < -62135596800) {
    jsonenc_err(e,
                "error formatting timestamp as JSON: minimum acceptable value "
                "is 0001-01-01T00:00:00Z");
  }
  if (seconds > 253402300799) {
    jsonenc_err(e,
                "error formatting timestamp as JSON: maximum acceptable value "
                "is 9999-12-31T23:59:59Z");
  }

  /* Julian Day -> Y/M/D, Algorithm from:
   * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for
   *   Processing Calendar Dates," Communications of the Association of
   *   Computing Machines, vol. 11 (1968), p. 657.  */
  seconds += 62135596800;  // Ensure seconds is positive.
  L = (int)(seconds / 86400) - 719162 + 68569 + 2440588;
  N = 4 * L / 146097;
  L = L - (146097 * N + 3) / 4;
  I = 4000 * (L + 1) / 1461001;
  L = L - 1461 * I / 4 + 31;
  J = 80 * L / 2447;
  K = L - 2447 * J / 80;
  L = J / 11;

  day = K;
  month = J + 2 - 12 * L;
  year = 100 * (N - 49) + I + L;

  /* Time of day. */
  sec = seconds % 60;
  min = (seconds / 60) % 60;
  hour = (seconds / 3600) % 24;

  jsonenc_printf(e, "\"%04d-%02d-%02dT%02d:%02d:%02d", year, month, day, hour,
                 min, sec);
  jsonenc_nanos(e, nanos);
  jsonenc_putstr(e, "Z\"");
}
3662 
/* Encodes a Duration message (seconds = field 1, nanos = field 2) as a quoted
 * decimal number of seconds with an "s" suffix, e.g. "-1.500s".
 *
 * An encoding error is raised when seconds lies outside
 * +/-315576000000, or when seconds and nanos are both non-zero and have
 * opposite signs.  A zero component is compatible with either sign of the
 * other, so {seconds: -1, nanos: 0} and {seconds: 0, nanos: -500000000} are
 * both valid. */
static void jsonenc_duration(jsonenc* e, const upb_Message* msg,
                             const upb_MessageDef* m) {
  const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
  int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val;
  int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val;
  bool negative = false;

  if (seconds > 315576000000 || seconds < -315576000000 ||
      (seconds != 0 && nanos != 0 && (seconds < 0) != (nanos < 0))) {
    jsonenc_err(e, "bad duration");
  }

  /* Work with magnitudes and emit the sign separately, so that a negative
   * sub-second duration (seconds == 0) still gets its leading '-'.  The
   * negation cannot overflow because of the range check above. */
  if (seconds < 0) {
    negative = true;
    seconds = -seconds;
  }
  if (nanos < 0) {
    negative = true;
    nanos = -nanos;
  }

  jsonenc_putstr(e, "\"");
  if (negative) {
    jsonenc_putstr(e, "-");
  }
  jsonenc_printf(e, "%" PRId64, seconds);
  jsonenc_nanos(e, nanos);
  jsonenc_putstr(e, "s\"");
}
3683 
/* Encodes enum value `val` of field `f`.  google.protobuf.NullValue is
 * written as null; a known value is written as its quoted name; an unknown
 * number is written as a bare integer. */
static void jsonenc_enum(int32_t val, const upb_FieldDef* f, jsonenc* e) {
  const upb_EnumDef* e_def = upb_FieldDef_EnumSubDef(f);
  const upb_EnumValueDef* ev;

  if (strcmp(upb_EnumDef_FullName(e_def), "google.protobuf.NullValue") == 0) {
    jsonenc_putstr(e, "null");
    return;
  }

  ev = upb_EnumDef_FindValueByNumber(e_def, val);
  if (!ev) {
    jsonenc_printf(e, "%" PRId32, val);
    return;
  }

  jsonenc_printf(e, "\"%s\"", upb_EnumValueDef_Name(ev));
}
3699 
/* Encodes `str` as a quoted, '='-padded base64 string. */
static void jsonenc_bytes(jsonenc* e, upb_StringView str) {
  /* This is the regular base64, not the "web-safe" version. */
  static const char base64[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  const unsigned char* in = (unsigned char*)str.data;
  const unsigned char* stop = UPB_PTRADD(in, str.size);
  char quad[4];

  jsonenc_putstr(e, "\"");

  /* Full groups: three input bytes become four output characters. */
  for (; stop - in >= 3; in += 3) {
    quad[0] = base64[in[0] >> 2];
    quad[1] = base64[((in[0] & 0x3) << 4) | (in[1] >> 4)];
    quad[2] = base64[((in[1] & 0xf) << 2) | (in[2] >> 6)];
    quad[3] = base64[in[2] & 0x3f];
    jsonenc_putbytes(e, quad, 4);
  }

  /* Trailing one or two bytes are padded out to four characters. */
  if (stop - in == 2) {
    quad[0] = base64[in[0] >> 2];
    quad[1] = base64[((in[0] & 0x3) << 4) | (in[1] >> 4)];
    quad[2] = base64[(in[1] & 0xf) << 2];
    quad[3] = '=';
    jsonenc_putbytes(e, quad, 4);
  } else if (stop - in == 1) {
    quad[0] = base64[in[0] >> 2];
    quad[1] = base64[((in[0] & 0x3) << 4)];
    quad[2] = '=';
    quad[3] = '=';
    jsonenc_putbytes(e, quad, 4);
  }

  jsonenc_putstr(e, "\"");
}
3738 
/* Writes the contents of `str` with JSON escaping applied, but without the
 * surrounding quotes.  Bytes with short escapes use them, other bytes below
 * 0x20 are written as \u00XX, and everything else is copied through. */
static void jsonenc_stringbody(jsonenc* e, upb_StringView str) {
  const char* cur = str.data;
  const char* const stop = UPB_PTRADD(cur, str.size);

  for (; cur < stop; cur++) {
    const char* escape = NULL;

    switch (*cur) {
      case '\n': escape = "\\n"; break;
      case '\r': escape = "\\r"; break;
      case '\t': escape = "\\t"; break;
      case '\"': escape = "\\\""; break;
      case '\f': escape = "\\f"; break;
      case '\b': escape = "\\b"; break;
      case '\\': escape = "\\\\"; break;
      default: break;
    }

    if (escape) {
      jsonenc_putstr(e, escape);
    } else if ((uint8_t)*cur < 0x20) {
      jsonenc_printf(e, "\\u%04x", (int)(uint8_t)*cur);
    } else {
      /* This could be a non-ASCII byte.  We rely on the string being valid
       * UTF-8. */
      jsonenc_putbytes(e, cur, 1);
    }
  }
}
3779 
/* Writes `str` as a complete JSON string: opening quote, escaped body,
 * closing quote. */
static void jsonenc_string(jsonenc* e, upb_StringView str) {
  static const char kQuote[] = "\"";

  jsonenc_putstr(e, kQuote);
  jsonenc_stringbody(e, str);
  jsonenc_putstr(e, kQuote);
}
3785 
/* Writes the quoted spelling for +/-infinity and NaN.  Returns true when
 * `val` was one of those and has been written, false when the caller still
 * has to format an ordinary number. */
static bool upb_JsonEncode_HandleSpecialDoubles(jsonenc* e, double val) {
  if (val == INFINITY) {
    jsonenc_putstr(e, "\"Infinity\"");
    return true;
  }
  if (val == -INFINITY) {
    jsonenc_putstr(e, "\"-Infinity\"");
    return true;
  }
  if (val != val) {
    /* Only NaN compares unequal to itself. */
    jsonenc_putstr(e, "\"NaN\"");
    return true;
  }
  return false;
}
3798 
/* Writes a double: special values go through
 * upb_JsonEncode_HandleSpecialDoubles(), anything else is formatted by
 * _upb_EncodeRoundTripDouble(). */
static void upb_JsonEncode_Double(jsonenc* e, double val) {
  char text[32];

  if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return;

  _upb_EncodeRoundTripDouble(val, text, sizeof(text));
  jsonenc_putstr(e, text);
}
3805 
/* Writes a float: special values go through
 * upb_JsonEncode_HandleSpecialDoubles(), anything else is formatted by
 * _upb_EncodeRoundTripFloat(). */
static void upb_JsonEncode_Float(jsonenc* e, float val) {
  char text[32];

  if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return;

  _upb_EncodeRoundTripFloat(val, text, sizeof(text));
  jsonenc_putstr(e, text);
}
3812 
/* Encodes a wrapper well-known type as the bare value of its field
 * number 1. */
static void jsonenc_wrapper(jsonenc* e, const upb_Message* msg,
                            const upb_MessageDef* m) {
  const upb_FieldDef* payload_f = upb_MessageDef_FindFieldByNumber(m, 1);

  jsonenc_scalar(e, upb_Message_Get(msg, payload_f), payload_f);
}
3819 
jsonenc_getanymsg(jsonenc * e,upb_StringView type_url)3820 static const upb_MessageDef* jsonenc_getanymsg(jsonenc* e,
3821                                                upb_StringView type_url) {
3822   /* Find last '/', if any. */
3823   const char* end = type_url.data + type_url.size;
3824   const char* ptr = end;
3825   const upb_MessageDef* ret;
3826 
3827   if (!e->ext_pool) {
3828     jsonenc_err(e, "Tried to encode Any, but no symtab was provided");
3829   }
3830 
3831   if (type_url.size == 0) goto badurl;
3832 
3833   while (true) {
3834     if (--ptr == type_url.data) {
3835       /* Type URL must contain at least one '/', with host before. */
3836       goto badurl;
3837     }
3838     if (*ptr == '/') {
3839       ptr++;
3840       break;
3841     }
3842   }
3843 
3844   ret = upb_DefPool_FindMessageByNameWithSize(e->ext_pool, ptr, end - ptr);
3845 
3846   if (!ret) {
3847     jsonenc_errf(e, "Couldn't find Any type: %.*s", (int)(end - ptr), ptr);
3848   }
3849 
3850   return ret;
3851 
3852 badurl:
3853   jsonenc_errf(e, "Bad type URL: " UPB_STRINGVIEW_FORMAT,
3854                UPB_STRINGVIEW_ARGS(type_url));
3855 }
3856 
/* Encodes an Any message.  The payload bytes (field 2) are decoded as the
 * type named by the type URL (field 1) and written as a JSON object that
 * starts with an "@type" member. */
static void jsonenc_any(jsonenc* e, const upb_Message* msg,
                        const upb_MessageDef* m) {
  const upb_FieldDef* url_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_FieldDef* payload_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_StringView type_url = upb_Message_Get(msg, url_f).str_val;
  upb_StringView payload = upb_Message_Get(msg, payload_f).str_val;
  const upb_MessageDef* inner_m = jsonenc_getanymsg(e, type_url);
  const upb_MiniTable* inner_layout = upb_MessageDef_MiniTable(inner_m);
  upb_Arena* scratch = jsonenc_arena(e);
  upb_Message* inner = upb_Message_New(inner_m, scratch);

  if (upb_Decode(payload.data, payload.size, inner, inner_layout, NULL, 0,
                 scratch) != kUpb_DecodeStatus_Ok) {
    jsonenc_err(e, "Error decoding message in Any");
  }

  jsonenc_putstr(e, "{\"@type\":");
  jsonenc_string(e, type_url);

  if (upb_MessageDef_WellKnownType(inner_m) != kUpb_WellKnown_Unspecified) {
    /* Well-known type: {"@type": "...","value": <well-known encoding>} */
    jsonenc_putstr(e, ",\"value\":");
    jsonenc_msgfield(e, inner, inner_m);
  } else {
    /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */
    jsonenc_msgfields(e, inner, inner_m, false);
  }

  jsonenc_putstr(e, "}");
}
3887 
/* Writes the separator `str` before every element except the first one.
 * `*first` is cleared by the first call. */
static void jsonenc_putsep(jsonenc* e, const char* str, bool* first) {
  if (!*first) {
    jsonenc_putstr(e, str);
    return;
  }
  *first = false;
}
3895 
/* Writes one FieldMask path, converting snake_case to lowerCamelCase: each
 * "_x" becomes "X".  Upper-case input letters, and underscores not followed
 * by a lower-case letter, raise an encoding error. */
static void jsonenc_fieldpath(jsonenc* e, upb_StringView path) {
  const char* cur = path.data;
  const char* const stop = cur + path.size;

  for (; cur < stop; cur++) {
    char out = *cur;

    if (out >= 'A' && out <= 'Z') {
      jsonenc_err(e, "Field mask element may not have upper-case letter.");
    } else if (out == '_') {
      const bool lower_follows =
          cur != stop - 1 && *(cur + 1) >= 'a' && *(cur + 1) <= 'z';
      if (!lower_follows) {
        jsonenc_err(e, "Underscore must be followed by a lowercase letter.");
      }
      /* Consume the letter after '_' and emit its upper-case form. */
      cur++;
      out = *cur - 32;
    }

    jsonenc_putbytes(e, &out, 1);
  }
}
3916 
/* Encodes a FieldMask as a single quoted string holding its paths (field 1)
 * joined by commas. */
static void jsonenc_fieldmask(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_Array* paths = upb_Message_Get(msg, paths_f).array_val;
  const size_t count = paths ? upb_Array_Size(paths) : 0;
  size_t idx;

  jsonenc_putstr(e, "\"");

  for (idx = 0; idx < count; idx++) {
    if (idx != 0) jsonenc_putstr(e, ",");
    jsonenc_fieldpath(e, upb_Array_Get(paths, idx).str_val);
  }

  jsonenc_putstr(e, "\"");
}
3935 
/* Encodes a Struct as a JSON object: every entry of its map (field 1) becomes
 * a "key":value member, with the value written by jsonenc_value(). */
static void jsonenc_struct(jsonenc* e, const upb_Message* msg,
                           const upb_MessageDef* m) {
  const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_Map* fields = upb_Message_Get(msg, fields_f).map_val;
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
  size_t iter = kUpb_Map_Begin;
  bool first = true;

  jsonenc_putstr(e, "{");

  /* An absent map is written as an empty object. */
  while (fields && upb_MapIterator_Next(fields, &iter)) {
    upb_MessageValue key = upb_MapIterator_Key(fields, iter);
    upb_MessageValue val = upb_MapIterator_Value(fields, iter);

    jsonenc_putsep(e, ",", &first);
    jsonenc_string(e, key.str_val);
    jsonenc_putstr(e, ":");
    jsonenc_value(e, val.msg_val, upb_FieldDef_MessageSubDef(value_f));
  }

  jsonenc_putstr(e, "}");
}
3961 
/* Encodes a ListValue as a JSON array: every element of its repeated field
 * (field 1) is written by jsonenc_value(). */
static void jsonenc_listvalue(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
  const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* values_m = upb_FieldDef_MessageSubDef(values_f);
  const upb_Array* values = upb_Message_Get(msg, values_f).array_val;
  /* An absent array is written as an empty list. */
  const size_t count = values ? upb_Array_Size(values) : 0;
  size_t idx;

  jsonenc_putstr(e, "[");

  for (idx = 0; idx < count; idx++) {
    upb_MessageValue elem = upb_Array_Get(values, idx);

    if (idx != 0) jsonenc_putstr(e, ",");
    jsonenc_value(e, elem.msg_val, values_m);
  }

  jsonenc_putstr(e, "]");
}
3984 
/* Encodes a Value message.  The first set field reported by
 * upb_Message_Next() selects the JSON form, keyed by its field number:
 * 1 null, 2 number, 3 string, 4 bool, 5 struct, 6 list.  A Value with no
 * field set raises an encoding error. */
static void jsonenc_value(jsonenc* e, const upb_Message* msg,
                          const upb_MessageDef* m) {
  /* TODO(haberman): do we want a reflection method to get oneof case? */
  size_t iter = kUpb_Message_Begin;
  const upb_FieldDef* set_f;
  upb_MessageValue set_val;

  if (!upb_Message_Next(msg, m, NULL, &set_f, &set_val, &iter)) {
    jsonenc_err(e, "No value set in Value proto");
  }

  switch (upb_FieldDef_Number(set_f)) {
    case 1:
      jsonenc_putstr(e, "null");
      return;
    case 2:
      upb_JsonEncode_Double(e, set_val.double_val);
      return;
    case 3:
      jsonenc_string(e, set_val.str_val);
      return;
    case 4:
      jsonenc_putstr(e, set_val.bool_val ? "true" : "false");
      return;
    case 5:
      jsonenc_struct(e, set_val.msg_val, upb_FieldDef_MessageSubDef(set_f));
      return;
    case 6:
      jsonenc_listvalue(e, set_val.msg_val, upb_FieldDef_MessageSubDef(set_f));
      return;
  }
}
4017 
/* Encodes a message value, picking the encoder from
 * upb_MessageDef_WellKnownType(m): ordinary messages go to jsonenc_msg(),
 * each well-known type to its dedicated encoder. */
static void jsonenc_msgfield(jsonenc* e, const upb_Message* msg,
                             const upb_MessageDef* m) {
  switch (upb_MessageDef_WellKnownType(m)) {
    case kUpb_WellKnown_Unspecified:
      jsonenc_msg(e, msg, m);
      return;
    case kUpb_WellKnown_Any:
      jsonenc_any(e, msg, m);
      return;
    case kUpb_WellKnown_FieldMask:
      jsonenc_fieldmask(e, msg, m);
      return;
    case kUpb_WellKnown_Duration:
      jsonenc_duration(e, msg, m);
      return;
    case kUpb_WellKnown_Timestamp:
      jsonenc_timestamp(e, msg, m);
      return;

    /* All scalar wrapper types share a single encoder. */
    case kUpb_WellKnown_DoubleValue:
    case kUpb_WellKnown_FloatValue:
    case kUpb_WellKnown_Int64Value:
    case kUpb_WellKnown_UInt64Value:
    case kUpb_WellKnown_Int32Value:
    case kUpb_WellKnown_UInt32Value:
    case kUpb_WellKnown_StringValue:
    case kUpb_WellKnown_BytesValue:
    case kUpb_WellKnown_BoolValue:
      jsonenc_wrapper(e, msg, m);
      return;

    case kUpb_WellKnown_Value:
      jsonenc_value(e, msg, m);
      return;
    case kUpb_WellKnown_ListValue:
      jsonenc_listvalue(e, msg, m);
      return;
    case kUpb_WellKnown_Struct:
      jsonenc_struct(e, msg, m);
      return;
  }
}
4058 
/* Encodes a single (non-repeated, non-map) value of field `f`, choosing the
 * JSON form from the field's C type.  64-bit integers are written as quoted
 * strings; 32-bit integers as bare numbers. */
static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
                           const upb_FieldDef* f) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      jsonenc_putstr(e, val.bool_val ? "true" : "false");
      return;
    case kUpb_CType_Float:
      upb_JsonEncode_Float(e, val.float_val);
      return;
    case kUpb_CType_Double:
      upb_JsonEncode_Double(e, val.double_val);
      return;
    case kUpb_CType_Int32:
      jsonenc_printf(e, "%" PRId32, val.int32_val);
      return;
    case kUpb_CType_UInt32:
      jsonenc_printf(e, "%" PRIu32, val.uint32_val);
      return;
    case kUpb_CType_Int64:
      jsonenc_printf(e, "\"%" PRId64 "\"", val.int64_val);
      return;
    case kUpb_CType_UInt64:
      jsonenc_printf(e, "\"%" PRIu64 "\"", val.uint64_val);
      return;
    case kUpb_CType_String:
      jsonenc_string(e, val.str_val);
      return;
    case kUpb_CType_Bytes:
      jsonenc_bytes(e, val.str_val);
      return;
    case kUpb_CType_Enum:
      jsonenc_enum(val.int32_val, f, e);
      return;
    case kUpb_CType_Message:
      jsonenc_msgfield(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
      return;
  }
}
4097 
/* Writes a map key followed by ':'.  Keys are always quoted, whatever their
 * type; only bool, integer and string key types are handled and anything
 * else is treated as unreachable. */
static void jsonenc_mapkey(jsonenc* e, upb_MessageValue val,
                           const upb_FieldDef* f) {
  /* Opening quote. */
  jsonenc_putstr(e, "\"");

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_String:
      jsonenc_stringbody(e, val.str_val);
      break;
    case kUpb_CType_Bool:
      jsonenc_putstr(e, val.bool_val ? "true" : "false");
      break;
    case kUpb_CType_Int32:
      jsonenc_printf(e, "%" PRId32, val.int32_val);
      break;
    case kUpb_CType_Int64:
      jsonenc_printf(e, "%" PRId64, val.int64_val);
      break;
    case kUpb_CType_UInt32:
      jsonenc_printf(e, "%" PRIu32, val.uint32_val);
      break;
    case kUpb_CType_UInt64:
      jsonenc_printf(e, "%" PRIu64, val.uint64_val);
      break;
    default:
      UPB_UNREACHABLE();
  }

  /* Closing quote and the key/value separator. */
  jsonenc_putstr(e, "\":");
}
4127 
/* Encodes a repeated field as a JSON array.  A NULL array is written as
 * "[]". */
static void jsonenc_array(jsonenc* e, const upb_Array* arr,
                          const upb_FieldDef* f) {
  const size_t count = arr ? upb_Array_Size(arr) : 0;
  size_t idx;

  jsonenc_putstr(e, "[");

  for (idx = 0; idx < count; idx++) {
    if (idx != 0) jsonenc_putstr(e, ",");
    jsonenc_scalar(e, upb_Array_Get(arr, idx), f);
  }

  jsonenc_putstr(e, "]");
}
4143 
/* Encodes a map field as a JSON object.  Key and value types come from
 * fields 1 and 2 of the map entry message.  A NULL map is written as "{}". */
static void jsonenc_map(jsonenc* e, const upb_Map* map, const upb_FieldDef* f) {
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
  const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry_m, 1);
  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
  size_t iter = kUpb_Map_Begin;
  bool first = true;

  jsonenc_putstr(e, "{");

  while (map && upb_MapIterator_Next(map, &iter)) {
    jsonenc_putsep(e, ",", &first);
    jsonenc_mapkey(e, upb_MapIterator_Key(map, iter), key_f);
    jsonenc_scalar(e, upb_MapIterator_Value(map, iter), val_f);
  }

  jsonenc_putstr(e, "}");
}
4163 
/* Writes one "name":value member of a message object, preceded by a comma
 * unless it is the first member.  Extensions are keyed as "[full.name]";
 * other fields use the proto name or the JSON name depending on the
 * upb_JsonEncode_UseProtoNames option. */
static void jsonenc_fieldval(jsonenc* e, const upb_FieldDef* f,
                             upb_MessageValue val, bool* first) {
  jsonenc_putsep(e, ",", first);

  if (!upb_FieldDef_IsExtension(f)) {
    const char* name = (e->options & upb_JsonEncode_UseProtoNames)
                           ? upb_FieldDef_Name(f)
                           : upb_FieldDef_JsonName(f);
    jsonenc_printf(e, "\"%s\":", name);
  } else {
    // TODO: For MessageSet, I would have expected this to print the message
    // name here, but Python doesn't appear to do this. We should do more
    // research here about what various implementations do.
    jsonenc_printf(e, "\"[%s]\":", upb_FieldDef_FullName(f));
  }

  /* Maps must be tested before plain repeated fields. */
  if (upb_FieldDef_IsMap(f)) {
    jsonenc_map(e, val.map_val, f);
    return;
  }
  if (upb_FieldDef_IsRepeated(f)) {
    jsonenc_array(e, val.array_val, f);
    return;
  }
  jsonenc_scalar(e, val, f);
}
4192 
/* Writes the members of a message without the surrounding braces.  `first`
 * says whether the next member is the first inside the enclosing object,
 * i.e. whether it must not be preceded by a comma.
 *
 * With upb_JsonEncode_EmitDefaults every field is visited and written unless
 * it has presence and is unset; otherwise only the fields reported by
 * upb_Message_Next() are written. */
static void jsonenc_msgfields(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m, bool first) {
  upb_MessageValue val;
  const upb_FieldDef* f;

  if (!(e->options & upb_JsonEncode_EmitDefaults)) {
    /* Iterate over non-empty fields. */
    size_t iter = kUpb_Message_Begin;
    while (upb_Message_Next(msg, m, e->ext_pool, &f, &val, &iter)) {
      jsonenc_fieldval(e, f, val, &first);
    }
    return;
  }

  /* Iterate over all fields. */
  {
    const int count = upb_MessageDef_FieldCount(m);
    int idx;
    for (idx = 0; idx < count; idx++) {
      f = upb_MessageDef_Field(m, idx);
      if (upb_FieldDef_HasPresence(f) && !upb_Message_Has(msg, f)) continue;
      jsonenc_fieldval(e, f, upb_Message_Get(msg, f), &first);
    }
  }
}
4216 
/* Encodes an ordinary message as a JSON object: its members wrapped in
 * braces. */
static void jsonenc_msg(jsonenc* e, const upb_Message* msg,
                        const upb_MessageDef* m) {
  const bool no_members_yet = true;

  jsonenc_putstr(e, "{");
  jsonenc_msgfields(e, msg, m, no_members_yet);
  jsonenc_putstr(e, "}");
}
4223 
/* NUL-terminates the output and returns the full length of the encoding,
 * including any bytes that overflowed the buffer.  When the buffer is
 * completely full, its last byte is overwritten by the terminator.  With
 * size == 0 nothing is written. */
static size_t jsonenc_nullz(jsonenc* e, size_t size) {
  const size_t total = e->ptr - e->buf + e->overflow;

  if (size == 0) return total;

  if (e->ptr == e->end) e->ptr--;
  *e->ptr = '\0';
  return total;
}
4234 
upb_JsonEncode(const upb_Message * msg,const upb_MessageDef * m,const upb_DefPool * ext_pool,int options,char * buf,size_t size,upb_Status * status)4235 size_t upb_JsonEncode(const upb_Message* msg, const upb_MessageDef* m,
4236                       const upb_DefPool* ext_pool, int options, char* buf,
4237                       size_t size, upb_Status* status) {
4238   jsonenc e;
4239 
4240   e.buf = buf;
4241   e.ptr = buf;
4242   e.end = UPB_PTRADD(buf, size);
4243   e.overflow = 0;
4244   e.options = options;
4245   e.ext_pool = ext_pool;
4246   e.status = status;
4247   e.arena = NULL;
4248 
4249   if (setjmp(e.err)) return -1;
4250 
4251   jsonenc_msgfield(&e, msg, m);
4252   if (e.arena) upb_Arena_Free(e.arena);
4253   return jsonenc_nullz(&e, size);
4254 }
4255 
4256 /** upb/mini_table.c ************************************************************/
4257 
4258 #include <inttypes.h>
4259 #include <setjmp.h>
4260 
4261 
4262 // Must be last.
4263 
/* Field type codes used by the mini-table data encoder in this section. */
typedef enum {
  /* Floating point. */
  kUpb_EncodedType_Double = 0,
  kUpb_EncodedType_Float = 1,

  /* Fixed-width integers. */
  kUpb_EncodedType_Fixed32 = 2,
  kUpb_EncodedType_Fixed64 = 3,
  kUpb_EncodedType_SFixed32 = 4,
  kUpb_EncodedType_SFixed64 = 5,

  /* 32-bit integers. */
  kUpb_EncodedType_Int32 = 6,
  kUpb_EncodedType_UInt32 = 7,
  kUpb_EncodedType_SInt32 = 8,

  /* 64-bit integers. */
  kUpb_EncodedType_Int64 = 9,
  kUpb_EncodedType_UInt64 = 10,
  kUpb_EncodedType_SInt64 = 11,

  kUpb_EncodedType_Enum = 12,
  kUpb_EncodedType_Bool = 13,

  /* Length-delimited data and submessages. */
  kUpb_EncodedType_Bytes = 14,
  kUpb_EncodedType_String = 15,
  kUpb_EncodedType_Group = 16,
  kUpb_EncodedType_Message = 17,

  kUpb_EncodedType_RepeatedBase = 20,
} upb_EncodedType;
4286 
/* Bit flags for field modifiers; each constant occupies its own bit. */
typedef enum {
  kUpb_EncodedFieldModifier_FlipPacked = 0x1,
  kUpb_EncodedFieldModifier_IsClosedEnum = 0x2,
  // upb only.
  kUpb_EncodedFieldModifier_IsProto3Singular = 0x4,
  kUpb_EncodedFieldModifier_IsRequired = 0x8,
} upb_EncodedFieldModifier;
4294 
/* Character constants that delimit the value ranges and separators of the
 * encoded mini-table text. */
enum {
  /* Field values. */
  kUpb_EncodedValue_MinField = ' ',
  kUpb_EncodedValue_MaxField = 'K',

  /* Modifier values. */
  kUpb_EncodedValue_MinModifier = 'L',
  kUpb_EncodedValue_MaxModifier = '[',

  kUpb_EncodedValue_End = '^',

  /* Skip values. */
  kUpb_EncodedValue_MinSkip = '_',
  kUpb_EncodedValue_MaxSkip = '~',

  /* Separators. */
  kUpb_EncodedValue_OneofSeparator = '~',
  kUpb_EncodedValue_FieldSeparator = '|',

  /* Oneof field values. */
  kUpb_EncodedValue_MinOneofField = ' ',
  kUpb_EncodedValue_MaxOneofField = 'b',

  kUpb_EncodedValue_MaxEnumMask = 'A',
};
4309 
// Maps a value in [0, 92) to its character in the base92 alphabet.
//
// The alphabet is the printable ASCII range ' '..'~' with the three
// characters '"', '\'' and '\\' left out.
char upb_ToBase92(int8_t ch) {
  static const char kAlphabet[] =
      " !#$%&()*+,-./"
      "0123456789"
      ":;<=>?@"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[]^_`"
      "abcdefghijklmnopqrstuvwxyz"
      "{|}~";

  UPB_ASSERT(ch >= 0 && ch < 92);
  return kAlphabet[ch];
}
4324 
// Inverse of upb_ToBase92(): maps a base92 character back to its value in
// [0, 92).  Returns -1 for any byte that is not part of the alphabet (bytes
// outside ' '..'~', and the excluded characters '"', '\'' and '\\').
char upb_FromBase92(uint8_t ch) {
  // Indexed by (ch - ' ').
  static const int8_t kValueOfChar[] = {
      0,  1,  -1, 2,  3,  4,  5,  -1, 6,  7,  8,  9,  10, 11, 12, 13,
      14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
      30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
      46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, 58, 59, 60,
      61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
      77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
  };

  const uint8_t first = ' ';
  const uint8_t last = '~';
  if (ch < first || ch > last) return -1;
  return kValueOfChar[ch - first];
}
4338 
// Returns true if a repeated field of the given type may use packed encoding.
// String, bytes, message and group fields are the only unpackable types.
bool upb_IsTypePackable(upb_FieldType type) {
  switch (type) {
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes:
    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
      return false;
    default:
      return true;
  }
}
4349 
4350 /** upb_MtDataEncoder *********************************************************/
4351 
// Encoder state while an enum is being written (between
// upb_MtDataEncoder_StartEnum and upb_MtDataEncoder_EndEnum).
typedef struct {
  // Pending, not-yet-flushed values: bit i is set when the value
  // (last_written_value + i) has been added via PutEnumValue.
  uint64_t present_values_mask;
  // Base value for the pending mask; advanced by each flush and each skip.
  uint32_t last_written_value;
} upb_MtDataEncoderInternal_EnumState;
4356 
// Encoder state while a message is being written (set up by
// upb_MtDataEncoder_StartMessage).
typedef struct {
  // Message-level modifiers passed to StartMessage; consulted by PutField to
  // decide whether a repeated field's packedness differs from the default.
  uint64_t msg_modifiers;
  // Number of the most recently written field (0 before the first field).
  uint32_t last_field_num;
  // Tracks which separator, if any, must precede the next oneof item.
  enum {
    kUpb_OneofState_NotStarted,
    kUpb_OneofState_StartedOneof,
    kUpb_OneofState_EmittedOneofField,
  } oneof_state;
} upb_MtDataEncoderInternal_MsgState;
4366 
// Private encoder state, stored inside upb_MtDataEncoder::internal (see
// upb_MtDataEncoder_GetInternal, which asserts that it fits).
typedef struct {
  char* buf_start;  // Only for checking kUpb_MtDataEncoder_MinSize.
  // Only one of these is live at a time: enum_state is initialized by
  // StartEnum, msg_state by StartMessage.
  union {
    upb_MtDataEncoderInternal_EnumState enum_state;
    upb_MtDataEncoderInternal_MsgState msg_state;
  } state;
} upb_MtDataEncoderInternal;
4374 
upb_MtDataEncoder_GetInternal(upb_MtDataEncoder * e,char * buf_start)4375 static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
4376     upb_MtDataEncoder* e, char* buf_start) {
4377   UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal));
4378   upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal;
4379   ret->buf_start = buf_start;
4380   return ret;
4381 }
4382 
// Writes the base92 character for value `ch` at `ptr`.
//
// Returns the position just past the written character, or NULL if `ptr` has
// reached e->end (no room left).
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
  upb_MtDataEncoderInternal* state = (upb_MtDataEncoderInternal*)e->internal;
  UPB_ASSERT(ptr - state->buf_start < kUpb_MtDataEncoder_MinSize);
  if (ptr == e->end) return NULL;
  *ptr = upb_ToBase92(ch);
  return ptr + 1;
}
4390 
// Writes `val` as a little-endian varint whose digits are restricted to the
// character range [min, max] (both given as encoded characters).
//
// At least one character is always written, even when val == 0.  Returns the
// new write position, or NULL if the buffer filled up.
static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
                                               uint32_t val, int min, int max) {
  const int lowest = upb_FromBase92(min);
  const int highest = upb_FromBase92(max);
  const int bits_per_char = _upb_Log2Ceiling(highest - lowest + 1);
  UPB_ASSERT(bits_per_char <= 6);
  const uint32_t digit_mask = (1 << bits_per_char) - 1;

  for (;;) {
    const uint32_t digit = val & digit_mask;
    ptr = upb_MtDataEncoder_Put(e, ptr, digit + lowest);
    if (!ptr) return NULL;
    val >>= bits_per_char;
    if (!val) return ptr;
  }
}
4404 
// Writes a modifier varint for `mod`.  A zero modifier is not written at all,
// in which case `ptr` is returned unchanged.  Returns NULL if the buffer
// filled up.
char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr,
                                    uint64_t mod) {
  if (!mod) return ptr;
  return upb_MtDataEncoder_PutBase92Varint(e, ptr, mod,
                                           kUpb_EncodedValue_MinModifier,
                                           kUpb_EncodedValue_MaxModifier);
}
4414 
// Begins encoding a message: resets the per-message encoder state and writes
// the message-level modifiers (if non-zero).  Returns the new write position,
// or NULL if the buffer filled up.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
                                     uint64_t msg_mod) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;
  msg->oneof_state = kUpb_OneofState_NotStarted;
  msg->last_field_num = 0;
  msg->msg_modifiers = msg_mod;
  return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod);
}
4423 
// Encodes one field of the current message.
//
// Fields must be supplied in strictly increasing field-number order; NULL is
// returned if `field_num` is not greater than the previous one.  The output
// is: an optional skip varint (when there is a gap in field numbers), the
// type character, and an optional modifier varint.
//
// Returns the new write position, or NULL on error / full buffer.
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
                                 upb_FieldType type, uint32_t field_num,
                                 uint64_t field_mod) {
  static const char kUpb_TypeToEncoded[] = {
      [kUpb_FieldType_Double] = kUpb_EncodedType_Double,
      [kUpb_FieldType_Float] = kUpb_EncodedType_Float,
      [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64,
      [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64,
      [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32,
      [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64,
      [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32,
      [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool,
      [kUpb_FieldType_String] = kUpb_EncodedType_String,
      [kUpb_FieldType_Group] = kUpb_EncodedType_Group,
      [kUpb_FieldType_Message] = kUpb_EncodedType_Message,
      [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes,
      [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32,
      [kUpb_FieldType_Enum] = kUpb_EncodedType_Enum,
      [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32,
      [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64,
      [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32,
      [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64,
  };

  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;

  // Reject out-of-order or duplicate field numbers.
  if (field_num <= msg->last_field_num) return NULL;

  // A distance of exactly 1 is implicit; anything larger needs a skip.
  const uint32_t distance = field_num - msg->last_field_num;
  if (distance != 1) {
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, distance, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    if (!ptr) return NULL;
  }
  msg->last_field_num = field_num;

  // Enums that are not closed are encoded as plain int32.
  const bool closed_enum = field_mod & kUpb_FieldModifier_IsClosedEnum;
  if (type == kUpb_FieldType_Enum && !closed_enum) {
    type = kUpb_FieldType_Int32;
  }

  uint32_t encoded_modifiers = 0;
  int encoded_type = kUpb_TypeToEncoded[type];

  const bool repeated = field_mod & kUpb_FieldModifier_IsRepeated;
  if (repeated) {
    // Repeatedness is folded into the type code rather than being a flag.
    encoded_type += kUpb_EncodedType_RepeatedBase;

    if (upb_IsTypePackable(type)) {
      const bool packed = field_mod & kUpb_FieldModifier_IsPacked;
      const bool packed_by_default = msg->msg_modifiers &
                                     kUpb_MessageModifier_DefaultIsPacked;
      // Only a deviation from the message default is recorded.
      if (packed != packed_by_default) {
        encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked;
      }
    }
  }

  ptr = upb_MtDataEncoder_Put(e, ptr, encoded_type);
  if (!ptr) return NULL;

  if (field_mod & kUpb_FieldModifier_IsProto3Singular) {
    encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular;
  }
  if (field_mod & kUpb_FieldModifier_IsRequired) {
    encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired;
  }
  return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers);
}
4494 
// Begins a new oneof.  The first oneof of a message is introduced by the
// End marker (which closes the field list); every later oneof is introduced
// by the oneof separator.  Returns the new write position, or NULL if the
// buffer filled up.
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;
  const int marker = (msg->oneof_state == kUpb_OneofState_NotStarted)
                         ? kUpb_EncodedValue_End
                         : kUpb_EncodedValue_OneofSeparator;
  ptr = upb_MtDataEncoder_Put(e, ptr, upb_FromBase92(marker));
  msg->oneof_state = kUpb_OneofState_StartedOneof;
  return ptr;
}
4506 
// Adds field number `field_num` to the oneof currently being written.  A
// field separator is emitted first when this is not the oneof's first member.
// Returns the new write position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
                                      uint32_t field_num) {
  upb_MtDataEncoderInternal_MsgState* msg =
      &upb_MtDataEncoder_GetInternal(e, ptr)->state.msg_state;

  if (msg->oneof_state == kUpb_OneofState_EmittedOneofField) {
    const char separator = upb_FromBase92(kUpb_EncodedValue_FieldSeparator);
    ptr = upb_MtDataEncoder_Put(e, ptr, separator);
    if (!ptr) return NULL;
  }

  // Field numbers use the 64 lowest characters of the alphabet as digits.
  const char min_digit = upb_ToBase92(0);
  const char max_digit = upb_ToBase92(63);
  ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, min_digit,
                                          max_digit);
  msg->oneof_state = kUpb_OneofState_EmittedOneofField;
  return ptr;
}
4520 
// Begins encoding an enum by clearing the per-enum encoder state.  Nothing
// is written to the output.
void upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e) {
  upb_MtDataEncoderInternal_EnumState* en =
      &upb_MtDataEncoder_GetInternal(e, NULL)->state.enum_state;
  en->last_written_value = 0;
  en->present_values_mask = 0;
}
4526 
// Writes the pending mask of present enum values as one character, clears
// it, and advances the base value by 5 (the number of values one mask
// character covers).  Returns the new write position, or NULL if the buffer
// filled up; the state is updated in either case.
static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
                                                  char* ptr) {
  upb_MtDataEncoderInternal_EnumState* en =
      &((upb_MtDataEncoderInternal*)e->internal)->state.enum_state;
  char* next = upb_MtDataEncoder_Put(e, ptr, en->present_values_mask);
  en->last_written_value += 5;
  en->present_values_mask = 0;
  return next;
}
4535 
// Adds `val` to the enum being encoded.  Values must be supplied in
// non-decreasing order (asserted).
//
// Values within 5 of the current base are accumulated in a bit mask that is
// flushed as a single character; larger gaps are written as a skip varint.
//
// Returns the new write position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
                                     uint32_t val) {
  // TODO(b/229641772): optimize this encoding.
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  UPB_ASSERT(val >= in->state.enum_state.last_written_value);
  uint32_t delta = val - in->state.enum_state.last_written_value;
  if (delta >= 5 && in->state.enum_state.present_values_mask) {
    // The value does not fit in the pending mask: flush it first.
    ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
    // The flush can fail when the buffer is full.  Stop here: continuing
    // would hand a NULL write position to the varint writer below, which
    // dereferences it.
    if (!ptr) return NULL;
    delta -= 5;
  }

  if (delta >= 5) {
    // Still too far from the base: skip ahead so the value lands on bit 0.
    ptr = upb_MtDataEncoder_PutBase92Varint(
        e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
    in->state.enum_state.last_written_value += delta;
    delta = 0;
  }

  // Values arrive in order, so no bit at or above `delta` may be set yet.
  UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0);
  in->state.enum_state.present_values_mask |= 1ULL << delta;
  return ptr;
}
4558 
// Finishes the enum being encoded, flushing any values still pending in the
// mask.  Returns the new write position, or NULL if the buffer filled up.
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* state = upb_MtDataEncoder_GetInternal(e, ptr);
  if (state->state.enum_state.present_values_mask) {
    return upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
  }
  return ptr;
}
4564 
upb_MiniTable_FindFieldByNumber(const upb_MiniTable * table,uint32_t number)4565 const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
4566     const upb_MiniTable* table, uint32_t number) {
4567   int n = table->field_count;
4568   for (int i = 0; i < n; i++) {
4569     if (table->fields[i].number == number) {
4570       return &table->fields[i];
4571     }
4572   }
4573   return NULL;
4574 }
4575 
4576 /** Data decoder **************************************************************/
4577 
// Kind of storage a layout item asks for.
// Note: we sort by this number when calculating layout order.
typedef enum {
  kUpb_LayoutItemType_OneofCase,   // Oneof case.
  kUpb_LayoutItemType_OneofField,  // Oneof field data.
  kUpb_LayoutItemType_Field,       // Non-oneof field data.

  // Largest enumerator; used to compute how many bits a type needs.
  kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
4586 
// field_index value meaning "no field yet"; a oneof whose item still carries
// this value when it is pushed is rejected as empty.
#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1)

// One entry of the list of things that need storage in the message layout.
typedef struct {
  // Index of the corresponding field.  When this is a oneof field, the field's
  // offset will be the index of the next field in a linked list.
  uint16_t field_index;
  uint16_t offset;
  // Storage representation; for a oneof this is the rep of its largest member.
  upb_FieldRep rep;
  upb_LayoutItemType type;
} upb_LayoutItem;
4597 
// Growable array of layout items.  The storage is managed with realloc() in
// upb_MtDecoder_PushItem.
typedef struct {
  upb_LayoutItem* data;
  size_t size;      // Number of items currently stored.
  size_t capacity;  // Number of items `data` has room for.
} upb_LayoutItemVector;
4603 
// State shared by all the mini descriptor decoding routines.
typedef struct {
  const char* end;  // One past the last input character (set by Parse).
  // Table being built.  NOTE(review): appears to be NULL while decoding an
  // extension (see the "Extensions cannot have ..." errors) -- confirm.
  upb_MiniTable* table;
  upb_MiniTable_Field* fields;  // Field array, allocated from `arena`.
  upb_MiniTablePlatform platform;  // Selects 32- vs 64-bit sizes/alignments.
  upb_LayoutItemVector vec;        // Items collected for layout.
  upb_Arena* arena;
  upb_Status* status;  // Receives the message for any decoding error.
  jmp_buf err;         // Jump target used by upb_MtDecoder_ErrorFormat.
} upb_MtDecoder;
4614 
4615 UPB_PRINTF(2, 3)
upb_MtDecoder_ErrorFormat(upb_MtDecoder * d,const char * fmt,...)4616 UPB_NORETURN static void upb_MtDecoder_ErrorFormat(upb_MtDecoder* d,
4617                                                    const char* fmt, ...) {
4618   va_list argp;
4619   upb_Status_SetErrorMessage(d->status, "Error building mini table: ");
4620   va_start(argp, fmt);
4621   upb_Status_VAppendErrorFormat(d->status, fmt, argp);
4622   va_end(argp);
4623   UPB_LONGJMP(d->err, 1);
4624 }
4625 
// Aborts decoding with an "Out of memory" error if `ptr` (the result of an
// allocation) is NULL; otherwise does nothing.
static void upb_MtDecoder_CheckOutOfMemory(upb_MtDecoder* d, const void* ptr) {
  if (ptr) return;
  upb_MtDecoder_ErrorFormat(d, "Out of memory");
}
4629 
// In each field's offset, we temporarily store a presence classifier:
enum PresenceClass {
  kNoPresence = 0,        // Repeated fields and proto3 singular fields.
  kHasbitPresence = 1,    // Default for scalar (non-repeated) fields.
  kRequiredPresence = 2,  // Fields carrying the IsRequired modifier.
  kOneofBase = 3,
  // Negative values refer to a specific oneof with that number.  Positive
  // values >= kOneofBase indicate that this field is in a oneof, and specify
  // the next field in this oneof's linked list.
};
4640 
// Decodes a little-endian base92 varint whose digits lie in the character
// range [min, max].
//
// `first_ch` is the first digit, already consumed by the caller; `ptr` points
// at the character after it.  Further digits are consumed for as long as the
// next character is inside [min, max] and the input has not ended.
//
// The decoded value is stored in *out_val and the position of the first
// unconsumed character is returned.  A varint with so many digits that it
// would not fit in 32 bits aborts decoding with an error instead of shifting
// by an out-of-range amount.
static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d,
                                                    const char* ptr,
                                                    char first_ch, uint8_t min,
                                                    uint8_t max,
                                                    uint32_t* out_val) {
  uint32_t val = 0;
  uint32_t shift = 0;
  const int bits_per_char =
      _upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min));
  char ch = first_ch;
  while (1) {
    uint32_t bits = upb_FromBase92(ch) - upb_FromBase92(min);
    val |= bits << shift;
    if (ptr == d->end || *ptr < min || max < *ptr) {
      *out_val = val;
      return ptr;
    }
    ch = *ptr++;
    shift += bits_per_char;
    // Shifting a 32-bit value by 32 or more is undefined, and the input is
    // not trusted, so this must be a real check rather than an assertion.
    if (shift >= 32) {
      upb_MtDecoder_ErrorFormat(d, "Overlong varint");
      UPB_UNREACHABLE();
    }
  }
}
4663 
// Returns true if the field's type needs an entry in the table's subs array
// (message, group and enum fields).
//
// Side effect: a string field is downgraded to bytes when the message does
// not request UTF-8 validation.
static bool upb_MiniTable_HasSub(upb_MiniTable_Field* field,
                                 uint64_t msg_modifiers) {
  const int type = field->descriptortype;

  if (type == kUpb_FieldType_Message || type == kUpb_FieldType_Group ||
      type == kUpb_FieldType_Enum) {
    return true;
  }

  const bool validate_utf8 = msg_modifiers & kUpb_MessageModifier_ValidateUtf8;
  if (type == kUpb_FieldType_String && !validate_utf8) {
    field->descriptortype = kUpb_FieldType_Bytes;
  }
  return false;
}
4680 
// Returns true if the field is an array field whose element type can use
// packed encoding.
static bool upb_MtDecoder_FieldIsPackable(upb_MiniTable_Field* field) {
  if (!(field->mode & kUpb_FieldMode_Array)) return false;
  return upb_IsTypePackable(field->descriptortype);
}
4685 
// Sets the field's type, assigns its sub index, and applies the message's
// default packedness.
//
// Fields that need a sub take the next index from *sub_count (which is then
// incremented), or index 0 when `sub_count` is NULL.  All other fields get
// kUpb_NoSub.
static void upb_MiniTable_SetTypeAndSub(upb_MiniTable_Field* field,
                                        upb_FieldType type, uint32_t* sub_count,
                                        uint64_t msg_modifiers) {
  field->descriptortype = type;

  if (!upb_MiniTable_HasSub(field, msg_modifiers)) {
    field->submsg_index = kUpb_NoSub;
  } else if (sub_count) {
    field->submsg_index = *sub_count;
    *sub_count += 1;
  } else {
    field->submsg_index = 0;
  }

  const bool packed_by_default =
      msg_modifiers & kUpb_MessageModifier_DefaultIsPacked;
  if (packed_by_default && upb_MtDecoder_FieldIsPackable(field)) {
    field->mode |= kUpb_LabelFlags_IsPacked;
  }
}
4701 
// Initializes `field` from its encoded type character `ch`.
//
// Characters at or above the encoding of kUpb_EncodedType_RepeatedBase denote
// a repeated field of type (value - RepeatedBase); lower characters denote a
// scalar field.  Sets the field's mode, storage representation, presence
// classifier (stored temporarily in field->offset), type and sub index.
//
// Aborts decoding with "Invalid field type" when the character does not
// decode to a known type.  The type is validated BEFORE it is used as an
// index into either lookup table, and negative values (produced when `ch` is
// not part of the base92 alphabet) are rejected as well, so no out-of-bounds
// table read can occur on malformed input.
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
                                   upb_MiniTable_Field* field,
                                   uint64_t msg_modifiers,
                                   uint32_t* sub_count) {
  static const char kUpb_EncodedToFieldRep[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
      [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_Group] = kUpb_FieldRep_Pointer,
      [kUpb_EncodedType_Message] = kUpb_FieldRep_Pointer,
      [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Enum] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
  };

  static const char kUpb_EncodedToType[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldType_Double,
      [kUpb_EncodedType_Float] = kUpb_FieldType_Float,
      [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
      [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
      [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
      [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
      [kUpb_EncodedType_String] = kUpb_FieldType_String,
      [kUpb_EncodedType_Group] = kUpb_FieldType_Group,
      [kUpb_EncodedType_Message] = kUpb_FieldType_Message,
      [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
      [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
      [kUpb_EncodedType_Enum] = kUpb_FieldType_Enum,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
      [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
      [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
  };

  int8_t type = upb_FromBase92(ch);
  const bool is_repeated = ch >= upb_ToBase92(kUpb_EncodedType_RepeatedBase);
  if (is_repeated) type -= kUpb_EncodedType_RepeatedBase;

  // Both tables hold one char per type, so sizeof() is their element count.
  // `type` is negative when `ch` was not a valid base92 character.
  if (type < 0 || (size_t)type >= sizeof(kUpb_EncodedToType) ||
      (size_t)type >= sizeof(kUpb_EncodedToFieldRep)) {
    upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type);
    UPB_UNREACHABLE();
  }

  if (is_repeated) {
    // Arrays are always stored as a pointer and never have presence.
    field->mode = kUpb_FieldMode_Array;
    field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift;
    field->offset = kNoPresence;
  } else {
    field->mode = kUpb_FieldMode_Scalar;
    field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift;
    field->offset = kHasbitPresence;
  }

  upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_count,
                              msg_modifiers);
}
4766 
// Applies decoded field modifiers to `field`.
//
//  - FlipPacked toggles the packed flag; it is an error on a field that is
//    not packable.
//  - IsProto3Singular and IsRequired change the field's presence classifier
//    (held in field->offset).  They are only valid on fields that currently
//    have hasbit presence, and are mutually exclusive.
//
// Any violation aborts decoding with an error.
static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
                                      uint32_t message_modifiers,
                                      uint32_t field_modifiers,
                                      upb_MiniTable_Field* field) {
  const bool flip_packed =
      field_modifiers & kUpb_EncodedFieldModifier_FlipPacked;
  const bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular;
  const bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired;

  if (flip_packed) {
    if (!upb_MtDecoder_FieldIsPackable(field)) {
      upb_MtDecoder_ErrorFormat(
          d, "Cannot flip packed on unpackable field %" PRIu32, field->number);
      UPB_UNREACHABLE();
    }
    field->mode ^= kUpb_LabelFlags_IsPacked;
  }

  // Validate the presence-related modifiers before applying them.
  if ((singular || required) && field->offset != kHasbitPresence) {
    upb_MtDecoder_ErrorFormat(
        d, "Invalid modifier(s) for repeated field %" PRIu32, field->number);
    UPB_UNREACHABLE();
  }
  if (singular && required) {
    upb_MtDecoder_ErrorFormat(
        d, "Field %" PRIu32 " cannot be both singular and required",
        field->number);
    UPB_UNREACHABLE();
  }

  // At most one of the two can be set at this point.
  if (singular) {
    field->offset = kNoPresence;
  } else if (required) {
    field->offset = kRequiredPresence;
  }
}
4801 
// Appends `item` to the decoder's layout item vector, growing the vector
// (doubling, with a minimum capacity of 8) when it is full.
//
// Aborts decoding with an "Out of memory" error if the vector cannot grow.
// In that case d->vec.data still points at the old, valid buffer, so whoever
// handles the error can release it; assigning realloc()'s result directly
// would have replaced the only pointer to that buffer with NULL.
static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
  if (d->vec.size == d->vec.capacity) {
    size_t new_cap = UPB_MAX(8, d->vec.size * 2);
    upb_LayoutItem* new_data =
        realloc(d->vec.data, new_cap * sizeof(*d->vec.data));
    // Does not return when new_data is NULL.
    upb_MtDecoder_CheckOutOfMemory(d, new_data);
    d->vec.data = new_data;
    d->vec.capacity = new_cap;
  }
  d->vec.data[d->vec.size++] = item;
}
4811 
// Finishes a oneof by pushing two layout items for it: one for the oneof's
// data storage and one for its case (the 4-byte field number of the active
// member).  A oneof with no members aborts decoding with "Empty oneof".
static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
  if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
    upb_MtDecoder_ErrorFormat(d, "Empty oneof");
    UPB_UNREACHABLE();
  }

  // Storage for the oneof's value; undo the kOneofBase bias on the index.
  upb_LayoutItem data_item = item;
  data_item.field_index -= kOneofBase;
  data_item.type = kUpb_LayoutItemType_OneofField;
  upb_MtDecoder_PushItem(d, data_item);

  // Storage for the oneof's case.
  upb_LayoutItem case_item = data_item;
  case_item.rep = kUpb_FieldRep_4Byte;  // Field Number.
  case_item.type = kUpb_LayoutItemType_OneofCase;
  upb_MtDecoder_PushItem(d, case_item);
}
4828 
// Returns the size in bytes of a field representation on the given target
// platform (32-bit or 64-bit).
size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
                               upb_MiniTablePlatform platform) {
  static const uint8_t kRepToSize32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_Pointer] = 4,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kRepToSize64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_Pointer] = 8,
      [kUpb_FieldRep_StringView] = 16,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // Sanity-check the table for the platform we are compiled for.
  UPB_ASSERT(sizeof(upb_StringView) ==
             UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]);

  if (platform == kUpb_MiniTablePlatform_32Bit) {
    return kRepToSize32[rep];
  }
  return kRepToSize64[rep];
}
4846 
// Returns the alignment in bytes of a field representation on the given
// target platform (32-bit or 64-bit).
size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
                                upb_MiniTablePlatform platform) {
  static const uint8_t kRepToAlign32[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_Pointer] = 4,
      [kUpb_FieldRep_StringView] = 4,
      [kUpb_FieldRep_8Byte] = 8,
  };
  static const uint8_t kRepToAlign64[] = {
      [kUpb_FieldRep_1Byte] = 1,
      [kUpb_FieldRep_4Byte] = 4,
      [kUpb_FieldRep_Pointer] = 8,
      [kUpb_FieldRep_StringView] = 8,
      [kUpb_FieldRep_8Byte] = 8,
  };
  // Sanity-check the table for the platform we are compiled for.
  UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
             UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]);

  if (platform == kUpb_MiniTablePlatform_32Bit) {
    return kRepToAlign32[rep];
  }
  return kRepToAlign64[rep];
}
4864 
// Decodes one field number from the oneof section and adds that field to the
// oneof described by `item`.
//
// The field must already exist in the table and must currently have hasbit
// presence; otherwise decoding is aborted with an error.  On success the
// field is prepended to the oneof's linked list (threaded through the
// fields' offset members) and item->rep is widened if this field needs more
// storage than any member seen so far.
//
// Returns the position of the first unconsumed character.
static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
                                                  const char* ptr,
                                                  char first_ch,
                                                  upb_LayoutItem* item) {
  uint32_t field_num;
  ptr = upb_MiniTable_DecodeBase92Varint(
      d, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
      kUpb_EncodedValue_MaxOneofField, &field_num);

  upb_MiniTable_Field* member =
      (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num);
  if (!member) {
    upb_MtDecoder_ErrorFormat(d,
                              "Couldn't add field number %" PRIu32
                              " to oneof, no such field number.",
                              field_num);
    UPB_UNREACHABLE();
  }
  if (member->offset != kHasbitPresence) {
    upb_MtDecoder_ErrorFormat(
        d,
        "Cannot add repeated, required, or singular field %" PRIu32
        " to oneof.",
        field_num);
    UPB_UNREACHABLE();
  }

  // Oneof storage must be large enough to accommodate the largest member.
  const int member_rep = member->mode >> kUpb_FieldRep_Shift;
  const size_t member_size = upb_MtDecoder_SizeOfRep(member_rep, d->platform);
  const size_t current_size = upb_MtDecoder_SizeOfRep(item->rep, d->platform);
  if (member_size > current_size) {
    item->rep = member_rep;
  }

  // Prepend this field to the linked list: it points at the old head, and
  // the item's head becomes this field's (biased) index.
  member->offset = item->field_index;
  item->field_index = (member - d->fields) + kOneofBase;
  return ptr;
}
4903 
// Decodes the oneof section, which runs from `ptr` to the end of the input.
//
// Members of a oneof are separated by the field separator and oneofs are
// separated from one another by the oneof separator.  Each completed oneof
// (including the last one, which has no trailing separator) is pushed onto
// the layout item vector.  Returns the position where decoding stopped.
static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
                                              const char* ptr) {
  upb_LayoutItem item = {.rep = 0,
                         .field_index = kUpb_LayoutItem_IndexSentinel};

  while (ptr < d->end) {
    const char ch = *ptr++;
    switch (ch) {
      case kUpb_EncodedValue_FieldSeparator:
        // Field separator, no action needed.
        break;
      case kUpb_EncodedValue_OneofSeparator:
        // End of oneof.
        upb_MtDecoder_PushOneof(d, item);
        // Move to next oneof.
        item.field_index = kUpb_LayoutItem_IndexSentinel;
        break;
      default:
        ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item);
        break;
    }
  }

  // Push final oneof.
  upb_MtDecoder_PushOneof(d, item);
  return ptr;
}
4925 
// Decodes a modifier varint and applies it.
//
// If a field has already been parsed (`last_field` is non-NULL) the modifier
// applies to that field.  Otherwise it is a message-level modifier and is
// stored in *msg_modifiers; that is an error when there is no table being
// built.  Returns the position of the first unconsumed character.
static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
                                               const char* ptr, char first_ch,
                                               upb_MiniTable_Field* last_field,
                                               uint64_t* msg_modifiers) {
  uint32_t mod;
  ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, first_ch,
                                         kUpb_EncodedValue_MinModifier,
                                         kUpb_EncodedValue_MaxModifier, &mod);

  if (last_field) {
    upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field);
    return ptr;
  }

  if (!d->table) {
    upb_MtDecoder_ErrorFormat(d, "Extensions cannot have message modifiers");
    UPB_UNREACHABLE();
  }
  *msg_modifiers = mod;
  return ptr;
}
4946 
// Allocates the table's subs array with room for `sub_count` entries from the
// decoder's arena.  Aborts decoding with an out-of-memory error on failure.
static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, uint32_t sub_count) {
  upb_MiniTable* table = d->table;
  const size_t bytes_needed = sizeof(*table->subs) * sub_count;
  table->subs = upb_Arena_Malloc(d->arena, bytes_needed);
  upb_MtDecoder_CheckOutOfMemory(d, table->subs);
}
4952 
// Parses `len` characters of encoded mini descriptor starting at `ptr`.
//
// Each parsed field is written to the next slot of `fields` (slots are
// `field_size` bytes apart) and *field_count is incremented.  Each input
// character selects one of four actions:
//   - field type character: start a new field, numbered one past the last;
//   - modifier range:       apply a field or message modifier;
//   - End marker:           decode the oneof section (rest of the input);
//   - skip range:           advance the running field number.
// Characters matching none of these are ignored.
//
// When a table is being built, its dense_below is set to the number of
// fields seen before the first skip (or to all fields if there is no skip).
static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len,
                                void* fields, size_t field_size,
                                uint16_t* field_count, uint32_t* sub_count) {
  uint64_t msg_modifiers = 0;
  uint32_t field_number = 0;
  upb_MiniTable_Field* prev_field = NULL;
  bool dense_below_pending = d->table != NULL;
  char* next_slot = fields;

  d->end = UPB_PTRADD(ptr, len);

  while (ptr < d->end) {
    const char ch = *ptr++;

    if (ch <= kUpb_EncodedValue_MaxField) {
      // New field: claim the next slot and give it the next field number.
      upb_MiniTable_Field* field = (upb_MiniTable_Field*)next_slot;
      next_slot += field_size;
      *field_count += 1;
      field_number++;
      field->number = field_number;
      prev_field = field;
      upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_count);
      continue;
    }

    if (kUpb_EncodedValue_MinModifier <= ch &&
        ch <= kUpb_EncodedValue_MaxModifier) {
      ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, prev_field, &msg_modifiers);
      if (msg_modifiers & kUpb_MessageModifier_IsExtendable) {
        d->table->ext |= kUpb_ExtMode_Extendable;
      }
      continue;
    }

    if (ch == kUpb_EncodedValue_End) {
      if (!d->table) {
        upb_MtDecoder_ErrorFormat(d, "Extensions cannot have oneofs.");
        UPB_UNREACHABLE();
      }
      ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
      continue;
    }

    if (kUpb_EncodedValue_MinSkip <= ch && ch <= kUpb_EncodedValue_MaxSkip) {
      // The first gap in field numbers ends the dense prefix.
      if (dense_below_pending) {
        d->table->dense_below = d->table->field_count;
        dense_below_pending = false;
      }
      uint32_t skip;
      ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, ch,
                                             kUpb_EncodedValue_MinSkip,
                                             kUpb_EncodedValue_MaxSkip, &skip);
      // One less than the skip, because the next field seen will increment.
      field_number += skip - 1;
    }
  }

  if (dense_below_pending) {
    d->table->dense_below = d->table->field_count;
  }
}
5003 
// Allocates the field array for d->table from d->arena, parses `data` into
// it, returns the unused tail to the arena and then allocates the subs.
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
                                       size_t len) {
  // The input length bounds the number of fields from above, so reserve one
  // entry per input byte; whatever is left over is given back below.
  const size_t reserved_bytes = sizeof(*d->fields) * len;
  d->fields = upb_Arena_Malloc(d->arena, reserved_bytes);
  upb_MtDecoder_CheckOutOfMemory(d, d->fields);

  d->table->field_count = 0;
  d->table->fields = d->fields;

  uint32_t sub_count = 0;
  upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
                      &d->table->field_count, &sub_count);

  const size_t used_bytes = sizeof(*d->fields) * d->table->field_count;
  upb_Arena_ShrinkLast(d->arena, d->fields, reserved_bytes, used_bytes);
  d->table->fields = d->fields;
  upb_MtDecoder_AllocateSubs(d, sub_count);
}
5022 
upb_MtDecoder_CompareFields(const void * _a,const void * _b)5023 int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
5024   const upb_LayoutItem* a = _a;
5025   const upb_LayoutItem* b = _b;
5026   // Currently we just sort by:
5027   //  1. rep (smallest fields first)
5028   //  2. type (oneof cases first)
5029   //  2. field_index (smallest numbers first)
5030   // The main goal of this is to reduce space lost to padding.
5031   // Later we may have more subtle reasons to prefer a different ordering.
5032   const int rep_bits = _upb_Log2Ceiling(kUpb_FieldRep_Max);
5033   const int type_bits = _upb_Log2Ceiling(kUpb_LayoutItemType_Max);
5034   const int idx_bits = (sizeof(a->field_index) * 8);
5035   UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
5036 #define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx
5037   uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
5038   uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
5039   assert(a_packed != b_packed);
5040 #undef UPB_COMBINE
5041   return a_packed < b_packed ? -1 : 1;
5042 }
5043 
// Appends a layout item for every field whose offset is below kOneofBase
// and then sorts the whole item vector with upb_MtDecoder_CompareFields().
// Always returns true.
static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
  // Fields at or above kOneofBase are skipped here (oneofs were already
  // added).
  const int field_count = d->table->field_count;
  for (int idx = 0; idx < field_count; idx++) {
    upb_MiniTable_Field* field = &d->fields[idx];
    if (field->offset < kOneofBase) {
      upb_LayoutItem item = {.field_index = idx,
                             .rep = field->mode >> kUpb_FieldRep_Shift,
                             .type = kUpb_LayoutItemType_Field};
      upb_MtDecoder_PushItem(d, item);
    }
  }

  // qsort() is only invoked when there is something to sort.
  if (d->vec.size != 0) {
    qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
          upb_MtDecoder_CompareFields);
  }

  return true;
}
5063 
// Returns n / d rounded up to the next whole number.
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
  const size_t biased = n + d - 1;
  return biased / d;
}
5067 
// Assigns `presence` for fields whose `offset` currently holds one of the
// presence markers, then sets ret->required_count and the initial ret->size
// (the number of bytes needed to hold the hasbits).
static void upb_MtDecoder_AssignHasbits(upb_MiniTable* ret) {
  upb_MiniTable_Field* fields = (upb_MiniTable_Field*)ret->fields;
  const int field_count = ret->field_count;
  int hasbit = 0;  // Hasbit 0 is never handed out.

  // Required fields go first so that they occupy the lowest hasbits; fields
  // without presence get 0.
  for (int i = 0; i < field_count; i++) {
    upb_MiniTable_Field* f = &fields[i];
    if (f->offset == kRequiredPresence) {
      f->presence = ++hasbit;
    } else if (f->offset == kNoPresence) {
      f->presence = 0;
    }
  }
  ret->required_count = hasbit;

  // The remaining hasbit fields continue the numbering.
  for (int i = 0; i < field_count; i++) {
    upb_MiniTable_Field* f = &fields[i];
    if (f->offset == kHasbitPresence) {
      f->presence = ++hasbit;
    }
  }

  // +1 because bit 0 is reserved even though it is never assigned.
  ret->size = hasbit ? upb_MiniTable_DivideRoundUp(hasbit + 1, 8) : 0;
}
5093 
// Reserves space for one item of representation `rep` at the end of the
// message: aligns the current d->table->size up, grows it by the item's size
// and returns the aligned offset where the item lives.
size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
  const size_t item_size = upb_MtDecoder_SizeOfRep(rep, d->platform);
  const size_t item_align = upb_MtDecoder_AlignOfRep(rep, d->platform);
  const size_t offset = UPB_ALIGN_UP(d->table->size, item_align);
  d->table->size = offset + item_size;
  return offset;
}
5101 
// Places every layout item in d->vec and copies the resulting offsets into
// the fields.  On entry the members of a oneof are chained through their
// `offset` members (stored as kOneofBase + next index, terminated by
// kUpb_LayoutItem_IndexSentinel); on exit `offset` holds real offsets.
static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
  upb_LayoutItem* const first = d->vec.data;
  upb_LayoutItem* const last = UPB_PTRADD(d->vec.data, d->vec.size);
  upb_LayoutItem* it;

  // Step 1: give every item a position in the message.
  for (it = first; it < last; it++) {
    it->offset = upb_MtDecoder_Place(d, it->rep);
  }

  // Step 2: oneof case offsets.  This has to happen before step 3, because
  // writing the real offsets destroys the links that chain the members.
  for (it = first; it < last; it++) {
    if (it->type == kUpb_LayoutItemType_OneofCase) {
      upb_MiniTable_Field* f = &d->fields[it->field_index];
      for (;;) {
        f->presence = ~it->offset;
        if (f->offset == kUpb_LayoutItem_IndexSentinel) break;
        UPB_ASSERT(f->offset - kOneofBase < d->table->field_count);
        f = &d->fields[f->offset - kOneofBase];
      }
    }
  }

  // Step 3: data offsets.
  for (it = first; it < last; it++) {
    upb_MiniTable_Field* f = &d->fields[it->field_index];
    if (it->type == kUpb_LayoutItemType_Field) {
      f->offset = it->offset;
    } else if (it->type == kUpb_LayoutItemType_OneofField) {
      // Every member of the oneof shares the item's offset.  Read the link
      // before overwriting it.
      for (;;) {
        uint16_t link = f->offset;
        f->offset = it->offset;
        if (link == kUpb_LayoutItem_IndexSentinel) break;
        f = &d->fields[link - kOneofBase];
      }
    }
  }

  // The fasttable parser (supported on 64-bit only) depends on this being a
  // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
  //
  // On 32-bit we could potentially make this smaller, but there is no
  // compelling reason to optimize this right now.
  d->table->size = UPB_ALIGN_UP(d->table->size, 8);
}
5150 
upb_MiniTable_BuildWithBuf(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,void ** buf,size_t * buf_size,upb_Status * status)5151 upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
5152                                           upb_MiniTablePlatform platform,
5153                                           upb_Arena* arena, void** buf,
5154                                           size_t* buf_size,
5155                                           upb_Status* status) {
5156   upb_MtDecoder decoder = {
5157       .platform = platform,
5158       .vec =
5159           {
5160               .data = *buf,
5161               .capacity = *buf_size / sizeof(*decoder.vec.data),
5162               .size = 0,
5163           },
5164       .arena = arena,
5165       .status = status,
5166       .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
5167   };
5168 
5169   if (UPB_SETJMP(decoder.err)) {
5170     decoder.table = NULL;
5171     goto done;
5172   }
5173 
5174   upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table);
5175 
5176   decoder.table->size = 0;
5177   decoder.table->field_count = 0;
5178   decoder.table->ext = kUpb_ExtMode_NonExtendable;
5179   decoder.table->dense_below = 0;
5180   decoder.table->table_mask = -1;
5181   decoder.table->required_count = 0;
5182 
5183   upb_MtDecoder_ParseMessage(&decoder, data, len);
5184   upb_MtDecoder_AssignHasbits(decoder.table);
5185   upb_MtDecoder_SortLayoutItems(&decoder);
5186   upb_MtDecoder_AssignOffsets(&decoder);
5187 
5188 done:
5189   *buf = decoder.vec.data;
5190   *buf_size = decoder.vec.capacity / sizeof(*decoder.vec.data);
5191   return decoder.table;
5192 }
5193 
upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,upb_Arena * arena)5194 upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,
5195                                              upb_Arena* arena) {
5196   upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
5197   if (!ret) return NULL;
5198 
5199   ret->size = 0;
5200   ret->field_count = 0;
5201   ret->ext = kUpb_ExtMode_IsMessageSet;
5202   ret->dense_below = 0;
5203   ret->table_mask = -1;
5204   ret->required_count = 0;
5205   return ret;
5206 }
5207 
upb_MiniTable_BuildMapEntry(upb_FieldType key_type,upb_FieldType value_type,bool value_is_proto3_enum,upb_MiniTablePlatform platform,upb_Arena * arena)5208 upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
5209                                            upb_FieldType value_type,
5210                                            bool value_is_proto3_enum,
5211                                            upb_MiniTablePlatform platform,
5212                                            upb_Arena* arena) {
5213   upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
5214   upb_MiniTable_Field* fields = upb_Arena_Malloc(arena, sizeof(*fields) * 2);
5215   if (!ret || !fields) return NULL;
5216 
5217   upb_MiniTable_Sub* subs = NULL;
5218   if (value_is_proto3_enum) value_type = kUpb_FieldType_Int32;
5219   if (value_type == kUpb_FieldType_Message ||
5220       value_type == kUpb_FieldType_Group || value_type == kUpb_FieldType_Enum) {
5221     subs = upb_Arena_Malloc(arena, sizeof(*subs));
5222     if (!subs) return NULL;
5223   }
5224 
5225   size_t field_size =
5226       upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, platform);
5227 
5228   fields[0].number = 1;
5229   fields[1].number = 2;
5230   fields[0].mode = kUpb_FieldMode_Scalar;
5231   fields[1].mode = kUpb_FieldMode_Scalar;
5232   fields[0].presence = 0;
5233   fields[1].presence = 0;
5234   fields[0].offset = 0;
5235   fields[1].offset = field_size;
5236 
5237   upb_MiniTable_SetTypeAndSub(&fields[0], key_type, NULL, 0);
5238   upb_MiniTable_SetTypeAndSub(&fields[1], value_type, NULL, 0);
5239 
5240   ret->size = UPB_ALIGN_UP(2 * field_size, 8);
5241   ret->field_count = 2;
5242   ret->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry;
5243   ret->dense_below = 2;
5244   ret->table_mask = -1;
5245   ret->required_count = 0;
5246   ret->subs = subs;
5247   ret->fields = fields;
5248   return ret;
5249 }
5250 
// Records `val` in `table`.  Values below 64 set a bit in table->mask; larger
// values are appended to table->values, which is grown in `arena` by one
// 4-byte entry.  Always returns true; an allocation failure is reported
// through upb_MtDecoder_CheckOutOfMemory().
static bool upb_MiniTable_BuildEnumValue(upb_MtDecoder* d,
                                         upb_MiniTable_Enum* table,
                                         uint32_t val, upb_Arena* arena) {
  if (val >= 64) {
    int32_t* grown =
        upb_Arena_Realloc(arena, (void*)table->values, table->value_count * 4,
                          (table->value_count + 1) * 4);
    upb_MtDecoder_CheckOutOfMemory(d, grown);
    grown[table->value_count] = (int32_t)val;
    table->value_count++;
    table->values = grown;
  } else {
    table->mask |= 1ULL << val;
  }
  return true;
}
5267 
upb_MiniTable_BuildEnum(const char * data,size_t len,upb_Arena * arena,upb_Status * status)5268 upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
5269                                             upb_Arena* arena,
5270                                             upb_Status* status) {
5271   upb_MtDecoder d = {
5272       .status = status,
5273       .end = UPB_PTRADD(data, len),
5274   };
5275 
5276   if (UPB_SETJMP(d.err)) {
5277     return NULL;
5278   }
5279 
5280   upb_MiniTable_Enum* table = upb_Arena_Malloc(arena, sizeof(*table));
5281   upb_MtDecoder_CheckOutOfMemory(&d, table);
5282 
5283   table->mask = 0;
5284   table->value_count = 0;
5285   table->values = NULL;
5286 
5287   const char* ptr = data;
5288   uint32_t base = 0;
5289 
5290   while (ptr < d.end) {
5291     char ch = *ptr++;
5292     if (ch <= kUpb_EncodedValue_MaxEnumMask) {
5293       uint32_t mask = upb_FromBase92(ch);
5294       for (int i = 0; i < 5; i++, base++, mask >>= 1) {
5295         if (mask & 1) {
5296           if (!upb_MiniTable_BuildEnumValue(&d, table, base, arena)) {
5297             return NULL;
5298           }
5299         }
5300       }
5301     } else if (kUpb_EncodedValue_MinSkip <= ch &&
5302                ch <= kUpb_EncodedValue_MaxSkip) {
5303       uint32_t skip;
5304       ptr = upb_MiniTable_DecodeBase92Varint(&d, ptr, ch,
5305                                              kUpb_EncodedValue_MinSkip,
5306                                              kUpb_EncodedValue_MaxSkip, &skip);
5307       base += skip;
5308     } else {
5309       upb_Status_SetErrorFormat(status, "Unexpected character: %c", ch);
5310       return NULL;
5311     }
5312   }
5313 
5314   return table;
5315 }
5316 
// Decodes the encoded descriptor in [data, data + len) into `ext`.
//
// The descriptor must describe exactly one field, because `ext` provides
// storage for exactly one.  On success the field is flagged with
// kUpb_LabelFlags_IsExtension, its offset is reset to 0, `sub` is stored in
// the extension and true is returned.  On failure false is returned and the
// error is recorded in `status`.
bool upb_MiniTable_BuildExtension(const char* data, size_t len,
                                  upb_MiniTable_Extension* ext,
                                  upb_MiniTable_Sub sub, upb_Status* status) {
  upb_MtDecoder decoder = {
      .arena = NULL,
      .status = status,
      .table = NULL,
  };

  if (UPB_SETJMP(decoder.err)) {
    return false;
  }

  uint16_t count = 0;
  upb_MtDecoder_Parse(&decoder, data, len, ext, sizeof(*ext), &count, NULL);
  if (count != 1) {
    // With zero fields ext->field was never written; do not touch it.
    upb_MtDecoder_ErrorFormat(&decoder,
                              "Extension must have exactly one field.");
    UPB_UNREACHABLE();
  }
  ext->field.mode |= kUpb_LabelFlags_IsExtension;
  ext->field.offset = 0;
  // NOTE(review): `sub` was previously accepted but never used; storing it
  // assumes upb_MiniTable_Extension has a `sub` member of this type.
  ext->sub = sub;
  return true;
}
5336 
upb_MiniTable_Build(const char * data,size_t len,upb_MiniTablePlatform platform,upb_Arena * arena,upb_Status * status)5337 upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
5338                                    upb_MiniTablePlatform platform,
5339                                    upb_Arena* arena, upb_Status* status) {
5340   void* buf = NULL;
5341   size_t size = 0;
5342   upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena,
5343                                                   &buf, &size, status);
5344   free(buf);
5345   return ret;
5346 }
5347 
// Stores `sub` as the sub-message table of `field`, which must be one of
// `table`'s own fields.  If `sub` is a map entry table, the field's mode is
// rewritten to a pointer-represented map.
void upb_MiniTable_SetSubMessage(upb_MiniTable* table,
                                 upb_MiniTable_Field* field,
                                 const upb_MiniTable* sub) {
  UPB_ASSERT((uintptr_t)field >= (uintptr_t)table->fields &&
             (uintptr_t)field <
                 (uintptr_t)(table->fields + table->field_count));

  if (sub->ext & kUpb_ExtMode_IsMapEntry) {
    field->mode =
        (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift) | kUpb_FieldMode_Map;
  }

  // table->subs is written through a non-const alias.
  upb_MiniTable_Sub* slot = (void*)&table->subs[field->submsg_index];
  slot->submsg = sub;
}
5361 
// Stores `sub` as the enum table of `field`, which must be one of `table`'s
// own fields.
void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field,
                              const upb_MiniTable_Enum* sub) {
  UPB_ASSERT((uintptr_t)field >= (uintptr_t)table->fields &&
             (uintptr_t)field <
                 (uintptr_t)(table->fields + table->field_count));

  // table->subs is written through a non-const alias.
  upb_MiniTable_Sub* slot = (void*)&table->subs[field->submsg_index];
  slot->subenum = sub;
}
5370 
5371 /** upb/def.c ************************************************************/
5372 
5373 #include <ctype.h>
5374 #include <errno.h>
5375 #include <setjmp.h>
5376 #include <stdlib.h>
5377 #include <string.h>
5378 
5379 
5380 /* Must be last. */
5381 
/* Length-prefixed string.  `str` is declared with a single element; the
 * string data is stored starting there and continues past the declared
 * array. */
typedef struct {
  size_t len;  /* NOTE(review): presumably the byte length of `str`, not
                * counting the terminator -- confirm against the code that
                * allocates these. */
  char str[1]; /* Null-terminated string data follows. */
} str_t;
5386 
5387 /* The upb core does not generally have a concept of default instances. However
5388  * for descriptor options we make an exception since the max size is known and
5389  * modest (<200 bytes). All types can share a default instance since it is
5390  * initialized to zeroes.
5391  *
5392  * We have to allocate an extra pointer for upb's internal metadata. */
/* Zero-filled backing storage: sizeof(void*) leading bytes followed by
 * _UPB_MAXOPT_SIZE bytes. */
static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
/* Points just past the leading pointer-sized region of the buffer above. */
static const char* opt_default = &opt_default_buf[sizeof(void*)];
5395 
/* Definition of a single field; `is_extension_` distinguishes extensions.
 * The struct size must stay a multiple of 8 (see the padding member). */
struct upb_FieldDef {
  const google_protobuf_FieldOptions* opts;
  const upb_FileDef* file;
  const upb_MessageDef* msgdef;
  const char* full_name;
  const char* json_name;
  /* Default value storage.  NOTE(review): presumably the active member is
   * selected by `type_` -- confirm against the code that sets it. */
  union {
    int64_t sint;
    uint64_t uint;
    double dbl;
    float flt;
    bool boolean;
    str_t* str;
  } defaultval;
  /* NOTE(review): presumably `extension_scope` is used when `is_extension_`
   * is set and `oneof` otherwise -- confirm. */
  union {
    const upb_OneofDef* oneof;
    const upb_MessageDef* extension_scope;
  } scope;
  /* Sub-definition of the field, or the still-unresolved field proto. */
  union {
    const upb_MessageDef* msgdef;
    const upb_EnumDef* enumdef;
    const google_protobuf_FieldDescriptorProto* unresolved;
  } sub;
  uint32_t number_;
  uint16_t index_;
  uint16_t layout_index; /* Index into msgdef->layout->fields or file->exts */
  bool has_default;
  bool is_extension_;
  bool packed_;
  bool proto3_optional_;
  bool has_json_name_;
  upb_FieldType type_;
  upb_Label label_;
#if UINTPTR_MAX == 0xffffffff
  uint32_t padding;  // Increase size to a multiple of 8.
#endif
};
5433 
/* One extension range: its options plus the range bounds. */
struct upb_ExtensionRange {
  const google_protobuf_ExtensionRangeOptions* opts;
  int32_t start;
  int32_t end; /* NOTE(review): presumably exclusive -- confirm. */
};
5439 
/* Definition of a message type: identity, lookup tables and the arrays of
 * everything nested inside it.  Each array pointer below is paired with a
 * `*_count` member.  The struct size must stay a multiple of 8 (see the
 * padding member). */
struct upb_MessageDef {
  const google_protobuf_MessageOptions* opts;
  const upb_MiniTable* layout;
  const upb_FileDef* file;
  const upb_MessageDef* containing_type;
  const char* full_name;

  /* Tables for looking up fields by number and name. */
  upb_inttable itof;
  upb_strtable ntof;

  /* All nested defs.
   * MEM: We could save some space here by putting nested defs in a contiguous
   * region and calculating counts from offsets or vice-versa. */
  const upb_FieldDef* fields;
  const upb_OneofDef* oneofs;
  const upb_ExtensionRange* ext_ranges;
  const upb_StringView* res_names;
  const upb_MessageDef* nested_msgs;
  const upb_MessageReservedRange* res_ranges;
  const upb_EnumDef* nested_enums;
  const upb_FieldDef* nested_exts;
  int field_count;
  int real_oneof_count;
  int oneof_count;
  int ext_range_count;
  int res_range_count;
  int res_name_count;
  int nested_msg_count;
  int nested_enum_count;
  int nested_ext_count;
  bool in_message_set;
  upb_WellKnown well_known_type;
#if UINTPTR_MAX == 0xffffffff
  uint32_t padding;  // Increase size to a multiple of 8.
#endif
};
5477 
/* Definition of an enum type: identity, name/number lookup tables, values
 * and reserved ranges/names.  The struct size must stay a multiple of 8 (see
 * the padding member). */
struct upb_EnumDef {
  const google_protobuf_EnumOptions* opts;
  const upb_MiniTable_Enum* layout;  // Only for proto2.
  const upb_FileDef* file;
  const upb_MessageDef* containing_type;  // Could be merged with "file".
  const char* full_name;
  upb_strtable ntoi; /* Lookup by name. */
  upb_inttable iton; /* Lookup by number. */
  const upb_EnumValueDef* values;
  const upb_EnumReservedRange* res_ranges;
  const upb_StringView* res_names;
  int value_count;
  int res_range_count;
  int res_name_count;
  int32_t defaultval;
#if UINTPTR_MAX == 0xffffffff
  uint32_t padding;  // Increase size to a multiple of 8.
#endif
};
5497 
/* Definition of a single enum value: its options, owning enum, full name
 * and number. */
struct upb_EnumValueDef {
  const google_protobuf_EnumValueOptions* opts;
  const upb_EnumDef* parent;
  const char* full_name;
  int32_t number;
};
5504 
/* Definition of a oneof: identity, member fields and lookup tables.  The
 * struct size must stay a multiple of 8 (see the padding member). */
struct upb_OneofDef {
  const google_protobuf_OneofOptions* opts;
  const upb_MessageDef* parent;
  const char* full_name;
  int field_count;
  bool synthetic;
  const upb_FieldDef** fields; /* Array of pointers to the member fields. */
  upb_strtable ntof;
  upb_inttable itof;
#if UINTPTR_MAX == 0xffffffff
  uint32_t padding;  // Increase size to a multiple of 8.
#endif
};
5518 
/* Definition of one file: identity, dependencies and the arrays of top-level
 * defs.  Each array pointer is paired with a `*_count` member below. */
struct upb_FileDef {
  const google_protobuf_FileOptions* opts;
  const char* name;
  const char* package;

  const upb_FileDef** deps;
  const int32_t* public_deps;
  const int32_t* weak_deps;
  const upb_MessageDef* top_lvl_msgs;
  const upb_EnumDef* top_lvl_enums;
  const upb_FieldDef* top_lvl_exts;
  const upb_ServiceDef* services;
  const upb_MiniTable_Extension** ext_layouts;
  const upb_DefPool* symtab;

  int dep_count;
  int public_dep_count;
  int weak_dep_count;
  int top_lvl_msg_count;
  int top_lvl_enum_count;
  int top_lvl_ext_count;
  int service_count;
  int ext_count; /* All exts in the file. */
  upb_Syntax syntax;
};
5544 
/* Definition of one service method: owning service, name, input/output
 * message types and streaming flags. */
struct upb_MethodDef {
  const google_protobuf_MethodOptions* opts;
  upb_ServiceDef* service;
  const char* full_name;
  const upb_MessageDef* input_type;
  const upb_MessageDef* output_type;
  int index;
  bool client_streaming;
  bool server_streaming;
};
5555 
/* Definition of a service: owning file, name and its array of methods. */
struct upb_ServiceDef {
  const google_protobuf_ServiceOptions* opts;
  const upb_FileDef* file;
  const char* full_name;
  upb_MethodDef* methods;
  int method_count;
  int index;
};
5564 
/* Pool of definitions: the arena plus the lookup tables that index what has
 * been loaded. */
struct upb_DefPool {
  upb_Arena* arena;
  upb_strtable syms;  /* full_name -> packed def ptr */
  upb_strtable files; /* file_name -> upb_FileDef* */
  upb_inttable exts;  /* upb_MiniTable_Extension* -> upb_FieldDef* */
  upb_ExtensionRegistry* extreg;
  size_t bytes_loaded;
};
5573 
5574 /* Inside a symtab we store tagged pointers to specific def types. */
/* Tag stored in the low bits of a packed def pointer.  The same numeric tags
 * are reused for different def kinds depending on which table the value
 * lives in, so a tag is only meaningful together with its table. */
typedef enum {
  UPB_DEFTYPE_MASK = 7, /* Low three bits of the pointer hold the tag. */

  /* Only inside symtab table. */
  UPB_DEFTYPE_EXT = 0,
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,
  UPB_DEFTYPE_ENUMVAL = 3,
  UPB_DEFTYPE_SERVICE = 4,

  /* Only inside message table. */
  UPB_DEFTYPE_FIELD = 0,
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2,

  /* Only inside file table. */
  UPB_DEFTYPE_FILE = 0,
  UPB_DEFTYPE_LAYOUT = 1
} upb_deftype_t;
5594 
/* Named constant for the value 0.  NOTE(review): presumably marks a field
 * whose type has not been specified; its uses are outside this chunk. */
#define FIELD_TYPE_UNSPECIFIED 0
5596 
/* One reserved range of a message. */
struct upb_MessageReservedRange {
  int32_t start;
  int32_t end; /* NOTE(review): presumably exclusive -- confirm. */
};
5601 
/* State carried while a file is being added to a upb_DefPool.  Errors are
 * reported by recording them in `status` and jumping to `err`. */
struct symtab_addctx {
  upb_DefPool* symtab;
  upb_FileDef* file;                /* File we are building. */
  upb_Arena* arena;                 /* Allocate defs here. */
  upb_Arena* tmp_arena;             /* For temporary allocations. */
  const upb_MiniTable_File* layout; /* NULL if we should build layouts. */
  int enum_count;                   /* Count of enums built so far. */
  int msg_count;                    /* Count of messages built so far. */
  int ext_count;                    /* Count of extensions built so far. */
  upb_Status* status;               /* Record errors here. */
  jmp_buf err;                      /* longjmp() on error. */
};
5614 
deftype(upb_value v)5615 static upb_deftype_t deftype(upb_value v) {
5616   uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
5617   return num & UPB_DEFTYPE_MASK;
5618 }
5619 
/* Returns the untagged pointer stored in `v` if its tag equals `type`, or
 * NULL if the tag is different. */
static const void* unpack_def(upb_value v, upb_deftype_t type) {
  const uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);
  if ((tagged & UPB_DEFTYPE_MASK) != type) {
    return NULL;
  }
  return (const void*)(tagged & ~UPB_DEFTYPE_MASK);
}
5626 
pack_def(const void * ptr,upb_deftype_t type)5627 static upb_value pack_def(const void* ptr, upb_deftype_t type) {
5628   // Our 3-bit pointer tagging requires all pointers to be multiples of 8.
5629   // The arena will always yield 8-byte-aligned addresses, however we put
5630   // the defs into arrays.  For each element in the array to be 8-byte-aligned,
5631   // the sizes of each def type must also be a multiple of 8.
5632   //
5633   // If any of these asserts fail, we need to add or remove padding on 32-bit
5634   // machines (64-bit machines will have 8-byte alignment already due to
5635   // pointers, which all of these structs have).
5636   UPB_ASSERT((sizeof(upb_FieldDef) & UPB_DEFTYPE_MASK) == 0);
5637   UPB_ASSERT((sizeof(upb_MessageDef) & UPB_DEFTYPE_MASK) == 0);
5638   UPB_ASSERT((sizeof(upb_EnumDef) & UPB_DEFTYPE_MASK) == 0);
5639   UPB_ASSERT((sizeof(upb_EnumValueDef) & UPB_DEFTYPE_MASK) == 0);
5640   UPB_ASSERT((sizeof(upb_ServiceDef) & UPB_DEFTYPE_MASK) == 0);
5641   UPB_ASSERT((sizeof(upb_OneofDef) & UPB_DEFTYPE_MASK) == 0);
5642   uintptr_t num = (uintptr_t)ptr;
5643   UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0);
5644   num |= type;
5645   return upb_value_constptr((const void*)num);
5646 }
5647 
5648 /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
/* Returns true if `c` lies in the inclusive range [low, high]. */
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  return low <= c && c <= high;
}
5652 
/* Sets bit 0x20 of `ch`, which lower-cases ASCII letters.  A result that is
 * a letter implies the input was a letter; for any other input the result is
 * not meaningful. */
static char upb_ascii_lower(char ch) {
  const int with_case_bit = ch | 0x20;
  return with_case_bit;
}
5659 
/* Returns true for ASCII letters of either case and for '_'. */
static bool upb_isletter(char c) {
  if (c == '_') {
    return true;
  }
  return upb_isbetween(upb_ascii_lower(c), 'a', 'z');
}
5664 
/* Returns true for ASCII digits and for everything upb_isletter() accepts. */
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) {
    return true;
  }
  return upb_isletter(c);
}
5668 
/* Returns the part of `fullname` after its last '.', the whole string if it
 * contains no '.', or NULL if `fullname` is NULL.  The result points into
 * `fullname`. */
static const char* shortdefname(const char* fullname) {
  if (fullname == NULL) {
    return NULL;
  }
  const char* last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
5682 
5683 /* All submessage fields are lower than all other fields.
5684  * Secondly, fields are increasing in order. */
/* Sort key for a field: its number, with bit 30 set for everything that is
 * not a sub-message so that sub-message fields rank first. */
uint32_t field_rank(const upb_FieldDef* f) {
  const uint32_t non_submsg_bit = 1 << 30;
  const uint32_t number = upb_FieldDef_Number(f);
  UPB_ASSERT(number < non_submsg_bit);
  return upb_FieldDef_IsSubMessage(f) ? number : (number | non_submsg_bit);
}
5692 
cmp_fields(const void * p1,const void * p2)5693 int cmp_fields(const void* p1, const void* p2) {
5694   const upb_FieldDef* f1 = *(upb_FieldDef* const*)p1;
5695   const upb_FieldDef* f2 = *(upb_FieldDef* const*)p2;
5696   return field_rank(f1) - field_rank(f2);
5697 }
5698 
/* Records the fixed "out of memory" error message in `status`. */
static void upb_Status_setoom(upb_Status* status) {
  static const char kOomMessage[] = "out of memory";
  upb_Status_SetErrorMessage(status, kOomMessage);
}
5702 
/* Sets m->well_known_type from the message's full name.  Names that are not
 * in the table below, and a NULL name, yield kUpb_WellKnown_Unspecified. */
static void assign_msg_wellknowntype(upb_MessageDef* m) {
  static const struct {
    const char* full_name;
    upb_WellKnown type;
  } kWellKnownTypes[] = {
      {"google.protobuf.Any", kUpb_WellKnown_Any},
      {"google.protobuf.FieldMask", kUpb_WellKnown_FieldMask},
      {"google.protobuf.Duration", kUpb_WellKnown_Duration},
      {"google.protobuf.Timestamp", kUpb_WellKnown_Timestamp},
      {"google.protobuf.DoubleValue", kUpb_WellKnown_DoubleValue},
      {"google.protobuf.FloatValue", kUpb_WellKnown_FloatValue},
      {"google.protobuf.Int64Value", kUpb_WellKnown_Int64Value},
      {"google.protobuf.UInt64Value", kUpb_WellKnown_UInt64Value},
      {"google.protobuf.Int32Value", kUpb_WellKnown_Int32Value},
      {"google.protobuf.UInt32Value", kUpb_WellKnown_UInt32Value},
      {"google.protobuf.BoolValue", kUpb_WellKnown_BoolValue},
      {"google.protobuf.StringValue", kUpb_WellKnown_StringValue},
      {"google.protobuf.BytesValue", kUpb_WellKnown_BytesValue},
      {"google.protobuf.Value", kUpb_WellKnown_Value},
      {"google.protobuf.ListValue", kUpb_WellKnown_ListValue},
      {"google.protobuf.Struct", kUpb_WellKnown_Struct},
  };
  const size_t entry_count =
      sizeof(kWellKnownTypes) / sizeof(kWellKnownTypes[0]);

  const char* name = upb_MessageDef_FullName(m);
  upb_WellKnown found = kUpb_WellKnown_Unspecified;

  if (name != NULL) {
    for (size_t i = 0; i < entry_count; i++) {
      if (strcmp(name, kWellKnownTypes[i].full_name) == 0) {
        found = kWellKnownTypes[i].type;
        break;
      }
    }
  }

  m->well_known_type = found;
}
5745 
5746 /* upb_EnumDef ****************************************************************/
5747 
upb_EnumDef_Options(const upb_EnumDef * e)5748 const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e) {
5749   return e->opts;
5750 }
5751 
upb_EnumDef_HasOptions(const upb_EnumDef * e)5752 bool upb_EnumDef_HasOptions(const upb_EnumDef* e) {
5753   return e->opts != (void*)opt_default;
5754 }
5755 
upb_EnumDef_FullName(const upb_EnumDef * e)5756 const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; }
5757 
upb_EnumDef_Name(const upb_EnumDef * e)5758 const char* upb_EnumDef_Name(const upb_EnumDef* e) {
5759   return shortdefname(e->full_name);
5760 }
5761 
upb_EnumDef_File(const upb_EnumDef * e)5762 const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; }
5763 
upb_EnumDef_ContainingType(const upb_EnumDef * e)5764 const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) {
5765   return e->containing_type;
5766 }
5767 
upb_EnumDef_Default(const upb_EnumDef * e)5768 int32_t upb_EnumDef_Default(const upb_EnumDef* e) {
5769   UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval));
5770   return e->defaultval;
5771 }
5772 
upb_EnumDef_ReservedRangeCount(const upb_EnumDef * e)5773 int upb_EnumDef_ReservedRangeCount(const upb_EnumDef* e) {
5774   return e->res_range_count;
5775 }
5776 
5777 /* upb_EnumReservedRange ******************************************************/
5778 
/* One reserved range of an enum.  The builder in this file rejects ranges
 * with end < start and notes that `end` is inclusive. */
struct upb_EnumReservedRange {
  int32_t start;
  int32_t end;
};
5783 
_upb_EnumReservedRange_At(const upb_EnumReservedRange * r,int i)5784 upb_EnumReservedRange* _upb_EnumReservedRange_At(const upb_EnumReservedRange* r,
5785                                                  int i) {
5786   return (upb_EnumReservedRange*)&r[i];
5787 }
5788 
upb_EnumReservedRange_Start(const upb_EnumReservedRange * r)5789 int32_t upb_EnumReservedRange_Start(const upb_EnumReservedRange* r) {
5790   return r->start;
5791 }
upb_EnumReservedRange_End(const upb_EnumReservedRange * r)5792 int32_t upb_EnumReservedRange_End(const upb_EnumReservedRange* r) {
5793   return r->end;
5794 }
5795 
symtab_errf(symtab_addctx * ctx,const char * fmt,...)5796 UPB_NORETURN UPB_NOINLINE UPB_PRINTF(2, 3) static void symtab_errf(
5797     symtab_addctx* ctx, const char* fmt, ...) {
5798   va_list argp;
5799   va_start(argp, fmt);
5800   upb_Status_VSetErrorFormat(ctx->status, fmt, argp);
5801   va_end(argp);
5802   UPB_LONGJMP(ctx->err, 1);
5803 }
5804 
_upb_EnumReservedRanges_New(symtab_addctx * ctx,int n,const google_protobuf_EnumDescriptorProto_EnumReservedRange * const * protos,const upb_EnumDef * e)5805 upb_EnumReservedRange* _upb_EnumReservedRanges_New(
5806     symtab_addctx* ctx, int n,
5807     const google_protobuf_EnumDescriptorProto_EnumReservedRange* const* protos,
5808     const upb_EnumDef* e) {
5809   upb_EnumReservedRange* r =
5810       upb_Arena_Malloc(ctx->arena, sizeof(upb_EnumReservedRange) * n);
5811 
5812   for (int i = 0; i < n; i++) {
5813     const int32_t start =
5814         google_protobuf_EnumDescriptorProto_EnumReservedRange_start(protos[i]);
5815     const int32_t end =
5816         google_protobuf_EnumDescriptorProto_EnumReservedRange_end(protos[i]);
5817 
5818     // A full validation would also check that each range is disjoint, and that
5819     // none of the fields overlap with the extension ranges, but we are just
5820     // sanity checking here.
5821 
5822     // Note: Not a typo! Unlike extension ranges and message reserved ranges,
5823     // the end value of an enum reserved range is *inclusive*!
5824     if (end < start) {
5825       symtab_errf(ctx, "Reserved range (%d, %d) is invalid, enum=%s\n",
5826                            (int)start, (int)end, upb_EnumDef_FullName(e));
5827     }
5828 
5829     r[i].start = start;
5830     r[i].end = end;
5831   }
5832 
5833   return r;
5834 }
5835 
upb_EnumDef_ReservedRange(const upb_EnumDef * e,int i)5836 const upb_EnumReservedRange* upb_EnumDef_ReservedRange(const upb_EnumDef* e,
5837                                                        int i) {
5838   UPB_ASSERT(0 <= i && i < e->res_range_count);
5839   return _upb_EnumReservedRange_At(e->res_ranges, i);
5840 }
5841 
upb_EnumDef_ReservedNameCount(const upb_EnumDef * e)5842 int upb_EnumDef_ReservedNameCount(const upb_EnumDef* e) {
5843   return e->res_name_count;
5844 }
5845 
upb_EnumDef_ReservedName(const upb_EnumDef * e,int i)5846 upb_StringView upb_EnumDef_ReservedName(const upb_EnumDef* e, int i) {
5847   UPB_ASSERT(0 <= i && i < e->res_name_count);
5848   return e->res_names[i];
5849 }
5850 
upb_EnumDef_ValueCount(const upb_EnumDef * e)5851 int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; }
5852 
upb_EnumDef_FindValueByNameWithSize(const upb_EnumDef * def,const char * name,size_t len)5853 const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize(
5854     const upb_EnumDef* def, const char* name, size_t len) {
5855   upb_value v;
5856   return upb_strtable_lookup2(&def->ntoi, name, len, &v)
5857              ? upb_value_getconstptr(v)
5858              : NULL;
5859 }
5860 
upb_EnumDef_FindValueByNumber(const upb_EnumDef * def,int32_t num)5861 const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* def,
5862                                                       int32_t num) {
5863   upb_value v;
5864   return upb_inttable_lookup(&def->iton, num, &v) ? upb_value_getconstptr(v)
5865                                                   : NULL;
5866 }
5867 
upb_EnumDef_CheckNumber(const upb_EnumDef * e,int32_t num)5868 bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) {
5869   // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect
5870   // this to be faster (especially for small numbers).
5871   return upb_MiniTable_Enum_CheckValue(e->layout, num);
5872 }
5873 
upb_EnumDef_Value(const upb_EnumDef * e,int i)5874 const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) {
5875   UPB_ASSERT(0 <= i && i < e->value_count);
5876   return &e->values[i];
5877 }
5878 
5879 /* upb_EnumValueDef ***********************************************************/
5880 
upb_EnumValueDef_Options(const upb_EnumValueDef * e)5881 const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options(
5882     const upb_EnumValueDef* e) {
5883   return e->opts;
5884 }
5885 
upb_EnumValueDef_HasOptions(const upb_EnumValueDef * e)5886 bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* e) {
5887   return e->opts != (void*)opt_default;
5888 }
5889 
upb_EnumValueDef_Enum(const upb_EnumValueDef * ev)5890 const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* ev) {
5891   return ev->parent;
5892 }
5893 
upb_EnumValueDef_FullName(const upb_EnumValueDef * ev)5894 const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* ev) {
5895   return ev->full_name;
5896 }
5897 
upb_EnumValueDef_Name(const upb_EnumValueDef * ev)5898 const char* upb_EnumValueDef_Name(const upb_EnumValueDef* ev) {
5899   return shortdefname(ev->full_name);
5900 }
5901 
upb_EnumValueDef_Number(const upb_EnumValueDef * ev)5902 int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* ev) {
5903   return ev->number;
5904 }
5905 
upb_EnumValueDef_Index(const upb_EnumValueDef * ev)5906 uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* ev) {
5907   // Compute index in our parent's array.
5908   return ev - ev->parent->values;
5909 }
5910 
5911 /* upb_ExtensionRange
5912  * ***************************************************************/
5913 
upb_ExtensionRange_Options(const upb_ExtensionRange * r)5914 const google_protobuf_ExtensionRangeOptions* upb_ExtensionRange_Options(
5915     const upb_ExtensionRange* r) {
5916   return r->opts;
5917 }
5918 
upb_ExtensionRange_HasOptions(const upb_ExtensionRange * r)5919 bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) {
5920   return r->opts != (void*)opt_default;
5921 }
5922 
upb_ExtensionRange_Start(const upb_ExtensionRange * e)5923 int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* e) {
5924   return e->start;
5925 }
5926 
upb_ExtensionRange_End(const upb_ExtensionRange * e)5927 int32_t upb_ExtensionRange_End(const upb_ExtensionRange* e) { return e->end; }
5928 
5929 /* upb_FieldDef ***************************************************************/
5930 
upb_FieldDef_Options(const upb_FieldDef * f)5931 const google_protobuf_FieldOptions* upb_FieldDef_Options(
5932     const upb_FieldDef* f) {
5933   return f->opts;
5934 }
5935 
upb_FieldDef_HasOptions(const upb_FieldDef * f)5936 bool upb_FieldDef_HasOptions(const upb_FieldDef* f) {
5937   return f->opts != (void*)opt_default;
5938 }
5939 
upb_FieldDef_FullName(const upb_FieldDef * f)5940 const char* upb_FieldDef_FullName(const upb_FieldDef* f) {
5941   return f->full_name;
5942 }
5943 
upb_FieldDef_CType(const upb_FieldDef * f)5944 upb_CType upb_FieldDef_CType(const upb_FieldDef* f) {
5945   switch (f->type_) {
5946     case kUpb_FieldType_Double:
5947       return kUpb_CType_Double;
5948     case kUpb_FieldType_Float:
5949       return kUpb_CType_Float;
5950     case kUpb_FieldType_Int64:
5951     case kUpb_FieldType_SInt64:
5952     case kUpb_FieldType_SFixed64:
5953       return kUpb_CType_Int64;
5954     case kUpb_FieldType_Int32:
5955     case kUpb_FieldType_SFixed32:
5956     case kUpb_FieldType_SInt32:
5957       return kUpb_CType_Int32;
5958     case kUpb_FieldType_UInt64:
5959     case kUpb_FieldType_Fixed64:
5960       return kUpb_CType_UInt64;
5961     case kUpb_FieldType_UInt32:
5962     case kUpb_FieldType_Fixed32:
5963       return kUpb_CType_UInt32;
5964     case kUpb_FieldType_Enum:
5965       return kUpb_CType_Enum;
5966     case kUpb_FieldType_Bool:
5967       return kUpb_CType_Bool;
5968     case kUpb_FieldType_String:
5969       return kUpb_CType_String;
5970     case kUpb_FieldType_Bytes:
5971       return kUpb_CType_Bytes;
5972     case kUpb_FieldType_Group:
5973     case kUpb_FieldType_Message:
5974       return kUpb_CType_Message;
5975   }
5976   UPB_UNREACHABLE();
5977 }
5978 
upb_FieldDef_Type(const upb_FieldDef * f)5979 upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; }
5980 
upb_FieldDef_Index(const upb_FieldDef * f)5981 uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; }
5982 
upb_FieldDef_Label(const upb_FieldDef * f)5983 upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; }
5984 
upb_FieldDef_Number(const upb_FieldDef * f)5985 uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; }
5986 
upb_FieldDef_IsExtension(const upb_FieldDef * f)5987 bool upb_FieldDef_IsExtension(const upb_FieldDef* f) {
5988   return f->is_extension_;
5989 }
5990 
upb_FieldDef_IsPacked(const upb_FieldDef * f)5991 bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->packed_; }
5992 
upb_FieldDef_Name(const upb_FieldDef * f)5993 const char* upb_FieldDef_Name(const upb_FieldDef* f) {
5994   return shortdefname(f->full_name);
5995 }
5996 
upb_FieldDef_JsonName(const upb_FieldDef * f)5997 const char* upb_FieldDef_JsonName(const upb_FieldDef* f) {
5998   return f->json_name;
5999 }
6000 
upb_FieldDef_HasJsonName(const upb_FieldDef * f)6001 bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) {
6002   return f->has_json_name_;
6003 }
6004 
upb_FieldDef_File(const upb_FieldDef * f)6005 const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; }
6006 
upb_FieldDef_ContainingType(const upb_FieldDef * f)6007 const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) {
6008   return f->msgdef;
6009 }
6010 
upb_FieldDef_ExtensionScope(const upb_FieldDef * f)6011 const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) {
6012   return f->is_extension_ ? f->scope.extension_scope : NULL;
6013 }
6014 
upb_FieldDef_ContainingOneof(const upb_FieldDef * f)6015 const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) {
6016   return f->is_extension_ ? NULL : f->scope.oneof;
6017 }
6018 
upb_FieldDef_RealContainingOneof(const upb_FieldDef * f)6019 const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) {
6020   const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f);
6021   if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL;
6022   return oneof;
6023 }
6024 
upb_FieldDef_Default(const upb_FieldDef * f)6025 upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) {
6026   UPB_ASSERT(!upb_FieldDef_IsSubMessage(f));
6027   upb_MessageValue ret;
6028 
6029   switch (upb_FieldDef_CType(f)) {
6030     case kUpb_CType_Bool:
6031       return (upb_MessageValue){.bool_val = f->defaultval.boolean};
6032     case kUpb_CType_Int64:
6033       return (upb_MessageValue){.int64_val = f->defaultval.sint};
6034     case kUpb_CType_UInt64:
6035       return (upb_MessageValue){.uint64_val = f->defaultval.uint};
6036     case kUpb_CType_Enum:
6037     case kUpb_CType_Int32:
6038       return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint};
6039     case kUpb_CType_UInt32:
6040       return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint};
6041     case kUpb_CType_Float:
6042       return (upb_MessageValue){.float_val = f->defaultval.flt};
6043     case kUpb_CType_Double:
6044       return (upb_MessageValue){.double_val = f->defaultval.dbl};
6045     case kUpb_CType_String:
6046     case kUpb_CType_Bytes: {
6047       str_t* str = f->defaultval.str;
6048       if (str) {
6049         return (upb_MessageValue){
6050             .str_val = (upb_StringView){.data = str->str, .size = str->len}};
6051       } else {
6052         return (upb_MessageValue){
6053             .str_val = (upb_StringView){.data = NULL, .size = 0}};
6054       }
6055     }
6056     default:
6057       UPB_UNREACHABLE();
6058   }
6059 
6060   return ret;
6061 }
6062 
upb_FieldDef_MessageSubDef(const upb_FieldDef * f)6063 const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) {
6064   return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL;
6065 }
6066 
upb_FieldDef_EnumSubDef(const upb_FieldDef * f)6067 const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) {
6068   return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL;
6069 }
6070 
upb_FieldDef_MiniTable(const upb_FieldDef * f)6071 const upb_MiniTable_Field* upb_FieldDef_MiniTable(const upb_FieldDef* f) {
6072   UPB_ASSERT(!upb_FieldDef_IsExtension(f));
6073   return &f->msgdef->layout->fields[f->layout_index];
6074 }
6075 
_upb_FieldDef_ExtensionMiniTable(const upb_FieldDef * f)6076 const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable(
6077     const upb_FieldDef* f) {
6078   UPB_ASSERT(upb_FieldDef_IsExtension(f));
6079   return f->file->ext_layouts[f->layout_index];
6080 }
6081 
_upb_FieldDef_IsProto3Optional(const upb_FieldDef * f)6082 bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) {
6083   return f->proto3_optional_;
6084 }
6085 
upb_FieldDef_IsSubMessage(const upb_FieldDef * f)6086 bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) {
6087   return upb_FieldDef_CType(f) == kUpb_CType_Message;
6088 }
6089 
upb_FieldDef_IsString(const upb_FieldDef * f)6090 bool upb_FieldDef_IsString(const upb_FieldDef* f) {
6091   return upb_FieldDef_CType(f) == kUpb_CType_String ||
6092          upb_FieldDef_CType(f) == kUpb_CType_Bytes;
6093 }
6094 
upb_FieldDef_IsRepeated(const upb_FieldDef * f)6095 bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) {
6096   return upb_FieldDef_Label(f) == kUpb_Label_Repeated;
6097 }
6098 
upb_FieldDef_IsPrimitive(const upb_FieldDef * f)6099 bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) {
6100   return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f);
6101 }
6102 
upb_FieldDef_IsMap(const upb_FieldDef * f)6103 bool upb_FieldDef_IsMap(const upb_FieldDef* f) {
6104   return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) &&
6105          upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f));
6106 }
6107 
upb_FieldDef_HasDefault(const upb_FieldDef * f)6108 bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; }
6109 
upb_FieldDef_HasSubDef(const upb_FieldDef * f)6110 bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) {
6111   return upb_FieldDef_IsSubMessage(f) ||
6112          upb_FieldDef_CType(f) == kUpb_CType_Enum;
6113 }
6114 
upb_FieldDef_HasPresence(const upb_FieldDef * f)6115 bool upb_FieldDef_HasPresence(const upb_FieldDef* f) {
6116   if (upb_FieldDef_IsRepeated(f)) return false;
6117   return upb_FieldDef_IsSubMessage(f) || upb_FieldDef_ContainingOneof(f) ||
6118          f->file->syntax == kUpb_Syntax_Proto2;
6119 }
6120 
// True when `x` lies in the closed interval [low, high].
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}
6124 
// True when `label` lies in [1, 3].
bool upb_FieldDef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
// True when `type` lies in [1, 11].
bool upb_FieldDef_checktype(int32_t type) {
  return between(type, 1, 11);
}
// True when `fmt` lies in [1, 3].
bool upb_FieldDef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
6128 
// True when `type` lies in [1, 18].
bool upb_FieldDef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
6132 
6133 /* upb_MessageDef
6134  * *****************************************************************/
6135 
upb_MessageDef_Options(const upb_MessageDef * m)6136 const google_protobuf_MessageOptions* upb_MessageDef_Options(
6137     const upb_MessageDef* m) {
6138   return m->opts;
6139 }
6140 
upb_MessageDef_HasOptions(const upb_MessageDef * m)6141 bool upb_MessageDef_HasOptions(const upb_MessageDef* m) {
6142   return m->opts != (void*)opt_default;
6143 }
6144 
upb_MessageDef_FullName(const upb_MessageDef * m)6145 const char* upb_MessageDef_FullName(const upb_MessageDef* m) {
6146   return m->full_name;
6147 }
6148 
upb_MessageDef_File(const upb_MessageDef * m)6149 const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) {
6150   return m->file;
6151 }
6152 
upb_MessageDef_ContainingType(const upb_MessageDef * m)6153 const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) {
6154   return m->containing_type;
6155 }
6156 
upb_MessageDef_Name(const upb_MessageDef * m)6157 const char* upb_MessageDef_Name(const upb_MessageDef* m) {
6158   return shortdefname(m->full_name);
6159 }
6160 
upb_MessageDef_Syntax(const upb_MessageDef * m)6161 upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) {
6162   return m->file->syntax;
6163 }
6164 
upb_MessageDef_FindFieldByNumber(const upb_MessageDef * m,uint32_t i)6165 const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m,
6166                                                      uint32_t i) {
6167   upb_value val;
6168   return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val)
6169                                                 : NULL;
6170 }
6171 
upb_MessageDef_FindFieldByNameWithSize(const upb_MessageDef * m,const char * name,size_t len)6172 const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize(
6173     const upb_MessageDef* m, const char* name, size_t len) {
6174   upb_value val;
6175 
6176   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
6177     return NULL;
6178   }
6179 
6180   return unpack_def(val, UPB_DEFTYPE_FIELD);
6181 }
6182 
upb_MessageDef_FindOneofByNameWithSize(const upb_MessageDef * m,const char * name,size_t len)6183 const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize(
6184     const upb_MessageDef* m, const char* name, size_t len) {
6185   upb_value val;
6186 
6187   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
6188     return NULL;
6189   }
6190 
6191   return unpack_def(val, UPB_DEFTYPE_ONEOF);
6192 }
6193 
upb_MessageDef_FindByNameWithSize(const upb_MessageDef * m,const char * name,size_t len,const upb_FieldDef ** out_f,const upb_OneofDef ** out_o)6194 bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m,
6195                                        const char* name, size_t len,
6196                                        const upb_FieldDef** out_f,
6197                                        const upb_OneofDef** out_o) {
6198   upb_value val;
6199 
6200   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
6201     return false;
6202   }
6203 
6204   const upb_FieldDef* f = unpack_def(val, UPB_DEFTYPE_FIELD);
6205   const upb_OneofDef* o = unpack_def(val, UPB_DEFTYPE_ONEOF);
6206   if (out_f) *out_f = f;
6207   if (out_o) *out_o = o;
6208   return f || o; /* False if this was a JSON name. */
6209 }
6210 
upb_MessageDef_FindByJsonNameWithSize(const upb_MessageDef * m,const char * name,size_t len)6211 const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize(
6212     const upb_MessageDef* m, const char* name, size_t len) {
6213   upb_value val;
6214   const upb_FieldDef* f;
6215 
6216   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
6217     return NULL;
6218   }
6219 
6220   f = unpack_def(val, UPB_DEFTYPE_FIELD);
6221   if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
6222 
6223   return f;
6224 }
6225 
upb_MessageDef_numfields(const upb_MessageDef * m)6226 int upb_MessageDef_numfields(const upb_MessageDef* m) { return m->field_count; }
6227 
upb_MessageDef_numoneofs(const upb_MessageDef * m)6228 int upb_MessageDef_numoneofs(const upb_MessageDef* m) { return m->oneof_count; }
6229 
upb_MessageDef_numrealoneofs(const upb_MessageDef * m)6230 int upb_MessageDef_numrealoneofs(const upb_MessageDef* m) {
6231   return m->real_oneof_count;
6232 }
6233 
upb_MessageDef_ExtensionRangeCount(const upb_MessageDef * m)6234 int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) {
6235   return m->ext_range_count;
6236 }
6237 
upb_MessageDef_ReservedRangeCount(const upb_MessageDef * m)6238 int upb_MessageDef_ReservedRangeCount(const upb_MessageDef* m) {
6239   return m->res_range_count;
6240 }
6241 
upb_MessageDef_ReservedNameCount(const upb_MessageDef * m)6242 int upb_MessageDef_ReservedNameCount(const upb_MessageDef* m) {
6243   return m->res_name_count;
6244 }
6245 
upb_MessageDef_FieldCount(const upb_MessageDef * m)6246 int upb_MessageDef_FieldCount(const upb_MessageDef* m) {
6247   return m->field_count;
6248 }
6249 
upb_MessageDef_OneofCount(const upb_MessageDef * m)6250 int upb_MessageDef_OneofCount(const upb_MessageDef* m) {
6251   return m->oneof_count;
6252 }
6253 
upb_MessageDef_NestedMessageCount(const upb_MessageDef * m)6254 int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) {
6255   return m->nested_msg_count;
6256 }
6257 
upb_MessageDef_NestedEnumCount(const upb_MessageDef * m)6258 int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) {
6259   return m->nested_enum_count;
6260 }
6261 
upb_MessageDef_NestedExtensionCount(const upb_MessageDef * m)6262 int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) {
6263   return m->nested_ext_count;
6264 }
6265 
upb_MessageDef_realoneofcount(const upb_MessageDef * m)6266 int upb_MessageDef_realoneofcount(const upb_MessageDef* m) {
6267   return m->real_oneof_count;
6268 }
6269 
upb_MessageDef_MiniTable(const upb_MessageDef * m)6270 const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) {
6271   return m->layout;
6272 }
6273 
upb_MessageDef_ExtensionRange(const upb_MessageDef * m,int i)6274 const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m,
6275                                                         int i) {
6276   UPB_ASSERT(0 <= i && i < m->ext_range_count);
6277   return &m->ext_ranges[i];
6278 }
6279 
_upb_MessageReservedRange_At(const upb_MessageReservedRange * r,int i)6280 upb_MessageReservedRange* _upb_MessageReservedRange_At(
6281     const upb_MessageReservedRange* r, int i) {
6282   return (upb_MessageReservedRange*)&r[i];
6283 }
6284 
upb_MessageDef_ReservedRange(const upb_MessageDef * m,int i)6285 const upb_MessageReservedRange* upb_MessageDef_ReservedRange(
6286     const upb_MessageDef* m, int i) {
6287   UPB_ASSERT(0 <= i && i < m->res_range_count);
6288   return _upb_MessageReservedRange_At(m->res_ranges, i);
6289 }
6290 
upb_MessageDef_ReservedName(const upb_MessageDef * m,int i)6291 upb_StringView upb_MessageDef_ReservedName(const upb_MessageDef* m, int i) {
6292   UPB_ASSERT(0 <= i && i < m->res_name_count);
6293   return m->res_names[i];
6294 }
6295 
upb_MessageReservedRange_Start(const upb_MessageReservedRange * r)6296 int32_t upb_MessageReservedRange_Start(const upb_MessageReservedRange* r) {
6297   return r->start;
6298 }
upb_MessageReservedRange_End(const upb_MessageReservedRange * r)6299 int32_t upb_MessageReservedRange_End(const upb_MessageReservedRange* r) {
6300   return r->end;
6301 }
6302 
_upb_MessageReservedRanges_New(symtab_addctx * ctx,int n,const google_protobuf_DescriptorProto_ReservedRange * const * protos,const upb_MessageDef * m)6303 upb_MessageReservedRange* _upb_MessageReservedRanges_New(
6304     symtab_addctx* ctx, int n,
6305     const google_protobuf_DescriptorProto_ReservedRange* const* protos,
6306     const upb_MessageDef* m) {
6307   upb_MessageReservedRange* r =
6308       upb_Arena_Malloc(ctx->arena, sizeof(upb_MessageReservedRange) * n);
6309 
6310   for (int i = 0; i < n; i++) {
6311     const int32_t start = google_protobuf_DescriptorProto_ReservedRange_start(protos[i]);
6312     const int32_t end = google_protobuf_DescriptorProto_ReservedRange_end(protos[i]);
6313     const int32_t max = kUpb_MaxFieldNumber + 1;
6314 
6315     // A full validation would also check that each range is disjoint, and that
6316     // none of the fields overlap with the extension ranges, but we are just
6317     // sanity checking here.
6318     if (start < 1 || end <= start || end > max) {
6319       symtab_errf(ctx,
6320                            "Reserved range (%d, %d) is invalid, message=%s\n",
6321                            (int)start, (int)end, upb_MessageDef_FullName(m));
6322     }
6323 
6324     r[i].start = start;
6325     r[i].end = end;
6326   }
6327 
6328   return r;
6329 }
6330 
upb_MessageDef_Field(const upb_MessageDef * m,int i)6331 const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) {
6332   UPB_ASSERT(0 <= i && i < m->field_count);
6333   return &m->fields[i];
6334 }
6335 
upb_MessageDef_Oneof(const upb_MessageDef * m,int i)6336 const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) {
6337   UPB_ASSERT(0 <= i && i < m->oneof_count);
6338   return &m->oneofs[i];
6339 }
6340 
upb_MessageDef_NestedMessage(const upb_MessageDef * m,int i)6341 const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m,
6342                                                    int i) {
6343   UPB_ASSERT(0 <= i && i < m->nested_msg_count);
6344   return &m->nested_msgs[i];
6345 }
6346 
upb_MessageDef_NestedEnum(const upb_MessageDef * m,int i)6347 const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) {
6348   UPB_ASSERT(0 <= i && i < m->nested_enum_count);
6349   return &m->nested_enums[i];
6350 }
6351 
upb_MessageDef_NestedExtension(const upb_MessageDef * m,int i)6352 const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m,
6353                                                    int i) {
6354   UPB_ASSERT(0 <= i && i < m->nested_ext_count);
6355   return &m->nested_exts[i];
6356 }
6357 
upb_MessageDef_WellKnownType(const upb_MessageDef * m)6358 upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) {
6359   return m->well_known_type;
6360 }
6361 
6362 /* upb_OneofDef ***************************************************************/
6363 
upb_OneofDef_Options(const upb_OneofDef * o)6364 const google_protobuf_OneofOptions* upb_OneofDef_Options(
6365     const upb_OneofDef* o) {
6366   return o->opts;
6367 }
6368 
upb_OneofDef_HasOptions(const upb_OneofDef * o)6369 bool upb_OneofDef_HasOptions(const upb_OneofDef* o) {
6370   return o->opts != (void*)opt_default;
6371 }
6372 
upb_OneofDef_Name(const upb_OneofDef * o)6373 const char* upb_OneofDef_Name(const upb_OneofDef* o) {
6374   return shortdefname(o->full_name);
6375 }
6376 
upb_OneofDef_ContainingType(const upb_OneofDef * o)6377 const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) {
6378   return o->parent;
6379 }
6380 
upb_OneofDef_FieldCount(const upb_OneofDef * o)6381 int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; }
6382 
upb_OneofDef_Field(const upb_OneofDef * o,int i)6383 const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) {
6384   UPB_ASSERT(i < o->field_count);
6385   return o->fields[i];
6386 }
6387 
upb_OneofDef_numfields(const upb_OneofDef * o)6388 int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; }
6389 
upb_OneofDef_Index(const upb_OneofDef * o)6390 uint32_t upb_OneofDef_Index(const upb_OneofDef* o) {
6391   // Compute index in our parent's array.
6392   return o - o->parent->oneofs;
6393 }
6394 
upb_OneofDef_IsSynthetic(const upb_OneofDef * o)6395 bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; }
6396 
upb_OneofDef_LookupNameWithSize(const upb_OneofDef * o,const char * name,size_t length)6397 const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o,
6398                                                     const char* name,
6399                                                     size_t length) {
6400   upb_value val;
6401   return upb_strtable_lookup2(&o->ntof, name, length, &val)
6402              ? upb_value_getptr(val)
6403              : NULL;
6404 }
6405 
upb_OneofDef_LookupNumber(const upb_OneofDef * o,uint32_t num)6406 const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o,
6407                                               uint32_t num) {
6408   upb_value val;
6409   return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val)
6410                                                   : NULL;
6411 }
6412 
6413 /* upb_FileDef ****************************************************************/
6414 
upb_FileDef_Options(const upb_FileDef * f)6415 const google_protobuf_FileOptions* upb_FileDef_Options(const upb_FileDef* f) {
6416   return f->opts;
6417 }
6418 
upb_FileDef_HasOptions(const upb_FileDef * f)6419 bool upb_FileDef_HasOptions(const upb_FileDef* f) {
6420   return f->opts != (void*)opt_default;
6421 }
6422 
upb_FileDef_Name(const upb_FileDef * f)6423 const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; }
6424 
upb_FileDef_Package(const upb_FileDef * f)6425 const char* upb_FileDef_Package(const upb_FileDef* f) {
6426   return f->package ? f->package : "";
6427 }
6428 
upb_FileDef_Syntax(const upb_FileDef * f)6429 upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; }
6430 
upb_FileDef_TopLevelMessageCount(const upb_FileDef * f)6431 int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) {
6432   return f->top_lvl_msg_count;
6433 }
6434 
upb_FileDef_DependencyCount(const upb_FileDef * f)6435 int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; }
6436 
upb_FileDef_PublicDependencyCount(const upb_FileDef * f)6437 int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) {
6438   return f->public_dep_count;
6439 }
6440 
upb_FileDef_WeakDependencyCount(const upb_FileDef * f)6441 int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) {
6442   return f->weak_dep_count;
6443 }
6444 
_upb_FileDef_PublicDependencyIndexes(const upb_FileDef * f)6445 const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) {
6446   return f->public_deps;
6447 }
6448 
_upb_FileDef_WeakDependencyIndexes(const upb_FileDef * f)6449 const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) {
6450   return f->weak_deps;
6451 }
6452 
upb_FileDef_TopLevelEnumCount(const upb_FileDef * f)6453 int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) {
6454   return f->top_lvl_enum_count;
6455 }
6456 
upb_FileDef_TopLevelExtensionCount(const upb_FileDef * f)6457 int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) {
6458   return f->top_lvl_ext_count;
6459 }
6460 
upb_FileDef_ServiceCount(const upb_FileDef * f)6461 int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; }
6462 
upb_FileDef_Dependency(const upb_FileDef * f,int i)6463 const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) {
6464   UPB_ASSERT(0 <= i && i < f->dep_count);
6465   return f->deps[i];
6466 }
6467 
upb_FileDef_PublicDependency(const upb_FileDef * f,int i)6468 const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) {
6469   UPB_ASSERT(0 <= i && i < f->public_dep_count);
6470   return f->deps[f->public_deps[i]];
6471 }
6472 
upb_FileDef_WeakDependency(const upb_FileDef * f,int i)6473 const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) {
6474   UPB_ASSERT(0 <= i && i < f->public_dep_count);
6475   return f->deps[f->weak_deps[i]];
6476 }
6477 
upb_FileDef_TopLevelMessage(const upb_FileDef * f,int i)6478 const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) {
6479   UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count);
6480   return &f->top_lvl_msgs[i];
6481 }
6482 
upb_FileDef_TopLevelEnum(const upb_FileDef * f,int i)6483 const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) {
6484   UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count);
6485   return &f->top_lvl_enums[i];
6486 }
6487 
upb_FileDef_TopLevelExtension(const upb_FileDef * f,int i)6488 const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) {
6489   UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count);
6490   return &f->top_lvl_exts[i];
6491 }
6492 
upb_FileDef_Service(const upb_FileDef * f,int i)6493 const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) {
6494   UPB_ASSERT(0 <= i && i < f->service_count);
6495   return &f->services[i];
6496 }
6497 
upb_FileDef_Pool(const upb_FileDef * f)6498 const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; }
6499 
6500 /* upb_MethodDef **************************************************************/
6501 
upb_MethodDef_Options(const upb_MethodDef * m)6502 const google_protobuf_MethodOptions* upb_MethodDef_Options(
6503     const upb_MethodDef* m) {
6504   return m->opts;
6505 }
6506 
upb_MethodDef_HasOptions(const upb_MethodDef * m)6507 bool upb_MethodDef_HasOptions(const upb_MethodDef* m) {
6508   return m->opts != (void*)opt_default;
6509 }
6510 
upb_MethodDef_FullName(const upb_MethodDef * m)6511 const char* upb_MethodDef_FullName(const upb_MethodDef* m) {
6512   return m->full_name;
6513 }
6514 
upb_MethodDef_Index(const upb_MethodDef * m)6515 int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; }
6516 
upb_MethodDef_Name(const upb_MethodDef * m)6517 const char* upb_MethodDef_Name(const upb_MethodDef* m) {
6518   return shortdefname(m->full_name);
6519 }
6520 
upb_MethodDef_Service(const upb_MethodDef * m)6521 const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) {
6522   return m->service;
6523 }
6524 
upb_MethodDef_InputType(const upb_MethodDef * m)6525 const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) {
6526   return m->input_type;
6527 }
6528 
upb_MethodDef_OutputType(const upb_MethodDef * m)6529 const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) {
6530   return m->output_type;
6531 }
6532 
upb_MethodDef_ClientStreaming(const upb_MethodDef * m)6533 bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) {
6534   return m->client_streaming;
6535 }
6536 
upb_MethodDef_ServerStreaming(const upb_MethodDef * m)6537 bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) {
6538   return m->server_streaming;
6539 }
6540 
6541 /* upb_ServiceDef *************************************************************/
6542 
upb_ServiceDef_Options(const upb_ServiceDef * s)6543 const google_protobuf_ServiceOptions* upb_ServiceDef_Options(
6544     const upb_ServiceDef* s) {
6545   return s->opts;
6546 }
6547 
upb_ServiceDef_HasOptions(const upb_ServiceDef * s)6548 bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) {
6549   return s->opts != (void*)opt_default;
6550 }
6551 
upb_ServiceDef_FullName(const upb_ServiceDef * s)6552 const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) {
6553   return s->full_name;
6554 }
6555 
upb_ServiceDef_Name(const upb_ServiceDef * s)6556 const char* upb_ServiceDef_Name(const upb_ServiceDef* s) {
6557   return shortdefname(s->full_name);
6558 }
6559 
upb_ServiceDef_Index(const upb_ServiceDef * s)6560 int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; }
6561 
upb_ServiceDef_File(const upb_ServiceDef * s)6562 const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) {
6563   return s->file;
6564 }
6565 
upb_ServiceDef_MethodCount(const upb_ServiceDef * s)6566 int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) {
6567   return s->method_count;
6568 }
6569 
upb_ServiceDef_Method(const upb_ServiceDef * s,int i)6570 const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) {
6571   return i < 0 || i >= s->method_count ? NULL : &s->methods[i];
6572 }
6573 
upb_ServiceDef_FindMethodByName(const upb_ServiceDef * s,const char * name)6574 const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s,
6575                                                      const char* name) {
6576   for (int i = 0; i < s->method_count; i++) {
6577     if (strcmp(name, upb_MethodDef_Name(&s->methods[i])) == 0) {
6578       return &s->methods[i];
6579     }
6580   }
6581   return NULL;
6582 }
6583 
6584 /* upb_DefPool ****************************************************************/
6585 
/* Frees the pool's arena with upb_Arena_Free() and then releases the pool
 * struct itself with upb_gfree(). */
void upb_DefPool_Free(upb_DefPool* s) {
  upb_Arena* arena = s->arena;
  upb_Arena_Free(arena);
  upb_gfree(s);
}
6590 
upb_DefPool_New(void)6591 upb_DefPool* upb_DefPool_New(void) {
6592   upb_DefPool* s = upb_gmalloc(sizeof(*s));
6593 
6594   if (!s) {
6595     return NULL;
6596   }
6597 
6598   s->arena = upb_Arena_New();
6599   s->bytes_loaded = 0;
6600 
6601   if (!upb_strtable_init(&s->syms, 32, s->arena) ||
6602       !upb_strtable_init(&s->files, 4, s->arena) ||
6603       !upb_inttable_init(&s->exts, s->arena)) {
6604     goto err;
6605   }
6606 
6607   s->extreg = upb_ExtensionRegistry_New(s->arena);
6608   if (!s->extreg) goto err;
6609   return s;
6610 
6611 err:
6612   upb_Arena_Free(s->arena);
6613   upb_gfree(s);
6614   return NULL;
6615 }
6616 
/* Looks up the NUL-terminated name |sym| in the pool's symbol table.  Returns
 * unpack_def(v, type) for the stored value, or NULL when |sym| is absent. */
static const void* symtab_lookup(const upb_DefPool* s, const char* sym,
                                 upb_deftype_t type) {
  upb_value found;
  if (!upb_strtable_lookup(&s->syms, sym, &found)) {
    return NULL;
  }
  return unpack_def(found, type);
}
6622 
/* Same as symtab_lookup(), but the name is given as |sym| plus an explicit
 * length |size|.  Returns unpack_def(v, type) for the stored value, or NULL
 * when the name is absent. */
static const void* symtab_lookup2(const upb_DefPool* s, const char* sym,
                                  size_t size, upb_deftype_t type) {
  upb_value found;
  if (!upb_strtable_lookup2(&s->syms, sym, size, &found)) {
    return NULL;
  }
  return unpack_def(found, type);
}
6629 
upb_DefPool_FindMessageByName(const upb_DefPool * s,const char * sym)6630 const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s,
6631                                                     const char* sym) {
6632   return symtab_lookup(s, sym, UPB_DEFTYPE_MSG);
6633 }
6634 
upb_DefPool_FindMessageByNameWithSize(const upb_DefPool * s,const char * sym,size_t len)6635 const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize(
6636     const upb_DefPool* s, const char* sym, size_t len) {
6637   return symtab_lookup2(s, sym, len, UPB_DEFTYPE_MSG);
6638 }
6639 
upb_DefPool_FindEnumByName(const upb_DefPool * s,const char * sym)6640 const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s,
6641                                               const char* sym) {
6642   return symtab_lookup(s, sym, UPB_DEFTYPE_ENUM);
6643 }
6644 
upb_DefPool_FindEnumByNameval(const upb_DefPool * s,const char * sym)6645 const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s,
6646                                                       const char* sym) {
6647   return symtab_lookup(s, sym, UPB_DEFTYPE_ENUMVAL);
6648 }
6649 
upb_DefPool_FindFileByName(const upb_DefPool * s,const char * name)6650 const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s,
6651                                               const char* name) {
6652   upb_value v;
6653   return upb_strtable_lookup(&s->files, name, &v)
6654              ? unpack_def(v, UPB_DEFTYPE_FILE)
6655              : NULL;
6656 }
6657 
upb_DefPool_FindFileByNameWithSize(const upb_DefPool * s,const char * name,size_t len)6658 const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s,
6659                                                       const char* name,
6660                                                       size_t len) {
6661   upb_value v;
6662   return upb_strtable_lookup2(&s->files, name, len, &v)
6663              ? unpack_def(v, UPB_DEFTYPE_FILE)
6664              : NULL;
6665 }
6666 
upb_DefPool_FindExtensionByNameWithSize(const upb_DefPool * s,const char * name,size_t size)6667 const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize(
6668     const upb_DefPool* s, const char* name, size_t size) {
6669   upb_value v;
6670   if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL;
6671 
6672   switch (deftype(v)) {
6673     case UPB_DEFTYPE_FIELD:
6674       return unpack_def(v, UPB_DEFTYPE_FIELD);
6675     case UPB_DEFTYPE_MSG: {
6676       const upb_MessageDef* m = unpack_def(v, UPB_DEFTYPE_MSG);
6677       return m->in_message_set ? &m->nested_exts[0] : NULL;
6678     }
6679     default:
6680       break;
6681   }
6682 
6683   return NULL;
6684 }
6685 
upb_DefPool_FindExtensionByName(const upb_DefPool * s,const char * sym)6686 const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s,
6687                                                     const char* sym) {
6688   return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym));
6689 }
6690 
upb_DefPool_FindServiceByName(const upb_DefPool * s,const char * name)6691 const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s,
6692                                                     const char* name) {
6693   return symtab_lookup(s, name, UPB_DEFTYPE_SERVICE);
6694 }
6695 
upb_DefPool_FindServiceByNameWithSize(const upb_DefPool * s,const char * name,size_t size)6696 const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize(
6697     const upb_DefPool* s, const char* name, size_t size) {
6698   return symtab_lookup2(s, name, size, UPB_DEFTYPE_SERVICE);
6699 }
6700 
upb_DefPool_FindFileContainingSymbol(const upb_DefPool * s,const char * name)6701 const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s,
6702                                                         const char* name) {
6703   upb_value v;
6704   // TODO(haberman): non-extension fields and oneofs.
6705   if (upb_strtable_lookup(&s->syms, name, &v)) {
6706     switch (deftype(v)) {
6707       case UPB_DEFTYPE_EXT: {
6708         const upb_FieldDef* f = unpack_def(v, UPB_DEFTYPE_EXT);
6709         return upb_FieldDef_File(f);
6710       }
6711       case UPB_DEFTYPE_MSG: {
6712         const upb_MessageDef* m = unpack_def(v, UPB_DEFTYPE_MSG);
6713         return upb_MessageDef_File(m);
6714       }
6715       case UPB_DEFTYPE_ENUM: {
6716         const upb_EnumDef* e = unpack_def(v, UPB_DEFTYPE_ENUM);
6717         return upb_EnumDef_File(e);
6718       }
6719       case UPB_DEFTYPE_ENUMVAL: {
6720         const upb_EnumValueDef* ev = unpack_def(v, UPB_DEFTYPE_ENUMVAL);
6721         return upb_EnumDef_File(upb_EnumValueDef_Enum(ev));
6722       }
6723       case UPB_DEFTYPE_SERVICE: {
6724         const upb_ServiceDef* service = unpack_def(v, UPB_DEFTYPE_SERVICE);
6725         return upb_ServiceDef_File(service);
6726       }
6727       default:
6728         UPB_UNREACHABLE();
6729     }
6730   }
6731 
6732   const char* last_dot = strrchr(name, '.');
6733   if (last_dot) {
6734     const upb_MessageDef* parent =
6735         upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name);
6736     if (parent) {
6737       const char* shortname = last_dot + 1;
6738       if (upb_MessageDef_FindByNameWithSize(parent, shortname,
6739                                             strlen(shortname), NULL, NULL)) {
6740         return upb_MessageDef_File(parent);
6741       }
6742     }
6743   }
6744 
6745   return NULL;
6746 }
6747 
6748 /* Code to build defs from descriptor protos. *********************************/
6749 
6750 /* There is a question of how much validation to do here.  It will be difficult
6751  * to perfectly match the amount of validation performed by proto2.  But since
6752  * this code is used to directly build defs from Ruby (for example) we do need
6753  * to validate important constraints like uniqueness of names and numbers. */
6754 
/* Calls symtab_oomerr(ctx) when |x| evaluates to false/NULL.  Requires a
 * `symtab_addctx* ctx` to be in scope at the point of use.
 *
 * Wrapped in do/while(0) so that `CHK_OOM(x);` is a single statement and is
 * safe inside an unbraced if/else. */
#define CHK_OOM(x)        \
  do {                    \
    if (!(x)) {           \
      symtab_oomerr(ctx); \
    }                     \
  } while (0)
6759 
symtab_oomerr(symtab_addctx * ctx)6760 UPB_NORETURN UPB_NOINLINE static void symtab_oomerr(symtab_addctx* ctx) {
6761   upb_Status_setoom(ctx->status);
6762   UPB_LONGJMP(ctx->err, 1);
6763 }
6764 
/* Allocates |bytes| bytes from ctx->arena.
 *
 * Returns NULL for a zero-byte request without touching the arena.  If the
 * arena allocation fails, symtab_oomerr() is invoked, so a non-zero request
 * never yields NULL to the caller. */
void* symtab_alloc(symtab_addctx* ctx, size_t bytes) {
  void* mem = NULL;
  if (bytes != 0) {
    mem = upb_Arena_Malloc(ctx->arena, bytes);
    if (mem == NULL) {
      symtab_oomerr(ctx);
    }
  }
  return mem;
}
6771 
// We want to copy the options verbatim into the destination options proto.
// We use serialize+parse as our deep copy.
//
// When |proto| has options, they are serialized into ctx->tmp_arena and then
// parsed into ctx->arena, and |target| receives the parsed copy.  Otherwise
// |target| is pointed at the shared opt_default placeholder.  Either
// allocation failing triggers CHK_OOM.  Requires `ctx` in scope.
//
// Wrapped in do/while(0) so that `SET_OPTIONS(...);` is a single statement and
// is safe inside an unbraced if/else.
#define SET_OPTIONS(target, desc_type, options_type, proto)                     \
  do {                                                                          \
    if (google_protobuf_##desc_type##_has_options(proto)) {                     \
      size_t size;                                                              \
      char* pb = google_protobuf_##options_type##_serialize(                    \
          google_protobuf_##desc_type##_options(proto), ctx->tmp_arena, &size); \
      CHK_OOM(pb);                                                              \
      target = google_protobuf_##options_type##_parse(pb, size, ctx->arena);    \
      CHK_OOM(target);                                                          \
    } else {                                                                    \
      target = (const google_protobuf_##options_type*)opt_default;              \
    }                                                                           \
  } while (0)
6785 
/* Validates identifier |name|, reporting the first violation through
 * symtab_errf().
 *
 * Rules enforced:
 *   - '.' is only accepted when |full| is true, and never at the start of a
 *     component (so no leading dot and no "..").
 *   - the first character of every component must satisfy upb_isletter().
 *   - every other character must satisfy upb_isalphanum().
 *   - the name must not end at the start of a component (empty name or
 *     trailing '.'). */
static void check_ident(symtab_addctx* ctx, upb_StringView name, bool full) {
  const char* const text = name.data;
  const size_t text_len = name.size;
  bool at_component_start = true;

  for (size_t pos = 0; pos < text_len; pos++) {
    const char ch = text[pos];

    if (ch == '.') {
      if (at_component_start || !full) {
        symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)text_len,
                    text);
      }
      at_component_start = true;
      continue;
    }

    if (at_component_start) {
      if (!upb_isletter(ch)) {
        symtab_errf(
            ctx,
            "invalid name: path components must start with a letter (%.*s)",
            (int)text_len, text);
      }
      at_component_start = false;
      continue;
    }

    if (!upb_isalphanum(ch)) {
      symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)",
                  (int)text_len, text);
    }
  }

  if (at_component_start) {
    symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)text_len, text);
  }
}
6817 
/* Returns n / d rounded up to the next whole number. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t biased = n + d - 1;
  return biased / d;
}
6819 
/* Returns the number of bytes used to store one value of C type |type|:
 * 1 for bool, 4 for 32-bit scalars and enums, 8 for 64-bit scalars, pointer
 * size for messages, and sizeof(upb_StringView) for strings and bytes. */
static size_t upb_MessageValue_sizeof(upb_CType type) {
  switch (type) {
    case kUpb_CType_Bool:
      return 1;
    case kUpb_CType_Float:
    case kUpb_CType_Int32:
    case kUpb_CType_UInt32:
    case kUpb_CType_Enum:
      return 4;
    case kUpb_CType_Double:
    case kUpb_CType_Int64:
    case kUpb_CType_UInt64:
      return 8;
    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      return sizeof(upb_StringView);
    case kUpb_CType_Message:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
6841 
upb_msg_fielddefsize(const upb_FieldDef * f)6842 static uint8_t upb_msg_fielddefsize(const upb_FieldDef* f) {
6843   if (upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) {
6844     upb_MapEntry ent;
6845     UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
6846     return sizeof(ent.k);
6847   } else if (upb_FieldDef_IsRepeated(f)) {
6848     return sizeof(void*);
6849   } else {
6850     return upb_MessageValue_sizeof(upb_FieldDef_CType(f));
6851   }
6852 }
6853 
/* Reserves |size| bytes at the end of layout |l|, aligned up to a multiple of
 * |size|, and returns the offset of the reserved region.  l->size is advanced
 * past the region.  Reports an error through symtab_errf() if the new size
 * would exceed UINT16_MAX; |m| is only used to name the message in that
 * error. */
static uint32_t upb_MiniTable_place(symtab_addctx* ctx, upb_MiniTable* l,
                                    size_t size, const upb_MessageDef* m) {
  const size_t start = UPB_ALIGN_UP(l->size, size);
  const size_t end = start + size;

  if (end > UINT16_MAX) {
    symtab_errf(ctx, "size of message %s exceeded max size of %zu bytes",
                upb_MessageDef_FullName(m), (size_t)UINT16_MAX);
  }

  l->size = end;
  return start;
}
6867 
field_number_cmp(const void * p1,const void * p2)6868 static int field_number_cmp(const void* p1, const void* p2) {
6869   const upb_MiniTable_Field* f1 = p1;
6870   const upb_MiniTable_Field* f2 = p2;
6871   return f1->number - f2->number;
6872 }
6873 
/* Records, on every field def of |m|, its position in the |fields| array
 * (f->layout_index), and computes l->dense_below.
 *
 * dense_below ends up as idx + 1 for the last position idx (below UINT8_MAX)
 * at which fields[idx].number == idx + 1 and the preceding entry, if any,
 * has number idx. */
static void assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l,
                                  upb_MiniTable_Field* fields) {
  const int total = upb_MessageDef_numfields(m);
  int dense = 0;

  for (int idx = 0; idx < total; idx++) {
    upb_FieldDef* def = (upb_FieldDef*)upb_MessageDef_FindFieldByNumber(
        m, fields[idx].number);
    UPB_ASSERT(def);
    def->layout_index = idx;

    if (idx < UINT8_MAX && fields[idx].number == idx + 1 &&
        (idx == 0 || fields[idx - 1].number == idx)) {
      dense = idx + 1;
    }
  }

  l->dense_below = dense;
}
6891 
map_descriptortype(const upb_FieldDef * f)6892 static uint8_t map_descriptortype(const upb_FieldDef* f) {
6893   uint8_t type = upb_FieldDef_Type(f);
6894   /* See TableDescriptorType() in upbc/generator.cc for details and
6895    * rationale of these exceptions. */
6896   if (type == kUpb_FieldType_String && f->file->syntax == kUpb_Syntax_Proto2) {
6897     return kUpb_FieldType_Bytes;
6898   } else if (type == kUpb_FieldType_Enum &&
6899              (f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3 ||
6900               UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 ||
6901               // TODO(https://github.com/protocolbuffers/upb/issues/541):
6902               // fix map enum values to check for unknown enum values and put
6903               // them in the unknown field set.
6904               upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f)))) {
6905     return kUpb_FieldType_Int32;
6906   }
6907   return type;
6908 }
6909 
/* Fills the number, descriptortype and mode members of mini-table entry
 * |field| from field def |f|.
 *
 * mode combines a field-mode value (map, array or scalar) with a
 * representation value shifted by kUpb_FieldRep_Shift.  Maps and arrays use
 * the pointer representation; scalars take theirs from a table indexed by
 * descriptor type.  The packed and extension flags are OR-ed in afterwards. */
static void fill_fieldlayout(upb_MiniTable_Field* field,
                             const upb_FieldDef* f) {
  /* Maps descriptor type -> elem_size_lg2.  */
  static const uint8_t rep_by_type[] = {
      -1,                       /* invalid descriptor type */
      kUpb_FieldRep_8Byte,      /* DOUBLE */
      kUpb_FieldRep_4Byte,      /* FLOAT */
      kUpb_FieldRep_8Byte,      /* INT64 */
      kUpb_FieldRep_8Byte,      /* UINT64 */
      kUpb_FieldRep_4Byte,      /* INT32 */
      kUpb_FieldRep_8Byte,      /* FIXED64 */
      kUpb_FieldRep_4Byte,      /* FIXED32 */
      kUpb_FieldRep_1Byte,      /* BOOL */
      kUpb_FieldRep_StringView, /* STRING */
      kUpb_FieldRep_Pointer,    /* GROUP */
      kUpb_FieldRep_Pointer,    /* MESSAGE */
      kUpb_FieldRep_StringView, /* BYTES */
      kUpb_FieldRep_4Byte,      /* UINT32 */
      kUpb_FieldRep_4Byte,      /* ENUM */
      kUpb_FieldRep_4Byte,      /* SFIXED32 */
      kUpb_FieldRep_8Byte,      /* SFIXED64 */
      kUpb_FieldRep_4Byte,      /* SINT32 */
      kUpb_FieldRep_8Byte,      /* SINT64 */
  };

  field->number = upb_FieldDef_Number(f);
  field->descriptortype = map_descriptortype(f);

  if (upb_FieldDef_IsMap(f)) {
    field->mode =
        kUpb_FieldMode_Map | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift);
  } else if (upb_FieldDef_IsRepeated(f)) {
    field->mode =
        kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift);
  } else {
    field->mode = kUpb_FieldMode_Scalar |
                  (rep_by_type[field->descriptortype] << kUpb_FieldRep_Shift);
  }

  if (upb_FieldDef_IsPacked(f)) {
    field->mode |= kUpb_LabelFlags_IsPacked;
  }
  if (upb_FieldDef_IsExtension(f)) {
    field->mode |= kUpb_LabelFlags_IsExtension;
  }
}
6956 
6957 /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
6958  * It computes a dynamic layout for all of the fields in |m|. */
make_layout(symtab_addctx * ctx,const upb_MessageDef * m)6959 static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
6960   upb_MiniTable* l = (upb_MiniTable*)m->layout;
6961   size_t field_count = upb_MessageDef_numfields(m);
6962   size_t sublayout_count = 0;
6963   upb_MiniTable_Sub* subs;
6964   upb_MiniTable_Field* fields;
6965 
6966   memset(l, 0, sizeof(*l) + sizeof(_upb_FastTable_Entry));
6967 
6968   /* Count sub-messages. */
6969   for (size_t i = 0; i < field_count; i++) {
6970     const upb_FieldDef* f = &m->fields[i];
6971     if (upb_FieldDef_IsSubMessage(f)) {
6972       sublayout_count++;
6973     }
6974     if (upb_FieldDef_CType(f) == kUpb_CType_Enum &&
6975         f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) {
6976       sublayout_count++;
6977     }
6978   }
6979 
6980   fields = symtab_alloc(ctx, field_count * sizeof(*fields));
6981   subs = symtab_alloc(ctx, sublayout_count * sizeof(*subs));
6982 
6983   l->field_count = upb_MessageDef_numfields(m);
6984   l->fields = fields;
6985   l->subs = subs;
6986   l->table_mask = 0;
6987   l->required_count = 0;
6988 
6989   if (upb_MessageDef_ExtensionRangeCount(m) > 0) {
6990     if (google_protobuf_MessageOptions_message_set_wire_format(m->opts)) {
6991       l->ext = kUpb_ExtMode_IsMessageSet;
6992     } else {
6993       l->ext = kUpb_ExtMode_Extendable;
6994     }
6995   } else {
6996     l->ext = kUpb_ExtMode_NonExtendable;
6997   }
6998 
6999   /* TODO(haberman): initialize fast tables so that reflection-based parsing
7000    * can get the same speeds as linked-in types. */
7001   l->fasttable[0].field_parser = &fastdecode_generic;
7002   l->fasttable[0].field_data = 0;
7003 
7004   if (upb_MessageDef_IsMapEntry(m)) {
7005     /* TODO(haberman): refactor this method so this special case is more
7006      * elegant. */
7007     const upb_FieldDef* key = upb_MessageDef_FindFieldByNumber(m, 1);
7008     const upb_FieldDef* val = upb_MessageDef_FindFieldByNumber(m, 2);
7009     if (key == NULL || val == NULL) {
7010       symtab_errf(ctx, "Malformed map entry from message: %s",
7011                   upb_MessageDef_FullName(m));
7012     }
7013     fields[0].number = 1;
7014     fields[1].number = 2;
7015     fields[0].mode = kUpb_FieldMode_Scalar;
7016     fields[1].mode = kUpb_FieldMode_Scalar;
7017     fields[0].presence = 0;
7018     fields[1].presence = 0;
7019     fields[0].descriptortype = map_descriptortype(key);
7020     fields[1].descriptortype = map_descriptortype(val);
7021     fields[0].offset = 0;
7022     fields[1].offset = sizeof(upb_StringView);
7023     fields[1].submsg_index = 0;
7024 
7025     if (upb_FieldDef_CType(val) == kUpb_CType_Message) {
7026       subs[0].submsg = upb_FieldDef_MessageSubDef(val)->layout;
7027     }
7028 
7029     upb_FieldDef* fielddefs = (upb_FieldDef*)&m->fields[0];
7030     UPB_ASSERT(fielddefs[0].number_ == 1);
7031     UPB_ASSERT(fielddefs[1].number_ == 2);
7032     fielddefs[0].layout_index = 0;
7033     fielddefs[1].layout_index = 1;
7034 
7035     l->field_count = 2;
7036     l->size = 2 * sizeof(upb_StringView);
7037     l->size = UPB_ALIGN_UP(l->size, 8);
7038     l->dense_below = 2;
7039     return;
7040   }
7041 
7042   /* Allocate data offsets in three stages:
7043    *
7044    * 1. hasbits.
7045    * 2. regular fields.
7046    * 3. oneof fields.
7047    *
7048    * OPT: There is a lot of room for optimization here to minimize the size.
7049    */
7050 
7051   /* Assign hasbits for required fields first. */
7052   size_t hasbit = 0;
7053 
7054   for (int i = 0; i < m->field_count; i++) {
7055     const upb_FieldDef* f = &m->fields[i];
7056     upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)];
7057     if (upb_FieldDef_Label(f) == kUpb_Label_Required) {
7058       field->presence = ++hasbit;
7059       if (hasbit >= 63) {
7060         symtab_errf(ctx, "Message with >=63 required fields: %s",
7061                     upb_MessageDef_FullName(m));
7062       }
7063       l->required_count++;
7064     }
7065   }
7066 
7067   /* Allocate hasbits and set basic field attributes. */
7068   sublayout_count = 0;
7069   for (int i = 0; i < m->field_count; i++) {
7070     const upb_FieldDef* f = &m->fields[i];
7071     upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)];
7072 
7073     fill_fieldlayout(field, f);
7074 
7075     if (field->descriptortype == kUpb_FieldType_Message ||
7076         field->descriptortype == kUpb_FieldType_Group) {
7077       field->submsg_index = sublayout_count++;
7078       subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout;
7079     } else if (field->descriptortype == kUpb_FieldType_Enum) {
7080       field->submsg_index = sublayout_count++;
7081       subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout;
7082       UPB_ASSERT(subs[field->submsg_index].subenum);
7083     }
7084 
7085     if (upb_FieldDef_Label(f) == kUpb_Label_Required) {
7086       /* Hasbit was already assigned. */
7087     } else if (upb_FieldDef_HasPresence(f) &&
7088                !upb_FieldDef_RealContainingOneof(f)) {
7089       /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
7090        * table. This wastes one hasbit, but we don't worry about it for now. */
7091       field->presence = ++hasbit;
7092     } else {
7093       field->presence = 0;
7094     }
7095   }
7096 
7097   /* Account for space used by hasbits. */
7098   l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0;
7099 
7100   /* Allocate non-oneof fields. */
7101   for (int i = 0; i < m->field_count; i++) {
7102     const upb_FieldDef* f = &m->fields[i];
7103     size_t field_size = upb_msg_fielddefsize(f);
7104     size_t index = upb_FieldDef_Index(f);
7105 
7106     if (upb_FieldDef_RealContainingOneof(f)) {
7107       /* Oneofs are handled separately below. */
7108       continue;
7109     }
7110 
7111     fields[index].offset = upb_MiniTable_place(ctx, l, field_size, m);
7112   }
7113 
7114   /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
7115    * and space for the actual data. */
7116   for (int i = 0; i < m->oneof_count; i++) {
7117     const upb_OneofDef* o = &m->oneofs[i];
7118     size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
7119     size_t field_size = 0;
7120     uint32_t case_offset;
7121     uint32_t data_offset;
7122 
7123     if (upb_OneofDef_IsSynthetic(o)) continue;
7124 
7125     if (o->field_count == 0) {
7126       symtab_errf(ctx, "Oneof must have at least one field (%s)", o->full_name);
7127     }
7128 
7129     /* Calculate field size: the max of all field sizes. */
7130     for (int j = 0; j < o->field_count; j++) {
7131       const upb_FieldDef* f = o->fields[j];
7132       field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
7133     }
7134 
7135     /* Align and allocate case offset. */
7136     case_offset = upb_MiniTable_place(ctx, l, case_size, m);
7137     data_offset = upb_MiniTable_place(ctx, l, field_size, m);
7138 
7139     for (int i = 0; i < o->field_count; i++) {
7140       const upb_FieldDef* f = o->fields[i];
7141       fields[upb_FieldDef_Index(f)].offset = data_offset;
7142       fields[upb_FieldDef_Index(f)].presence = ~case_offset;
7143     }
7144   }
7145 
7146   /* Size of the entire structure should be a multiple of its greatest
7147    * alignment.  TODO: track overall alignment for real? */
7148   l->size = UPB_ALIGN_UP(l->size, 8);
7149 
7150   /* Sort fields by number. */
7151   if (fields) {
7152     qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields),
7153           field_number_cmp);
7154   }
7155   assign_layout_indices(m, l, fields);
7156 }
7157 
/* Copies |view| into ctx->arena using upb_strdup2() and returns the copy.
 * A failed copy is reported through the out-of-memory path (CHK_OOM). */
static char* strviewdup(symtab_addctx* ctx, upb_StringView view) {
  char* copy = upb_strdup2(view.data, view.size, ctx->arena);
  CHK_OOM(copy);
  return copy;
}
7163 
/* Returns true when the |n| bytes starting at |a| are exactly the contents of
 * the NUL-terminated string |b| (same length, same bytes). */
static bool streql2(const char* a, size_t n, const char* b) {
  if (n != strlen(b)) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
7167 
/* Returns true when the contents of |view| equal the NUL-terminated string
 * |b|; delegates to streql2(). */
static bool streql_view(upb_StringView view, const char* b) {
  const char* bytes = view.data;
  const size_t nbytes = view.size;
  return streql2(bytes, nbytes, b);
}
7171 
/* Builds a fully-qualified name in the context arena.
 *
 * With a non-NULL |prefix| the result is prefix + '.' + name; with a NULL
 * |prefix| it is just a copy of |name|.  The result is NUL-terminated. */
static const char* makefullname(symtab_addctx* ctx, const char* prefix,
                                upb_StringView name) {
  if (!prefix) {
    return strviewdup(ctx, name);
  }

  /* Room for the prefix, the '.', the name and the trailing NUL. */
  const size_t prefix_len = strlen(prefix);
  char* full = symtab_alloc(ctx, prefix_len + name.size + 2);

  memcpy(full, prefix, prefix_len);
  full[prefix_len] = '.';
  memcpy(&full[prefix_len + 1], name.data, name.size);
  full[prefix_len + 1 + name.size] = '\0';
  return full;
}
7187 
/* Validates the oneofs of |m| and fills in each oneof's field array.
 *
 * First pass, per oneof: a synthetic oneof must have exactly one field, and
 * no non-synthetic oneof may follow a synthetic one; violations are reported
 * through symtab_errf().  The oneof's fields array is then allocated using
 * the field count gathered so far, and the count is reset to zero.
 *
 * Second pass: every field of |m| that belongs to a oneof is appended to that
 * oneof's array, which restores the count.
 *
 * Finally m->real_oneof_count is set to the number of non-synthetic oneofs. */
static void finalize_oneofs(symtab_addctx* ctx, upb_MessageDef* m) {
  upb_OneofDef* oneofs = (upb_OneofDef*)m->oneofs;
  int num_synthetic = 0;

  for (int n = 0; n < m->oneof_count; n++) {
    upb_OneofDef* o = &oneofs[n];

    if (o->synthetic) {
      if (o->field_count != 1) {
        symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s",
                    o->field_count, upb_OneofDef_Name(o));
      }
      num_synthetic++;
    } else if (num_synthetic != 0) {
      symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s",
                  upb_OneofDef_Name(o));
    }

    o->fields = symtab_alloc(ctx, sizeof(upb_FieldDef*) * o->field_count);
    o->field_count = 0;
  }

  for (int n = 0; n < m->field_count; n++) {
    const upb_FieldDef* f = &m->fields[n];
    upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f);
    if (!o) {
      continue;
    }
    o->fields[o->field_count] = f;
    o->field_count++;
  }

  m->real_oneof_count = m->oneof_count - num_synthetic;
}
7222 
/* Writes the JSON (lowerCamelCase) form of |name| into |buf|.
 *
 * name: NUL-terminated field name, or NULL.
 * buf:  output buffer of |len| bytes; may be NULL only when |len| is 0.
 * len:  capacity of |buf|.  Output is truncated to fit and, when len > 0,
 *       is always NUL-terminated.
 *
 * Returns the number of bytes the full result needs, including the
 * terminating NUL, regardless of |len| (so calling with len == 0 measures the
 * required size).  For a NULL |name| an empty string is written and 0 is
 * returned. */
size_t getjsonname(const char* name, char* buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte, counting it even when it no longer fits.  The last byte
 * that fits is replaced by a NUL so truncated output stays terminated. */
#define WRITE(byte)          \
  do {                       \
    ++dst;                   \
    if (dst < len) {         \
      buf[dst - 1] = (byte); \
    } else if (dst == len) { \
      buf[dst - 1] = '\0';   \
    }                        \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() requires a value representable as unsigned char (or EOF);
       * a plain char may be negative, so convert first. */
      WRITE(toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
7262 
/* Returns the JSON form of |name| in storage obtained from symtab_alloc().
 * getjsonname() is called twice: once with no buffer to measure the required
 * size, then again to fill the allocated buffer. */
static char* makejsonname(symtab_addctx* ctx, const char* name) {
  const size_t needed = getjsonname(name, NULL, 0);
  char* out = symtab_alloc(ctx, needed);
  getjsonname(name, out, needed);
  return out;
}
7269 
7270 /* Adds a symbol |v| to the symtab, which must be a def pointer previously
7271  * packed with pack_def().  The def's pointer to upb_FileDef* must be set before
7272  * adding, so we know which entries to remove if building this file fails. */
symtab_add(symtab_addctx * ctx,const char * name,upb_value v)7273 static void symtab_add(symtab_addctx* ctx, const char* name, upb_value v) {
7274   // TODO: table should support an operation "tryinsert" to avoid the double
7275   // lookup.
7276   if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) {
7277     symtab_errf(ctx, "duplicate symbol '%s'", name);
7278   }
7279   size_t len = strlen(name);
7280   CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v,
7281                               ctx->symtab->arena));
7282 }
7283 
/* Drops the last dot-separated component from the name held in
 * base[0 .. *len).
 *
 * On return *len is the index of the last '.' found at a position >= 1, or 0
 * when there is none (position 0 itself is never inspected).  Returns false,
 * leaving *len untouched, only when *len is already 0. */
static bool remove_component(char* base, size_t* len) {
  size_t pos = *len;
  if (pos == 0) return false;

  /* Walk backwards from the final character; stop on a '.' or at index 0. */
  while (--pos > 0) {
    if (base[pos] == '.') break;
  }

  *len = pos;
  return true;
}
7297 
7298 /* Given a symbol and the base symbol inside which it is defined, find the
7299  * symbol's definition in t. */
symtab_resolveany(symtab_addctx * ctx,const char * from_name_dbg,const char * base,upb_StringView sym,upb_deftype_t * type)7300 static const void* symtab_resolveany(symtab_addctx* ctx,
7301                                      const char* from_name_dbg,
7302                                      const char* base, upb_StringView sym,
7303                                      upb_deftype_t* type) {
7304   const upb_strtable* t = &ctx->symtab->syms;
7305   if (sym.size == 0) goto notfound;
7306   upb_value v;
7307   if (sym.data[0] == '.') {
7308     /* Symbols starting with '.' are absolute, so we do a single lookup.
7309      * Slice to omit the leading '.' */
7310     if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
7311       goto notfound;
7312     }
7313   } else {
7314     /* Remove components from base until we find an entry or run out. */
7315     size_t baselen = base ? strlen(base) : 0;
7316     char* tmp = malloc(sym.size + baselen + 1);
7317     while (1) {
7318       char* p = tmp;
7319       if (baselen) {
7320         memcpy(p, base, baselen);
7321         p[baselen] = '.';
7322         p += baselen + 1;
7323       }
7324       memcpy(p, sym.data, sym.size);
7325       p += sym.size;
7326       if (upb_strtable_lookup2(t, tmp, p - tmp, &v)) {
7327         break;
7328       }
7329       if (!remove_component(tmp, &baselen)) {
7330         free(tmp);
7331         goto notfound;
7332       }
7333     }
7334     free(tmp);
7335   }
7336 
7337   *type = deftype(v);
7338   return unpack_def(v, *type);
7339 
7340 notfound:
7341   symtab_errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
7342               UPB_STRINGVIEW_ARGS(sym));
7343 }
7344 
/* Resolves |sym| relative to |base| via symtab_resolveany() and additionally
 * requires the def that was found to be of kind |type|; a different kind is
 * reported through symtab_errf().  |from_name_dbg| names the referencing
 * entity and is only used in that error message. */
static const void* symtab_resolve(symtab_addctx* ctx, const char* from_name_dbg,
                                  const char* base, upb_StringView sym,
                                  upb_deftype_t type) {
  upb_deftype_t actual;
  const void* def = symtab_resolveany(ctx, from_name_dbg, base, sym, &actual);

  /* Nothing to verify for a null result or a def of the requested kind. */
  if (!def || actual == type) return def;

  symtab_errf(ctx,
              "type mismatch when resolving %s: couldn't find "
              "name " UPB_STRINGVIEW_FORMAT " with type=%d",
              from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
  return def;
}
7359 
/* Initializes the oneof def |_o| of message |m| from |oneof_proto|.
 *
 * Sets the parent, full name and options, resets the field count and the
 * synthetic flag, registers the oneof by name in m->ntof (a name that is
 * already present there is an error) and creates the oneof's own, initially
 * empty, number->field and name->field tables. */
static void create_oneofdef(
    symtab_addctx* ctx, upb_MessageDef* m,
    const google_protobuf_OneofDescriptorProto* oneof_proto,
    const upb_OneofDef* _o) {
  upb_OneofDef* o = (upb_OneofDef*)_o;
  const upb_StringView name =
      google_protobuf_OneofDescriptorProto_name(oneof_proto);

  o->parent = m;
  o->full_name = makefullname(ctx, m->full_name, name);
  o->field_count = 0;
  o->synthetic = false;

  SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto);

  /* Oneofs share the message's name table with its fields. */
  upb_value existing_v;
  if (upb_strtable_lookup2(&m->ntof, name.data, name.size, &existing_v)) {
    symtab_errf(ctx, "duplicate oneof name (%s)", o->full_name);
  }

  const upb_value packed = pack_def(o, UPB_DEFTYPE_ONEOF);
  CHK_OOM(
      upb_strtable_insert(&m->ntof, name.data, name.size, packed, ctx->arena));

  CHK_OOM(upb_inttable_init(&o->itof, ctx->arena));
  CHK_OOM(upb_strtable_init(&o->ntof, 4, ctx->arena));
}
7386 
newstr(symtab_addctx * ctx,const char * data,size_t len)7387 static str_t* newstr(symtab_addctx* ctx, const char* data, size_t len) {
7388   str_t* ret = symtab_alloc(ctx, sizeof(*ret) + len);
7389   CHK_OOM(ret);
7390   ret->len = len;
7391   if (len) memcpy(ret->str, data, len);
7392   ret->str[len] = '\0';
7393   return ret;
7394 }
7395 
/* Reads the next character of the range [*src, end).
 *
 * When the range is non-empty the character is stored in |*ch|, |*src| is
 * advanced past it and true is returned.  At |end| nothing is modified and
 * false is returned. */
static bool upb_DefPool_TryGetChar(const char** src, const char* end,
                                   char* ch) {
  const char* cur = *src;
  if (cur == end) return false;

  *ch = *cur;
  *src = cur + 1;
  return true;
}
7403 
/* Tries to consume one hexadecimal digit (either case) from [*src, end).
 *
 * Returns the digit's value, 0..15, and advances |*src| past it.  When the
 * range is empty or the next character is not a hex digit, |*src| is left
 * where it was and -1 is returned.
 *
 * The -1 travels through plain `char`, so it only compares negative where
 * `char` is a signed type.  |ctx| and |f| are not used. */
static char upb_DefPool_TryGetHexDigit(symtab_addctx* ctx,
                                       const upb_FieldDef* f, const char** src,
                                       const char* end) {
  char c;
  if (!upb_DefPool_TryGetChar(src, end, &c)) return -1;

  if (c >= '0' && c <= '9') return c - '0';

  c = upb_ascii_lower(c);
  if (c >= 'a' && c <= 'f') return c - 'a' + 0xa;

  *src -= 1;  // Char wasn't actually a hex digit.
  return -1;
}
7419 
/* Parses the hex digits of a "\x" escape, starting at |*src| (just past the
 * 'x').  Every consecutive hex digit is consumed and |*src| is left on the
 * first character that is not one.
 *
 * Returns the byte the digits encode.  Reports an error through symtab_errf()
 * when there is no digit at all or when the value does not fit in 8 bits; |f|
 * is only used to name the field in those messages. */
static char upb_DefPool_ParseHexEscape(symtab_addctx* ctx,
                                       const upb_FieldDef* f, const char** src,
                                       const char* end) {
  /* upb_DefPool_TryGetHexDigit() signals "no digit" with -1 returned through
   * plain char.  Where char is unsigned that value arrives as a large positive
   * number, so testing the sign alone never sees the sentinel and the digit
   * loop below would never terminate.  Holding the result in an int and
   * checking the full 0..15 range is correct for either signedness. */
  int hex_digit = upb_DefPool_TryGetHexDigit(ctx, f, src, end);
  if (hex_digit < 0 || hex_digit > 0xf) {
    symtab_errf(ctx,
                "\\x cannot be followed by non-hex digit in field '%s' default",
                upb_FieldDef_FullName(f));
    return 0;
  }

  unsigned int ret = (unsigned int)hex_digit;
  for (;;) {
    hex_digit = upb_DefPool_TryGetHexDigit(ctx, f, src, end);
    if (hex_digit < 0 || hex_digit > 0xf) break;
    /* Stop accumulating once the value is already too large: the remaining
     * digits are still consumed, but a long digit run can no longer wrap the
     * accumulator around and slip past the range check below. */
    if (ret <= 0xff) {
      ret = (ret << 4) | (unsigned int)hex_digit;
    }
  }

  if (ret > 0xff) {
    symtab_errf(ctx, "Value of hex escape in field %s exceeds 8 bits",
                upb_FieldDef_FullName(f));
    return 0;
  }
  return (char)ret;
}
7441 
/* Tries to consume one octal digit from [*src, end).
 *
 * Returns the digit's value, 0..7, and advances |*src| past it.  When the
 * range is empty or the next character is not an octal digit, |*src| is left
 * where it was and -1 is returned.
 *
 * The -1 travels through plain `char`, so it only compares negative where
 * `char` is a signed type. */
char upb_DefPool_TryGetOctalDigit(const char** src, const char* end) {
  char c;
  if (!upb_DefPool_TryGetChar(src, end, &c)) return -1;

  if (c < '0' || c > '7') {
    *src -= 1;  // Char wasn't actually an octal digit.
    return -1;
  }
  return c - '0';
}
7451 
/* Parses an octal escape of up to three digits starting at |*src| and returns
 * the byte it encodes.  |*src| is left on the first character that was not
 * consumed.  No digit at all yields 0.  A three-digit value above 0377 is not
 * rejected; it is reduced by the conversion to char.  |ctx| and |f| are not
 * used. */
static char upb_DefPool_ParseOctalEscape(symtab_addctx* ctx,
                                         const upb_FieldDef* f,
                                         const char** src, const char* end) {
  unsigned int value = 0;
  for (int i = 0; i < 3; i++) {
    /* upb_DefPool_TryGetOctalDigit() signals "no digit" with -1 returned
     * through plain char.  Where char is unsigned that value arrives as a
     * large positive number, so it has to be recognized by range rather than
     * by sign; otherwise it would be OR-ed into the result. */
    const int digit = upb_DefPool_TryGetOctalDigit(src, end);
    if (digit < 0 || digit > 7) {
      /* A failed read does not move |*src|, so further attempts would fail at
       * the same character. */
      break;
    }
    value = (value << 3) | (unsigned int)digit;
  }
  return (char)value;
}
7464 
/* Decodes one escape sequence whose leading backslash has already been
 * consumed; |*src| points at the character after it and is advanced past the
 * whole sequence.
 *
 * Handles the single-character escapes a b f n r t v \ ' " ?, hex escapes
 * introduced by 'x' or 'X', and octal escapes.  A missing character after the
 * backslash or an unrecognized one is reported through symtab_errf(). */
static char upb_DefPool_ParseEscape(symtab_addctx* ctx, const upb_FieldDef* f,
                                    const char** src, const char* end) {
  char ch;
  if (!upb_DefPool_TryGetChar(src, end, &ch)) {
    symtab_errf(ctx, "unterminated escape sequence in field %s",
                upb_FieldDef_FullName(f));
    return 0;
  }

  if (ch >= '0' && ch <= '7') {
    /* Put the first digit back so the octal parser sees all of them. */
    *src -= 1;
    return upb_DefPool_ParseOctalEscape(ctx, f, src, end);
  }

  if (ch == 'x' || ch == 'X') {
    return upb_DefPool_ParseHexEscape(ctx, f, src, end);
  }

  switch (ch) {
    case 'a':  return '\a';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    case 'v':  return '\v';
    case '\\': return '\\';
    case '\'': return '\'';
    case '\"': return '\"';
    case '?':  return '\?';
  }
  symtab_errf(ctx, "Unknown escape sequence: \\%c", ch);
}
7512 
unescape(symtab_addctx * ctx,const upb_FieldDef * f,const char * data,size_t len)7513 static str_t* unescape(symtab_addctx* ctx, const upb_FieldDef* f,
7514                        const char* data, size_t len) {
7515   // Size here is an upper bound; escape sequences could ultimately shrink it.
7516   str_t* ret = symtab_alloc(ctx, sizeof(*ret) + len);
7517   char* dst = &ret->str[0];
7518   const char* src = data;
7519   const char* end = data + len;
7520 
7521   while (src < end) {
7522     if (*src == '\\') {
7523       src++;
7524       *dst++ = upb_DefPool_ParseEscape(ctx, f, &src, end);
7525     } else {
7526       *dst++ = *src++;
7527     }
7528   }
7529 
7530   ret->len = dst - &ret->str[0];
7531   return ret;
7532 }
7533 
/* Parses the textual default value str[0 .. len) of field |f| according to
 * the field's C type and stores the result in f->defaultval.
 *
 *   - Integer and floating point defaults are parsed with the strto*() family
 *     (base 0 for integers) and must consume the whole text and be in range.
 *     Text that is empty parses as 0.  A '-' is rejected for unsigned types.
 *   - Enum defaults name one of the enum's values.
 *   - Bool defaults are "true" or "false".
 *   - String defaults are copied verbatim; bytes defaults are unescaped.
 *   - Message fields must not have a default.
 *
 * Any violation is reported through symtab_errf(). */
static void parse_default(symtab_addctx* ctx, const char* str, size_t len,
                          upb_FieldDef* f) {
  char* end;
  char nullz[64];
  errno = 0;

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Int32:
    case kUpb_CType_Int64:
    case kUpb_CType_UInt32:
    case kUpb_CType_UInt64:
    case kUpb_CType_Double:
    case kUpb_CType_Float:
      /* Standard C number parsing functions expect null-terminated strings. */
      if (len >= sizeof(nullz) - 1) {
        symtab_errf(ctx, "Default too long: %.*s", (int)len, str);
      }
      memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Int32: {
      long val = strtol(str, &end, 0);
      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case kUpb_CType_Enum: {
      const upb_EnumDef* e = f->sub.enumdef;
      const upb_EnumValueDef* ev =
          upb_EnumDef_FindValueByNameWithSize(e, str, len);
      if (!ev) {
        goto invalid;
      }
      f->defaultval.sint = ev->number;
      break;
    }
    case kUpb_CType_Int64: {
      long long val = strtoll(str, &end, 0);
      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.sint = val;
      break;
    }
    case kUpb_CType_UInt32: {
      /* strtoul() accepts a leading '-' and returns the negated value as an
       * unsigned number without setting errno, so a negative default has to
       * be rejected explicitly.  No valid unsigned literal contains '-'. */
      if (strchr(str, '-')) {
        goto invalid;
      }
      unsigned long val = strtoul(str, &end, 0);
      if (val > UINT32_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case kUpb_CType_UInt64: {
      /* Same as above: without this check "-1" would be stored as
       * UINT64_MAX. */
      if (strchr(str, '-')) {
        goto invalid;
      }
      unsigned long long val = strtoull(str, &end, 0);
      if (val > UINT64_MAX || errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.uint = val;
      break;
    }
    case kUpb_CType_Double: {
      double val = strtod(str, &end);
      if (errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.dbl = val;
      break;
    }
    case kUpb_CType_Float: {
      float val = strtof(str, &end);
      if (errno == ERANGE || *end) {
        goto invalid;
      }
      f->defaultval.flt = val;
      break;
    }
    case kUpb_CType_Bool: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        goto invalid;
      }
      break;
    }
    case kUpb_CType_String:
      f->defaultval.str = newstr(ctx, str, len);
      break;
    case kUpb_CType_Bytes:
      f->defaultval.str = unescape(ctx, f, str, len);
      break;
    case kUpb_CType_Message:
      /* Should not have a default value. */
      symtab_errf(ctx, "Message should not have a default (%s)",
                  upb_FieldDef_FullName(f));
      break;
  }

  return;

invalid:
  symtab_errf(ctx, "Invalid default '%.*s' for field %s of type %d", (int)len,
              str, upb_FieldDef_FullName(f), (int)upb_FieldDef_Type(f));
}
7646 
/* Stores the implicit default for field |f|, used when the descriptor does
 * not specify one: zero for numeric types, false for bool, an empty string for
 * string/bytes, and the number of the enum's first value for enums.  Message
 * fields are left untouched. */
static void set_default_default(symtab_addctx* ctx, upb_FieldDef* f) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Message:
      /* No default to store. */
      break;

    case kUpb_CType_Bool:
      f->defaultval.boolean = false;
      break;

    case kUpb_CType_Int32:
    case kUpb_CType_Int64:
      f->defaultval.sint = 0;
      break;

    case kUpb_CType_UInt32:
    case kUpb_CType_UInt64:
      f->defaultval.uint = 0;
      break;

    case kUpb_CType_Float:
    case kUpb_CType_Double:
      /* Both floating point types are cleared through the double member. */
      f->defaultval.dbl = 0;
      break;

    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      f->defaultval.str = newstr(ctx, NULL, 0);
      break;

    case kUpb_CType_Enum:
      f->defaultval.sint = f->sub.enumdef->values[0].number;
      break;
  }
}
7674 
/* Initializes the field def |_f| from |field_proto|.
 *
 * |prefix| is the scope used to build the field's full name.  For a regular
 * field (|is_extension| false) |m| is the containing message and the field is
 * registered in the message's name and number tables.  For an extension |m| is
 * the message the extension is declared in (it is only stored as the
 * extension scope) and the field is added to the symtab under its full name.
 *
 * The sub-def (message/enum type, and for extensions the extendee) is not
 * resolved here; |field_proto| is stashed in f->sub.unresolved for a later
 * pass.  All validation failures are reported through symtab_errf(). */
static void create_fielddef(
    symtab_addctx* ctx, const char* prefix, upb_MessageDef* m,
    const google_protobuf_FieldDescriptorProto* field_proto,
    const upb_FieldDef* _f, bool is_extension) {
  upb_FieldDef* f = (upb_FieldDef*)_f;
  upb_StringView name;
  const char* full_name;
  const char* json_name;
  const char* shortname;
  int32_t field_number;

  f->file = ctx->file; /* Must happen prior to symtab_add(). */

  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
    symtab_errf(ctx, "field has no name");
  }

  name = google_protobuf_FieldDescriptorProto_name(field_proto);
  check_ident(ctx, name, false);
  full_name = makefullname(ctx, prefix, name);
  shortname = shortdefname(full_name);

  /* Use the explicit JSON name if the descriptor has one, otherwise derive
   * it from the short name. */
  if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
    json_name = strviewdup(
        ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
    f->has_json_name_ = true;
  } else {
    json_name = makejsonname(ctx, shortname);
    f->has_json_name_ = false;
  }

  field_number = google_protobuf_FieldDescriptorProto_number(field_proto);

  f->full_name = full_name;
  f->json_name = json_name;
  f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
  f->number_ = field_number;
  f->scope.oneof = NULL;
  f->proto3_optional_ =
      google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);

  bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto);
  bool has_type_name =
      google_protobuf_FieldDescriptorProto_has_type_name(field_proto);

  f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);

  if (has_type) {
    /* Message, group and enum fields need a type name; no other type may
     * carry one. */
    switch (f->type_) {
      case kUpb_FieldType_Message:
      case kUpb_FieldType_Group:
      case kUpb_FieldType_Enum:
        if (!has_type_name) {
          symtab_errf(ctx, "field of type %d requires type name (%s)",
                      (int)f->type_, full_name);
        }
        break;
      default:
        if (has_type_name) {
          symtab_errf(ctx, "invalid type for field with type_name set (%s, %d)",
                      full_name, (int)f->type_);
        }
    }
  } else if (has_type_name) {
    f->type_ =
        FIELD_TYPE_UNSPECIFIED;  // We'll fill this in in resolve_fielddef().
  }

  if (!is_extension) {
    /* direct message field. */
    upb_value v, field_v, json_v, existing_v;
    size_t json_size;

    if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) {
      /* field_number is signed; print it as such so that a negative number
       * is not shown as a huge unsigned value. */
      symtab_errf(ctx, "invalid field number (%d)", (int)field_number);
    }

    f->index_ = f - m->fields;
    f->msgdef = m;
    f->is_extension_ = false;

    field_v = pack_def(f, UPB_DEFTYPE_FIELD);
    json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
    v = upb_value_constptr(f);
    json_size = strlen(json_name);

    if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) {
      symtab_errf(ctx, "duplicate field name (%s)", shortname);
    }

    CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v,
                                ctx->arena));

    /* The JSON name gets its own entry only when it differs from the field
     * name. */
    if (strcmp(shortname, json_name) != 0) {
      /* Look up into the scratch value: |v| is inserted into m->itof below
       * and must not be used as a lookup output. */
      if (upb_strtable_lookup(&m->ntof, json_name, &existing_v)) {
        symtab_errf(ctx, "duplicate json_name (%s)", json_name);
      } else {
        CHK_OOM(upb_strtable_insert(&m->ntof, json_name, json_size, json_v,
                                    ctx->arena));
      }
    }

    if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
      symtab_errf(ctx, "duplicate field number (%d)", (int)field_number);
    }

    CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena));

    if (ctx->layout) {
      /* A layout was supplied: find this field's slot in it by number. */
      const upb_MiniTable_Field* fields = m->layout->fields;
      int count = m->layout->field_count;
      bool found = false;
      for (int i = 0; i < count; i++) {
        if (fields[i].number == field_number) {
          f->layout_index = i;
          found = true;
          break;
        }
      }
      UPB_ASSERT(found);
    }
  } else {
    /* extension field. */
    f->is_extension_ = true;
    f->scope.extension_scope = m;
    symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_EXT));
    f->layout_index = ctx->ext_count++;
    if (ctx->layout) {
      UPB_ASSERT(ctx->file->ext_layouts[f->layout_index]->field.number ==
                 field_number);
    }
  }

  /* NOTE(review): a field that has a type_name but no type was assigned
   * FIELD_TYPE_UNSPECIFIED above; confirm whether this range check is meant
   * to reject it. */
  if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) {
    symtab_errf(ctx, "invalid type for field %s (%d)", f->full_name, f->type_);
  }

  if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) {
    symtab_errf(ctx, "invalid label for field %s (%d)", f->full_name,
                f->label_);
  }

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (f->label_ == kUpb_Label_Required &&
      f->file->syntax == kUpb_Syntax_Proto3) {
    symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name);
  }

  if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
    uint32_t oneof_index =
        google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
    upb_OneofDef* oneof;
    upb_value v = upb_value_constptr(f);

    if (upb_FieldDef_Label(f) != kUpb_Label_Optional) {
      symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
                  f->full_name);
    }

    /* An extension declared inside a message is passed that message as |m|,
     * so |m| being non-NULL does not make the field a member of it.  Testing
     * only |m| would add such an extension to one of the enclosing message's
     * oneofs and overwrite the extension scope stored in f->scope above. */
    if (is_extension || !m) {
      symtab_errf(ctx, "oneof_index provided for extension field (%s)",
                  f->full_name);
    }

    if (oneof_index >= m->oneof_count) {
      symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name);
    }

    oneof = (upb_OneofDef*)&m->oneofs[oneof_index];
    f->scope.oneof = oneof;

    oneof->field_count++;
    if (f->proto3_optional_) {
      oneof->synthetic = true;
    }
    CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena));
    CHK_OOM(
        upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena));
  } else {
    if (f->proto3_optional_ && !is_extension) {
      symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)",
                  f->full_name);
    }
  }

  SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto);

  if (google_protobuf_FieldOptions_has_packed(f->opts)) {
    f->packed_ = google_protobuf_FieldOptions_packed(f->opts);
  } else {
    /* Repeated fields default to packed for proto3 only. */
    f->packed_ = upb_FieldDef_IsPrimitive(f) &&
                 f->label_ == kUpb_Label_Repeated &&
                 f->file->syntax == kUpb_Syntax_Proto3;
  }
}
7874 
/* Initializes the service def |_s| from |svc_proto|.
 *
 * The service is added to the symtab under "<file package>.<name>".  One
 * method def is then filled in per method of the proto: full name, index,
 * streaming flags, options, and the input/output message types, which are
 * resolved against the symtab right away. */
static void create_service(
    symtab_addctx* ctx, const google_protobuf_ServiceDescriptorProto* svc_proto,
    const upb_ServiceDef* _s) {
  upb_ServiceDef* s = (upb_ServiceDef*)_s;

  s->file = ctx->file; /* Must happen prior to symtab_add. */

  const upb_StringView svc_name =
      google_protobuf_ServiceDescriptorProto_name(svc_proto);
  check_ident(ctx, svc_name, false);
  s->full_name = makefullname(ctx, ctx->file->package, svc_name);
  symtab_add(ctx, s->full_name, pack_def(s, UPB_DEFTYPE_SERVICE));

  size_t method_count;
  const google_protobuf_MethodDescriptorProto* const* method_protos =
      google_protobuf_ServiceDescriptorProto_method(svc_proto, &method_count);

  s->method_count = method_count;
  s->methods = symtab_alloc(ctx, sizeof(*s->methods) * method_count);

  SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, svc_proto);

  for (size_t i = 0; i < method_count; i++) {
    const google_protobuf_MethodDescriptorProto* method_proto =
        method_protos[i];
    upb_MethodDef* method = (upb_MethodDef*)&s->methods[i];
    const upb_StringView method_name =
        google_protobuf_MethodDescriptorProto_name(method_proto);

    method->service = s;
    method->full_name = makefullname(ctx, s->full_name, method_name);
    method->index = i;
    method->client_streaming =
        google_protobuf_MethodDescriptorProto_client_streaming(method_proto);
    method->server_streaming =
        google_protobuf_MethodDescriptorProto_server_streaming(method_proto);

    /* Type names are resolved relative to the method's own full name. */
    method->input_type = symtab_resolve(
        ctx, method->full_name, method->full_name,
        google_protobuf_MethodDescriptorProto_input_type(method_proto),
        UPB_DEFTYPE_MSG);
    method->output_type = symtab_resolve(
        ctx, method->full_name, method->full_name,
        google_protobuf_MethodDescriptorProto_output_type(method_proto),
        UPB_DEFTYPE_MSG);

    SET_OPTIONS(method->opts, MethodDescriptorProto, MethodOptions,
                method_proto);
  }
}
7922 
/* Returns the number of set bits in |x|. */
static int count_bits_debug(uint64_t x) {
  // For assertions only, speed does not matter.
  int bits = 0;
  for (; x != 0; x >>= 1) {
    bits += (int)(x & 1);
  }
  return bits;
}
7932 
/* qsort()-style comparator for int32_t elements, ascending.  Returns -1, 0 or
 * 1. */
static int compare_int32(const void* a_ptr, const void* b_ptr) {
  const int32_t lhs = *(const int32_t*)a_ptr;
  const int32_t rhs = *(const int32_t*)b_ptr;
  /* Difference of two comparisons; no subtraction of the values themselves,
   * so there is nothing that could overflow. */
  return (lhs > rhs) - (lhs < rhs);
}
7938 
create_enumlayout(symtab_addctx * ctx,const upb_EnumDef * e)7939 upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
7940                                       const upb_EnumDef* e) {
7941   int n = 0;
7942   uint64_t mask = 0;
7943 
7944   for (int i = 0; i < e->value_count; i++) {
7945     uint32_t val = (uint32_t)e->values[i].number;
7946     if (val < 64) {
7947       mask |= 1ULL << val;
7948     } else {
7949       n++;
7950     }
7951   }
7952 
7953   int32_t* values = symtab_alloc(ctx, sizeof(*values) * n);
7954 
7955   if (n) {
7956     int32_t* p = values;
7957 
7958     // Add values outside the bitmask range to the list, as described in the
7959     // comments for upb_MiniTable_Enum.
7960     for (int i = 0; i < e->value_count; i++) {
7961       int32_t val = e->values[i].number;
7962       if ((uint32_t)val >= 64) {
7963         *p++ = val;
7964       }
7965     }
7966     UPB_ASSERT(p == values + n);
7967   }
7968 
7969   // Enums can have duplicate values; we must sort+uniq them.
7970   if (values) qsort(values, n, sizeof(*values), &compare_int32);
7971 
7972   int dst = 0;
7973   for (int i = 0; i < n; dst++) {
7974     int32_t val = values[i];
7975     while (i < n && values[i] == val) i++;  // Skip duplicates.
7976     values[dst] = val;
7977   }
7978   n = dst;
7979 
7980   UPB_ASSERT(upb_inttable_count(&e->iton) == n + count_bits_debug(mask));
7981 
7982   upb_MiniTable_Enum* layout = symtab_alloc(ctx, sizeof(*layout));
7983   layout->value_count = n;
7984   layout->mask = mask;
7985   layout->values = values;
7986 
7987   return layout;
7988 }
7989 
/* Initializes value number |i| of enum |e| from |val_proto|.
 *
 * The value is added to the symtab under "<prefix>.<name>" and to the enum's
 * name->value table.  It is added to the number->value table only if no
 * earlier value already claimed the same number.  For proto3 files the first
 * value (i == 0) must be zero. */
static void create_enumvaldef(
    symtab_addctx* ctx, const char* prefix,
    const google_protobuf_EnumValueDescriptorProto* val_proto, upb_EnumDef* e,
    int i) {
  upb_EnumValueDef* val = (upb_EnumValueDef*)&e->values[i];
  const upb_StringView name =
      google_protobuf_EnumValueDescriptorProto_name(val_proto);
  const upb_value val_ptr = upb_value_constptr(val);

  val->parent = e; /* Must happen prior to symtab_add(). */
  val->full_name = makefullname(ctx, prefix, name);
  val->number = google_protobuf_EnumValueDescriptorProto_number(val_proto);
  symtab_add(ctx, val->full_name, pack_def(val, UPB_DEFTYPE_ENUMVAL));

  SET_OPTIONS(val->opts, EnumValueDescriptorProto, EnumValueOptions, val_proto);

  const bool is_first = (i == 0);
  if (is_first && e->file->syntax == kUpb_Syntax_Proto3 && val->number != 0) {
    symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
                e->full_name);
  }

  CHK_OOM(
      upb_strtable_insert(&e->ntoi, name.data, name.size, val_ptr, ctx->arena));

  // Multiple enumerators can have the same number, first one wins.
  if (upb_inttable_lookup(&e->iton, val->number, NULL)) return;
  CHK_OOM(upb_inttable_insert(&e->iton, val->number, val_ptr, ctx->arena));
}
8018 
/* Copies the |n| reserved enum value names in |protos| into the arena of
 * |ctx| and returns the new array of string views, each of which points at
 * its own arena-owned copy of the name.
 *
 * Allocation failures are reported through CHK_OOM, like the other
 * allocations in this file, instead of being dereferenced. */
static upb_StringView* _upb_EnumReservedNames_New(
    symtab_addctx* ctx, int n, const upb_StringView* protos) {
  upb_StringView* sv =
      upb_Arena_Malloc(ctx->arena, sizeof(upb_StringView) * n);
  /* With n == 0 nothing is stored through |sv|, so only a failure for a
   * non-empty array needs to be treated as out-of-memory. */
  if (n > 0) {
    CHK_OOM(sv);
  }
  /* Index with the same type as |n| to avoid a signed/unsigned comparison. */
  for (int i = 0; i < n; i++) {
    sv[i].data =
        upb_strdup2(protos[i].data, protos[i].size, ctx->arena);
    CHK_OOM(sv[i].data);
    sv[i].size = protos[i].size;
  }
  return sv;
}
8030 
/* Initializes the enum def |_e| from |enum_proto|.
 *
 * The enum is added to the symtab under "<prefix>.<name>".  Its values are
 * created with the same |prefix|, i.e. they are named in the scope enclosing
 * the enum.  Reserved ranges, reserved names and options are copied from the
 * proto.  An enum without values is an error.
 *
 * For proto2 files a layout is attached: taken from ctx->layout when one was
 * supplied, otherwise built by create_enumlayout().  For other syntaxes the
 * layout is NULL. */
static void create_enumdef(
    symtab_addctx* ctx, const char* prefix,
    const google_protobuf_EnumDescriptorProto* enum_proto,
    const upb_MessageDef* containing_type, const upb_EnumDef* _e) {
  upb_EnumDef* e = (upb_EnumDef*)_e;

  e->file = ctx->file; /* Must happen prior to symtab_add() */
  e->containing_type = containing_type;

  const upb_StringView name =
      google_protobuf_EnumDescriptorProto_name(enum_proto);
  check_ident(ctx, name, false);

  e->full_name = makefullname(ctx, prefix, name);
  symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM));

  size_t n;
  const google_protobuf_EnumValueDescriptorProto* const* values =
      google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
  CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena));
  CHK_OOM(upb_inttable_init(&e->iton, ctx->arena));

  e->defaultval = 0;
  e->value_count = n;
  e->values = symtab_alloc(ctx, sizeof(*e->values) * n);

  if (n == 0) {
    symtab_errf(ctx, "enums must contain at least one value (%s)",
                e->full_name);
  }

  size_t n_res_range;
  const google_protobuf_EnumDescriptorProto_EnumReservedRange* const*
      res_ranges = google_protobuf_EnumDescriptorProto_reserved_range(
          enum_proto, &n_res_range);
  e->res_range_count = n_res_range;
  e->res_ranges = _upb_EnumReservedRanges_New(ctx, n_res_range, res_ranges, e);

  size_t n_res_name;
  const upb_StringView* res_names =
      google_protobuf_EnumDescriptorProto_reserved_name(enum_proto,
                                                        &n_res_name);
  e->res_name_count = n_res_name;
  e->res_names = _upb_EnumReservedNames_New(ctx, n_res_name, res_names);

  SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto);

  for (size_t i = 0; i < n; i++) {
    create_enumvaldef(ctx, prefix, values[i], e, i);
  }

  upb_inttable_compact(&e->iton, ctx->arena);

  if (e->file->syntax != kUpb_Syntax_Proto2) {
    e->layout = NULL;
  } else if (ctx->layout) {
    /* Consume the next enum layout of the supplied file layout and check it
     * describes as many distinct numbers as this enum has. */
    UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
    e->layout = ctx->layout->enums[ctx->enum_count++];
    UPB_ASSERT(upb_inttable_count(&e->iton) ==
               e->layout->value_count + count_bits_debug(e->layout->mask));
  } else {
    e->layout = create_enumlayout(ctx, e);
  }
}
8095 
8096 static void msgdef_create_nested(
8097     symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto,
8098     upb_MessageDef* m);
8099 
/* Copies the |n| reserved field names in |protos| into the arena of |ctx| and
 * returns the new array of string views, each of which points at its own
 * arena-owned copy of the name.
 *
 * Allocation failures are reported through CHK_OOM, like the other
 * allocations in this file, instead of being dereferenced. */
static upb_StringView* _upb_ReservedNames_New(symtab_addctx* ctx, int n,
                                              const upb_StringView* protos) {
  upb_StringView* sv = upb_Arena_Malloc(ctx->arena, sizeof(upb_StringView) * n);
  /* With n == 0 nothing is stored through |sv|, so only a failure for a
   * non-empty array needs to be treated as out-of-memory. */
  if (n > 0) {
    CHK_OOM(sv);
  }
  /* Index with the same type as |n| to avoid a signed/unsigned comparison. */
  for (int i = 0; i < n; i++) {
    sv[i].data =
        upb_strdup2(protos[i].data, protos[i].size, ctx->arena);
    CHK_OOM(sv[i].data);
    sv[i].size = protos[i].size;
  }
  return sv;
}
8110 
/* Fills in the message definition `_m` from the descriptor `msg_proto`.
 *
 * ctx:             build context (arena, layout source, status).
 * prefix:          scope used by makefullname() to qualify the message name.
 * msg_proto:       descriptor describing the message.
 * containing_type: enclosing message, or NULL for a top-level message.
 * _m:              storage for the definition; written through despite the
 *                  const qualifier.
 *
 * The message is registered in the symbol table under its full name, then its
 * oneofs, fields, extension ranges, reserved ranges and reserved names are
 * built, and finally its nested definitions via msgdef_create_nested(). */
static void create_msgdef(symtab_addctx* ctx, const char* prefix,
                          const google_protobuf_DescriptorProto* msg_proto,
                          const upb_MessageDef* containing_type,
                          const upb_MessageDef* _m) {
  upb_MessageDef* m = (upb_MessageDef*)_m;
  size_t oneof_n, field_n, ext_range_n, res_range_n, res_name_n;

  /* Must happen prior to symtab_add(). */
  m->file = ctx->file;
  m->containing_type = containing_type;

  upb_StringView short_name = google_protobuf_DescriptorProto_name(msg_proto);
  check_ident(ctx, short_name, false);

  m->full_name = makefullname(ctx, prefix, short_name);
  symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG));

  const google_protobuf_OneofDescriptorProto* const* oneof_protos =
      google_protobuf_DescriptorProto_oneof_decl(msg_proto, &oneof_n);
  const google_protobuf_FieldDescriptorProto* const* field_protos =
      google_protobuf_DescriptorProto_field(msg_proto, &field_n);
  const google_protobuf_DescriptorProto_ExtensionRange* const*
      ext_range_protos = google_protobuf_DescriptorProto_extension_range(
          msg_proto, &ext_range_n);
  const google_protobuf_DescriptorProto_ReservedRange* const*
      res_range_protos = google_protobuf_DescriptorProto_reserved_range(
          msg_proto, &res_range_n);
  const upb_StringView* res_name_protos =
      google_protobuf_DescriptorProto_reserved_name(msg_proto, &res_name_n);

  CHK_OOM(upb_inttable_init(&m->itof, ctx->arena));
  CHK_OOM(upb_strtable_init(&m->ntof, oneof_n + field_n, ctx->arena));

  if (!ctx->layout) {
    /* Allocate now (to allow cross-linking), populate later. */
    m->layout =
        symtab_alloc(ctx, sizeof(*m->layout) + sizeof(_upb_FastTable_Entry));
  } else {
    /* create_fielddef() below depends on this being set. */
    UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count);
    m->layout = ctx->layout->msgs[ctx->msg_count++];
    UPB_ASSERT(field_n == m->layout->field_count);
  }

  SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto);

  /* Oneofs. */
  m->oneof_count = oneof_n;
  m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * oneof_n);
  for (size_t k = 0; k < oneof_n; k++) {
    create_oneofdef(ctx, m, oneof_protos[k], &m->oneofs[k]);
  }

  /* Regular (non-extension) fields. */
  m->field_count = field_n;
  m->fields = symtab_alloc(ctx, sizeof(*m->fields) * field_n);
  for (size_t k = 0; k < field_n; k++) {
    create_fielddef(ctx, m->full_name, m, field_protos[k], &m->fields[k],
                    /* is_extension= */ false);
  }

  /* Extension ranges. */
  m->ext_range_count = ext_range_n;
  m->ext_ranges = symtab_alloc(ctx, sizeof(*m->ext_ranges) * ext_range_n);
  for (size_t k = 0; k < ext_range_n; k++) {
    const google_protobuf_DescriptorProto_ExtensionRange* range_proto =
        ext_range_protos[k];
    upb_ExtensionRange* range = (upb_ExtensionRange*)&m->ext_ranges[k];
    int32_t lo = google_protobuf_DescriptorProto_ExtensionRange_start(
        range_proto);
    int32_t hi = google_protobuf_DescriptorProto_ExtensionRange_end(
        range_proto);
    int32_t limit;
    if (google_protobuf_MessageOptions_message_set_wire_format(m->opts)) {
      limit = INT32_MAX;
    } else {
      limit = kUpb_MaxFieldNumber + 1;
    }

    // A full validation would also check that each range is disjoint, and that
    // none of the fields overlap with the extension ranges, but we are just
    // sanity checking here.
    bool range_ok = lo >= 1 && hi > lo && hi <= limit;
    if (!range_ok) {
      symtab_errf(ctx, "Extension range (%d, %d) is invalid, message=%s\n",
                  (int)lo, (int)hi, m->full_name);
    }

    range->start = lo;
    range->end = hi;
    SET_OPTIONS(range->opts, DescriptorProto_ExtensionRange,
                ExtensionRangeOptions, range_proto);
  }

  /* Reserved ranges and names. */
  m->res_range_count = res_range_n;
  m->res_ranges =
      _upb_MessageReservedRanges_New(ctx, res_range_n, res_range_protos, m);

  m->res_name_count = res_name_n;
  m->res_names = _upb_ReservedNames_New(ctx, res_name_n, res_name_protos);

  finalize_oneofs(ctx, m);
  assign_msg_wellknowntype(m);
  upb_inttable_compact(&m->itof, ctx->arena);
  msgdef_create_nested(ctx, msg_proto, m);
}
8209 
/* Builds the definitions nested inside message `m`: first its enums, then its
 * extensions, then its nested messages, each read from `msg_proto` and stored
 * in arrays obtained from symtab_alloc(). */
static void msgdef_create_nested(
    symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto,
    upb_MessageDef* m) {
  size_t enum_n, ext_n, msg_n;

  /* Nested enums. */
  const google_protobuf_EnumDescriptorProto* const* enum_protos =
      google_protobuf_DescriptorProto_enum_type(msg_proto, &enum_n);
  m->nested_enum_count = enum_n;
  m->nested_enums = symtab_alloc(ctx, sizeof(*m->nested_enums) * enum_n);
  for (size_t k = 0; k < enum_n; k++) {
    /* The count is rewritten before each enum is created, so while enum k is
     * being built it reads k + 1. */
    m->nested_enum_count = k + 1;
    create_enumdef(ctx, m->full_name, enum_protos[k], m, &m->nested_enums[k]);
  }

  /* Nested extensions; each records its position in index_. */
  const google_protobuf_FieldDescriptorProto* const* ext_protos =
      google_protobuf_DescriptorProto_extension(msg_proto, &ext_n);
  m->nested_ext_count = ext_n;
  m->nested_exts = symtab_alloc(ctx, sizeof(*m->nested_exts) * ext_n);
  for (size_t k = 0; k < ext_n; k++) {
    create_fielddef(ctx, m->full_name, m, ext_protos[k], &m->nested_exts[k],
                    /* is_extension= */ true);
    ((upb_FieldDef*)&m->nested_exts[k])->index_ = k;
  }

  /* Nested messages (recursing through create_msgdef()). */
  const google_protobuf_DescriptorProto* const* child_protos =
      google_protobuf_DescriptorProto_nested_type(msg_proto, &msg_n);
  m->nested_msg_count = msg_n;
  m->nested_msgs = symtab_alloc(ctx, sizeof(*m->nested_msgs) * msg_n);
  for (size_t k = 0; k < msg_n; k++) {
    create_msgdef(ctx, m->full_name, child_protos[k], m, &m->nested_msgs[k]);
  }
}
8242 
/* Resolves the message or enum type that field `f` refers to.
 *
 * On entry f->sub.unresolved holds the field's descriptor proto; its type_name
 * is looked up relative to `prefix`.  Depending on f->type_:
 *   - unspecified: the name is resolved to whichever kind of definition it
 *     names, and f->type_ is set to enum or message accordingly;
 *   - message/group: f->sub.msgdef is set from a message lookup;
 *   - enum: f->sub.enumdef is set from an enum lookup;
 *   - anything else: f->sub is left untouched. */
static void resolve_subdef(symtab_addctx* ctx, const char* prefix,
                           upb_FieldDef* f) {
  const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved;
  upb_StringView name =
      google_protobuf_FieldDescriptorProto_type_name(field_proto);
  bool has_name =
      google_protobuf_FieldDescriptorProto_has_type_name(field_proto);
  switch ((int)f->type_) {
    case FIELD_TYPE_UNSPECIFIED: {
      // Type was not specified and must be inferred.
      UPB_ASSERT(has_name);
      upb_deftype_t type;
      const void* def =
          symtab_resolveany(ctx, f->full_name, prefix, name, &type);
      switch (type) {
        case UPB_DEFTYPE_ENUM:
          f->sub.enumdef = def;
          f->type_ = kUpb_FieldType_Enum;
          break;
        case UPB_DEFTYPE_MSG:
          f->sub.msgdef = def;
          f->type_ = kUpb_FieldType_Message;  // It appears there is no way of
                                              // this being a group.
          break;
        default:
          symtab_errf(ctx, "Couldn't resolve type name for field %s",
                      f->full_name);
      }
      // The sub-definition is already resolved.  Without this break, control
      // would fall into the message case below and look the name up again as
      // a message, overwriting f->sub even when the inferred type is an enum.
      break;
    }
    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
      UPB_ASSERT(has_name);
      f->sub.msgdef =
          symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
      break;
    case kUpb_FieldType_Enum:
      UPB_ASSERT(has_name);
      f->sub.enumdef =
          symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_ENUM);
      break;
    default:
      // No resolution necessary.
      break;
  }
}
8288 
/* Completes the extension field `f`.
 *
 * The extendee named in `field_proto` is resolved to a message and stored in
 * f->msgdef; the field number must fall inside one of that message's
 * extension ranges.  When no prebuilt layout is in use (ctx->layout is NULL),
 * the extension's mini-table entry is filled in here.  Finally the extension
 * is recorded in the pool's `exts` table, keyed by its mini-table pointer. */
static void resolve_extension(
    symtab_addctx* ctx, const char* prefix, upb_FieldDef* f,
    const google_protobuf_FieldDescriptorProto* field_proto) {
  if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
    symtab_errf(ctx, "extension for field '%s' had no extendee", f->full_name);
  }

  upb_StringView name =
      google_protobuf_FieldDescriptorProto_extendee(field_proto);
  const upb_MessageDef* m =
      symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
  f->msgdef = m;

  bool found = false;

  for (int i = 0, n = m->ext_range_count; i < n; i++) {
    const upb_ExtensionRange* r = &m->ext_ranges[i];
    if (r->start <= f->number_ && f->number_ < r->end) {
      found = true;
      break;
    }
  }

  if (!found) {
    symtab_errf(ctx,
                "field number %u in extension %s has no extension range in "
                "message %s",
                (unsigned)f->number_, f->full_name, f->msgdef->full_name);
  }

  const upb_MiniTable_Extension* ext = ctx->file->ext_layouts[f->layout_index];
  if (ctx->layout) {
    UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number);
  } else {
    upb_MiniTable_Extension* mut_ext = (upb_MiniTable_Extension*)ext;
    fill_fieldlayout(&mut_ext->field, f);
    mut_ext->field.presence = 0;
    mut_ext->field.offset = 0;
    mut_ext->field.submsg_index = 0;
    mut_ext->extendee = f->msgdef->layout;
    // f->sub holds a message def only for message-typed fields; for other
    // types it still holds the enum def or the unresolved descriptor proto.
    // Pick the member that matches the field type instead of always reading
    // it as a message def.
    if (upb_FieldDef_IsSubMessage(f)) {
      mut_ext->sub.submsg = f->sub.msgdef->layout;
    } else if (f->type_ == kUpb_FieldType_Enum) {
      mut_ext->sub.subenum = f->sub.enumdef->layout;
    } else {
      mut_ext->sub.submsg = NULL;
    }
  }

  CHK_OOM(upb_inttable_insert(&ctx->symtab->exts, (uintptr_t)ext,
                              upb_value_constptr(f), ctx->arena));
}
8335 
/* Sets the default value of field `f` and records in f->has_default whether
 * the descriptor supplied one explicitly.
 *
 * This runs after sub-definitions are resolved because enum defaults are
 * written as a label, which can only be interpreted once the enum is known. */
static void resolve_default(
    symtab_addctx* ctx, upb_FieldDef* f,
    const google_protobuf_FieldDescriptorProto* field_proto) {
  if (!google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
    /* Nothing explicit in the descriptor: use the implicit default. */
    set_default_default(ctx, f);
    f->has_default = false;
    return;
  }

  upb_StringView text =
      google_protobuf_FieldDescriptorProto_default_value(field_proto);

  if (f->file->syntax == kUpb_Syntax_Proto3) {
    symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)",
                f->full_name);
  }

  if (upb_FieldDef_IsSubMessage(f)) {
    symtab_errf(ctx, "message fields cannot have explicit defaults (%s)",
                f->full_name);
  }

  parse_default(ctx, text.data, text.size, f);
  f->has_default = true;
}
8362 
/* Runs the resolution steps for field `f`: its sub-definition, then its
 * default value, then (for extensions only) its extendee. */
static void resolve_fielddef(symtab_addctx* ctx, const char* prefix,
                             upb_FieldDef* f) {
  // Keep our own copy of the descriptor pointer: resolve_subdef() may
  // overwrite f->sub.
  const google_protobuf_FieldDescriptorProto* saved_proto = f->sub.unresolved;

  resolve_subdef(ctx, prefix, f);
  resolve_default(ctx, f, saved_proto);

  if (!f->is_extension_) return;
  resolve_extension(ctx, prefix, f, saved_proto);
}
8375 
/* Resolves everything declared in message `m`: its fields, its nested
 * extensions, its layout (built here unless a prebuilt layout is in use), and
 * then, recursively, its nested messages.
 *
 * m->in_message_set is set when one of the nested extensions is an optional
 * message field whose type is `m` itself and whose extendee has the
 * message_set_wire_format option. */
static void resolve_msgdef(symtab_addctx* ctx, upb_MessageDef* m) {
  for (int k = 0; k < m->field_count; k++) {
    resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->fields[k]);
  }

  m->in_message_set = false;
  for (int k = 0; k < m->nested_ext_count; k++) {
    upb_FieldDef* ext = (upb_FieldDef*)&m->nested_exts[k];
    resolve_fielddef(ctx, m->full_name, ext);

    /* The tests run in this order; each one is only reached when the
     * previous ones held. */
    if (ext->type_ != kUpb_FieldType_Message) continue;
    if (ext->label_ != kUpb_Label_Optional) continue;
    if (ext->sub.msgdef != m) continue;
    if (google_protobuf_MessageOptions_message_set_wire_format(
            ext->msgdef->opts)) {
      m->in_message_set = true;
    }
  }

  if (!ctx->layout) {
    make_layout(ctx, m);
  }

  for (int k = 0; k < m->nested_msg_count; k++) {
    resolve_msgdef(ctx, (upb_MessageDef*)&m->nested_msgs[k]);
  }
}
8399 
/* Returns the number of extensions declared in `msg_proto` plus those declared
 * in all of its nested messages, counted recursively. */
static int count_exts_in_msg(const google_protobuf_DescriptorProto* msg_proto) {
  size_t own_n;
  google_protobuf_DescriptorProto_extension(msg_proto, &own_n);
  int total = own_n;

  size_t child_n;
  const google_protobuf_DescriptorProto* const* children =
      google_protobuf_DescriptorProto_nested_type(msg_proto, &child_n);
  for (size_t k = 0; k < child_n; k++) {
    total += count_exts_in_msg(children[k]);
  }

  return total;
}
8413 
/* Populates `file` from the descriptor `file_proto`.
 *
 * Steps, in order:
 *   1. Count every extension in the file (top-level and nested) and set up
 *      file->ext_layouts, either from the supplied layout or freshly
 *      allocated.
 *   2. Read the file name, package, syntax and options.
 *   3. Look up each dependency in the pool and validate the public/weak
 *      dependency indices.
 *   4. Create the top-level enums, extensions, messages and services.
 *   5. Resolve the top-level extensions and messages now that all names are
 *      in the symbol table.
 *   6. Register the file's extension layouts with the extension registry.
 *
 * Problems are reported through symtab_errf() / CHK_OOM. */
static void build_filedef(
    symtab_addctx* ctx, upb_FileDef* file,
    const google_protobuf_FileDescriptorProto* file_proto) {
  const google_protobuf_DescriptorProto* const* msgs;
  const google_protobuf_EnumDescriptorProto* const* enums;
  const google_protobuf_FieldDescriptorProto* const* exts;
  const google_protobuf_ServiceDescriptorProto* const* services;
  const upb_StringView* strs;
  const int32_t* public_deps;
  const int32_t* weak_deps;
  size_t i, n;

  file->symtab = ctx->symtab;

  /* Count all extensions in the file, to build a flat array of layouts. */
  google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  int ext_count = n;
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    ext_count += count_exts_in_msg(msgs[i]);
  }
  file->ext_count = ext_count;

  if (ctx->layout) {
    /* We are using the ext layouts that were passed in. */
    file->ext_layouts = ctx->layout->exts;
    if (ctx->layout->ext_count != file->ext_count) {
      symtab_errf(ctx, "Extension count did not match layout (%d vs %d)",
                  ctx->layout->ext_count, file->ext_count);
    }
  } else {
    /* We are building ext layouts from scratch. */
    file->ext_layouts =
        symtab_alloc(ctx, sizeof(*file->ext_layouts) * file->ext_count);
    upb_MiniTable_Extension* ext =
        symtab_alloc(ctx, sizeof(*ext) * file->ext_count);
    for (int j = 0; j < file->ext_count; j++) {
      file->ext_layouts[j] = &ext[j];
    }
  }

  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
    symtab_errf(ctx, "File has no name");
  }

  file->name =
      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));

  upb_StringView package = google_protobuf_FileDescriptorProto_package(file_proto);
  if (package.size) {
    check_ident(ctx, package, true);
    file->package = strviewdup(ctx, package);
  } else {
    file->package = NULL;
  }

  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
    upb_StringView syntax =
        google_protobuf_FileDescriptorProto_syntax(file_proto);

    if (streql_view(syntax, "proto2")) {
      file->syntax = kUpb_Syntax_Proto2;
    } else if (streql_view(syntax, "proto3")) {
      file->syntax = kUpb_Syntax_Proto3;
    } else {
      symtab_errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'",
                  UPB_STRINGVIEW_ARGS(syntax));
    }
  } else {
    /* A file that does not state its syntax is treated as proto2. */
    file->syntax = kUpb_Syntax_Proto2;
  }

  /* Read options. */
  SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto);

  /* Verify dependencies. */
  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  file->dep_count = n;
  file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n);

  for (i = 0; i < n; i++) {
    upb_StringView str = strs[i];
    file->deps[i] =
        upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size);
    if (!file->deps[i]) {
      symtab_errf(ctx,
                  "Depends on file '" UPB_STRINGVIEW_FORMAT
                  "', but it has not been loaded",
                  UPB_STRINGVIEW_ARGS(str));
    }
  }

  /* Public and weak dependencies are stored as indices into the dependency
   * list, so each must lie in [0, dep_count).  The values are signed, so the
   * lower bound has to be checked as well as the upper one. */
  public_deps =
      google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n);
  file->public_dep_count = n;
  file->public_deps = symtab_alloc(ctx, sizeof(*file->public_deps) * n);
  int32_t* mutable_public_deps = (int32_t*)file->public_deps;
  for (i = 0; i < n; i++) {
    if (public_deps[i] < 0 || public_deps[i] >= file->dep_count) {
      symtab_errf(ctx, "public_dep %d is out of range", (int)public_deps[i]);
    }
    mutable_public_deps[i] = public_deps[i];
  }

  weak_deps =
      google_protobuf_FileDescriptorProto_weak_dependency(file_proto, &n);
  file->weak_dep_count = n;
  file->weak_deps = symtab_alloc(ctx, sizeof(*file->weak_deps) * n);
  int32_t* mutable_weak_deps = (int32_t*)file->weak_deps;
  for (i = 0; i < n; i++) {
    if (weak_deps[i] < 0 || weak_deps[i] >= file->dep_count) {
      symtab_errf(ctx, "weak_dep %d is out of range", (int)weak_deps[i]);
    }
    mutable_weak_deps[i] = weak_deps[i];
  }

  /* Create enums. */
  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  file->top_lvl_enum_count = n;
  file->top_lvl_enums = symtab_alloc(ctx, sizeof(*file->top_lvl_enums) * n);
  for (i = 0; i < n; i++) {
    create_enumdef(ctx, file->package, enums[i], NULL, &file->top_lvl_enums[i]);
  }

  /* Create extensions. */
  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  file->top_lvl_ext_count = n;
  file->top_lvl_exts = symtab_alloc(ctx, sizeof(*file->top_lvl_exts) * n);
  for (i = 0; i < n; i++) {
    create_fielddef(ctx, file->package, NULL, exts[i], &file->top_lvl_exts[i],
                    /* is_extension= */ true);
    ((upb_FieldDef*)&file->top_lvl_exts[i])->index_ = i;
  }

  /* Create messages. */
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  file->top_lvl_msg_count = n;
  file->top_lvl_msgs = symtab_alloc(ctx, sizeof(*file->top_lvl_msgs) * n);
  for (i = 0; i < n; i++) {
    create_msgdef(ctx, file->package, msgs[i], NULL, &file->top_lvl_msgs[i]);
  }

  /* Create services. */
  services = google_protobuf_FileDescriptorProto_service(file_proto, &n);
  file->service_count = n;
  file->services = symtab_alloc(ctx, sizeof(*file->services) * n);
  for (i = 0; i < n; i++) {
    create_service(ctx, services[i], &file->services[i]);
    ((upb_ServiceDef*)&file->services[i])->index = i;
  }

  /* Now that all names are in the table, build layouts and resolve refs. */
  for (i = 0; i < (size_t)file->top_lvl_ext_count; i++) {
    resolve_fielddef(ctx, file->package, (upb_FieldDef*)&file->top_lvl_exts[i]);
  }

  for (i = 0; i < (size_t)file->top_lvl_msg_count; i++) {
    resolve_msgdef(ctx, (upb_MessageDef*)&file->top_lvl_msgs[i]);
  }

  if (file->ext_count) {
    CHK_OOM(_upb_extreg_add(ctx->symtab->extreg, file->ext_layouts,
                            file->ext_count));
  }
}
8579 
/* Removes from the pool's symbol table every symbol that belongs to `file`.
 *
 * Each entry's owning file is found according to its definition type
 * (extension, message, enum, enum value or service); entries owned by `file`
 * are removed through the iterator. */
static void remove_filedef(upb_DefPool* s, upb_FileDef* file) {
  intptr_t pos = UPB_INTTABLE_BEGIN;
  upb_StringView sym_name;
  upb_value sym;

  while (upb_strtable_next2(&s->syms, &sym_name, &sym, &pos)) {
    const upb_FileDef* owner;

    switch (deftype(sym)) {
      case UPB_DEFTYPE_EXT:
        owner = upb_FieldDef_File(unpack_def(sym, UPB_DEFTYPE_EXT));
        break;
      case UPB_DEFTYPE_MSG:
        owner = upb_MessageDef_File(unpack_def(sym, UPB_DEFTYPE_MSG));
        break;
      case UPB_DEFTYPE_ENUM:
        owner = upb_EnumDef_File(unpack_def(sym, UPB_DEFTYPE_ENUM));
        break;
      case UPB_DEFTYPE_ENUMVAL:
        /* An enum value's file is the file of the enum it belongs to. */
        owner = upb_EnumDef_File(
            upb_EnumValueDef_Enum(unpack_def(sym, UPB_DEFTYPE_ENUMVAL)));
        break;
      case UPB_DEFTYPE_SERVICE:
        owner = upb_ServiceDef_File(unpack_def(sym, UPB_DEFTYPE_SERVICE));
        break;
      default:
        UPB_UNREACHABLE();
    }

    if (owner != file) continue;
    upb_strtable_removeiter(&s->syms, &pos);
  }
}
8610 
_upb_DefPool_AddFile(upb_DefPool * s,const google_protobuf_FileDescriptorProto * file_proto,const upb_MiniTable_File * layout,upb_Status * status)8611 static const upb_FileDef* _upb_DefPool_AddFile(
8612     upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto,
8613     const upb_MiniTable_File* layout, upb_Status* status) {
8614   symtab_addctx ctx;
8615   upb_StringView name = google_protobuf_FileDescriptorProto_name(file_proto);
8616   upb_value v;
8617 
8618   if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) {
8619     if (unpack_def(v, UPB_DEFTYPE_FILE)) {
8620       upb_Status_SetErrorFormat(status, "duplicate file name (%.*s)",
8621                                 UPB_STRINGVIEW_ARGS(name));
8622       return NULL;
8623     }
8624     const upb_MiniTable_File* registered = unpack_def(v, UPB_DEFTYPE_LAYOUT);
8625     UPB_ASSERT(registered);
8626     if (layout && layout != registered) {
8627       upb_Status_SetErrorFormat(
8628           status, "tried to build with a different layout (filename=%.*s)",
8629           UPB_STRINGVIEW_ARGS(name));
8630       return NULL;
8631     }
8632     layout = registered;
8633   }
8634 
8635   ctx.symtab = s;
8636   ctx.layout = layout;
8637   ctx.msg_count = 0;
8638   ctx.enum_count = 0;
8639   ctx.ext_count = 0;
8640   ctx.status = status;
8641   ctx.file = NULL;
8642   ctx.arena = upb_Arena_New();
8643   ctx.tmp_arena = upb_Arena_New();
8644 
8645   if (!ctx.arena || !ctx.tmp_arena) {
8646     if (ctx.arena) upb_Arena_Free(ctx.arena);
8647     if (ctx.tmp_arena) upb_Arena_Free(ctx.tmp_arena);
8648     upb_Status_setoom(status);
8649     return NULL;
8650   }
8651 
8652   if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) {
8653     UPB_ASSERT(!upb_Status_IsOk(status));
8654     if (ctx.file) {
8655       remove_filedef(s, ctx.file);
8656       ctx.file = NULL;
8657     }
8658   } else {
8659     ctx.file = symtab_alloc(&ctx, sizeof(*ctx.file));
8660     build_filedef(&ctx, ctx.file, file_proto);
8661     upb_strtable_insert(&s->files, name.data, name.size,
8662                         pack_def(ctx.file, UPB_DEFTYPE_FILE), ctx.arena);
8663     UPB_ASSERT(upb_Status_IsOk(status));
8664     upb_Arena_Fuse(s->arena, ctx.arena);
8665   }
8666 
8667   upb_Arena_Free(ctx.arena);
8668   upb_Arena_Free(ctx.tmp_arena);
8669   return ctx.file;
8670 }
8671 
upb_DefPool_AddFile(upb_DefPool * s,const google_protobuf_FileDescriptorProto * file_proto,upb_Status * status)8672 const upb_FileDef* upb_DefPool_AddFile(
8673     upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto,
8674     upb_Status* status) {
8675   return _upb_DefPool_AddFile(s, file_proto, NULL, status);
8676 }
8677 
8678 /* Include here since we want most of this file to be stdio-free. */
8679 #include <stdio.h>
8680 
/* Loads the compiled-in descriptor `init`, and first all of its dependencies,
 * into pool `s`.
 *
 * s:                 pool receiving the definitions.
 * init:              descriptor bytes, file name, layout and a
 *                    NULL-terminated dependency list.
 * rebuild_minitable: when true, init->layout is ignored and no prebuilt
 *                    layout is passed on.
 *
 * Returns true when the file is already present or was loaded successfully.
 * On failure the error is printed to stderr and false is returned. */
bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init,
                                bool rebuild_minitable) {
  /* Since this function should never fail (it would indicate a bug in upb) we
   * print errors to stderr instead of returning error status to the user. */
  _upb_DefPool_Init** deps = init->deps;
  google_protobuf_FileDescriptorProto* file;
  const upb_MiniTable_File* mt;
  upb_Arena* arena;
  upb_Status status;

  upb_Status_Clear(&status);

  if (upb_DefPool_FindFileByName(s, init->filename)) {
    return true;
  }

  arena = upb_Arena_New();
  if (!arena) {
    /* Without an arena the descriptor cannot be parsed at all. */
    upb_Status_setoom(&status);
    goto err;
  }

  for (; *deps; deps++) {
    if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err;
  }

  file = google_protobuf_FileDescriptorProto_parse_ex(
      init->descriptor.data, init->descriptor.size, NULL,
      kUpb_DecodeOption_AliasString, arena);
  s->bytes_loaded += init->descriptor.size;

  if (!file) {
    upb_Status_SetErrorFormat(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto err;
  }

  mt = rebuild_minitable ? NULL : init->layout;
  if (!_upb_DefPool_AddFile(s, file, mt, &status)) {
    goto err;
  }

  upb_Arena_Free(arena);
  return true;

err:
  fprintf(stderr,
          "Error loading compiled-in descriptor for file '%s' (this should "
          "never happen): %s\n",
          init->filename, upb_Status_ErrorMessage(&status));
  /* The arena is NULL when its allocation is what failed. */
  if (arena) upb_Arena_Free(arena);
  return false;
}
8732 
_upb_DefPool_BytesLoaded(const upb_DefPool * s)8733 size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) {
8734   return s->bytes_loaded;
8735 }
8736 
_upb_DefPool_Arena(const upb_DefPool * s)8737 upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; }
8738 
_upb_DefPool_FindExtensionByMiniTable(const upb_DefPool * s,const upb_MiniTable_Extension * ext)8739 const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable(
8740     const upb_DefPool* s, const upb_MiniTable_Extension* ext) {
8741   upb_value v;
8742   bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v);
8743   UPB_ASSERT(ok);
8744   return upb_value_getconstptr(v);
8745 }
8746 
upb_DefPool_FindExtensionByNumber(const upb_DefPool * s,const upb_MessageDef * m,int32_t fieldnum)8747 const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s,
8748                                                       const upb_MessageDef* m,
8749                                                       int32_t fieldnum) {
8750   const upb_MiniTable* l = upb_MessageDef_MiniTable(m);
8751   const upb_MiniTable_Extension* ext = _upb_extreg_get(s->extreg, l, fieldnum);
8752   return ext ? _upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL;
8753 }
8754 
/* Registers the prebuilt layout `file` under `filename` in the pool's file
 * table.
 *
 * Returns false when upb_DefPool_FindFileByName() already finds `filename`;
 * otherwise returns the result of the table insertion. */
bool _upb_DefPool_registerlayout(upb_DefPool* s, const char* filename,
                                 const upb_MiniTable_File* file) {
  const bool already_loaded = upb_DefPool_FindFileByName(s, filename) != NULL;
  if (already_loaded) {
    return false;
  }

  upb_value packed = pack_def(file, UPB_DEFTYPE_LAYOUT);
  size_t name_len = strlen(filename);
  return upb_strtable_insert(&s->files, filename, name_len, packed, s->arena);
}
8762 
upb_DefPool_ExtensionRegistry(const upb_DefPool * s)8763 const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry(
8764     const upb_DefPool* s) {
8765   return s->extreg;
8766 }
8767 
upb_DefPool_GetAllExtensions(const upb_DefPool * s,const upb_MessageDef * m,size_t * count)8768 const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s,
8769                                                   const upb_MessageDef* m,
8770                                                   size_t* count) {
8771   size_t n = 0;
8772   intptr_t iter = UPB_INTTABLE_BEGIN;
8773   uintptr_t key;
8774   upb_value val;
8775   // This is O(all exts) instead of O(exts for m).  If we need this to be
8776   // efficient we may need to make extreg into a two-level table, or have a
8777   // second per-message index.
8778   while (upb_inttable_next2(&s->exts, &key, &val, &iter)) {
8779     const upb_FieldDef* f = upb_value_getconstptr(val);
8780     if (upb_FieldDef_ContainingType(f) == m) n++;
8781   }
8782   const upb_FieldDef** exts = malloc(n * sizeof(*exts));
8783   iter = UPB_INTTABLE_BEGIN;
8784   size_t i = 0;
8785   while (upb_inttable_next2(&s->exts, &key, &val, &iter)) {
8786     const upb_FieldDef* f = upb_value_getconstptr(val);
8787     if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f;
8788   }
8789   *count = n;
8790   return exts;
8791 }
8792 
8793 #undef CHK_OOM
8794 
8795 /** upb/reflection.c ************************************************************/
8796 
8797 #include <string.h>
8798 
8799 
get_field_size(const upb_MiniTable_Field * f)8800 static size_t get_field_size(const upb_MiniTable_Field* f) {
8801   static unsigned char sizes[] = {
8802       0,                      /* 0 */
8803       8,                      /* kUpb_FieldType_Double */
8804       4,                      /* kUpb_FieldType_Float */
8805       8,                      /* kUpb_FieldType_Int64 */
8806       8,                      /* kUpb_FieldType_UInt64 */
8807       4,                      /* kUpb_FieldType_Int32 */
8808       8,                      /* kUpb_FieldType_Fixed64 */
8809       4,                      /* kUpb_FieldType_Fixed32 */
8810       1,                      /* kUpb_FieldType_Bool */
8811       sizeof(upb_StringView), /* kUpb_FieldType_String */
8812       sizeof(void*),          /* kUpb_FieldType_Group */
8813       sizeof(void*),          /* kUpb_FieldType_Message */
8814       sizeof(upb_StringView), /* kUpb_FieldType_Bytes */
8815       4,                      /* kUpb_FieldType_UInt32 */
8816       4,                      /* kUpb_FieldType_Enum */
8817       4,                      /* kUpb_FieldType_SFixed32 */
8818       8,                      /* kUpb_FieldType_SFixed64 */
8819       4,                      /* kUpb_FieldType_SInt32 */
8820       8,                      /* kUpb_FieldType_SInt64 */
8821   };
8822   return upb_IsRepeatedOrMap(f) ? sizeof(void*) : sizes[f->descriptortype];
8823 }
8824 
8825 /** upb_Message
8826  * *******************************************************************/
8827 
upb_Message_New(const upb_MessageDef * m,upb_Arena * a)8828 upb_Message* upb_Message_New(const upb_MessageDef* m, upb_Arena* a) {
8829   return _upb_Message_New(upb_MessageDef_MiniTable(m), a);
8830 }
8831 
in_oneof(const upb_MiniTable_Field * field)8832 static bool in_oneof(const upb_MiniTable_Field* field) {
8833   return field->presence < 0;
8834 }
8835 
_upb_Message_Getraw(const upb_Message * msg,const upb_FieldDef * f)8836 static upb_MessageValue _upb_Message_Getraw(const upb_Message* msg,
8837                                             const upb_FieldDef* f) {
8838   const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8839   const char* mem = UPB_PTR_AT(msg, field->offset, char);
8840   upb_MessageValue val = {0};
8841   memcpy(&val, mem, get_field_size(field));
8842   return val;
8843 }
8844 
upb_Message_Has(const upb_Message * msg,const upb_FieldDef * f)8845 bool upb_Message_Has(const upb_Message* msg, const upb_FieldDef* f) {
8846   assert(upb_FieldDef_HasPresence(f));
8847   if (upb_FieldDef_IsExtension(f)) {
8848     const upb_MiniTable_Extension* ext = _upb_FieldDef_ExtensionMiniTable(f);
8849     return _upb_Message_Getext(msg, ext) != NULL;
8850   } else {
8851     const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8852     if (in_oneof(field)) {
8853       return _upb_getoneofcase_field(msg, field) == field->number;
8854     } else if (field->presence > 0) {
8855       return _upb_hasbit_field(msg, field);
8856     } else {
8857       UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message ||
8858                  field->descriptortype == kUpb_FieldType_Group);
8859       return _upb_Message_Getraw(msg, f).msg_val != NULL;
8860     }
8861   }
8862 }
8863 
upb_Message_WhichOneof(const upb_Message * msg,const upb_OneofDef * o)8864 const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg,
8865                                            const upb_OneofDef* o) {
8866   const upb_FieldDef* f = upb_OneofDef_Field(o, 0);
8867   if (upb_OneofDef_IsSynthetic(o)) {
8868     UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1);
8869     return upb_Message_Has(msg, f) ? f : NULL;
8870   } else {
8871     const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8872     uint32_t oneof_case = _upb_getoneofcase_field(msg, field);
8873     f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL;
8874     UPB_ASSERT((f != NULL) == (oneof_case != 0));
8875     return f;
8876   }
8877 }
8878 
upb_Message_Get(const upb_Message * msg,const upb_FieldDef * f)8879 upb_MessageValue upb_Message_Get(const upb_Message* msg,
8880                                  const upb_FieldDef* f) {
8881   if (upb_FieldDef_IsExtension(f)) {
8882     const upb_Message_Extension* ext =
8883         _upb_Message_Getext(msg, _upb_FieldDef_ExtensionMiniTable(f));
8884     if (ext) {
8885       upb_MessageValue val;
8886       memcpy(&val, &ext->data, sizeof(val));
8887       return val;
8888     } else if (upb_FieldDef_IsRepeated(f)) {
8889       return (upb_MessageValue){.array_val = NULL};
8890     }
8891   } else if (!upb_FieldDef_HasPresence(f) || upb_Message_Has(msg, f)) {
8892     return _upb_Message_Getraw(msg, f);
8893   }
8894   return upb_FieldDef_Default(f);
8895 }
8896 
upb_Message_Mutable(upb_Message * msg,const upb_FieldDef * f,upb_Arena * a)8897 upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg,
8898                                             const upb_FieldDef* f,
8899                                             upb_Arena* a) {
8900   UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f));
8901   if (upb_FieldDef_HasPresence(f) && !upb_Message_Has(msg, f)) {
8902     // We need to skip the upb_Message_Get() call in this case.
8903     goto make;
8904   }
8905 
8906   upb_MessageValue val = upb_Message_Get(msg, f);
8907   if (val.array_val) {
8908     return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val};
8909   }
8910 
8911   upb_MutableMessageValue ret;
8912 make:
8913   if (!a) return (upb_MutableMessageValue){.array = NULL};
8914   if (upb_FieldDef_IsMap(f)) {
8915     const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
8916     const upb_FieldDef* key =
8917         upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
8918     const upb_FieldDef* value =
8919         upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
8920     ret.map =
8921         upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value));
8922   } else if (upb_FieldDef_IsRepeated(f)) {
8923     ret.array = upb_Array_New(a, upb_FieldDef_CType(f));
8924   } else {
8925     UPB_ASSERT(upb_FieldDef_IsSubMessage(f));
8926     ret.msg = upb_Message_New(upb_FieldDef_MessageSubDef(f), a);
8927   }
8928 
8929   val.array_val = ret.array;
8930   upb_Message_Set(msg, f, val, a);
8931 
8932   return ret;
8933 }
8934 
/* Stores `val` into field `f` of `msg`.
 *
 * Regular fields: copies get_field_size() bytes of `val` to the field's
 * offset, then records presence by setting the hasbit (presence > 0) or the
 * oneof case (oneof member). Always returns true.
 * Extensions: gets or creates the extension entry using arena `a` and copies
 * the whole value into it. Returns false if the entry could not be obtained. */
bool upb_Message_Set(upb_Message* msg, const upb_FieldDef* f,
                     upb_MessageValue val, upb_Arena* a) {
  if (!upb_FieldDef_IsExtension(f)) {
    const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
    char* dst = UPB_PTR_AT(msg, field->offset, char);
    memcpy(dst, &val, get_field_size(field));

    if (field->presence > 0) {
      _upb_sethas_field(msg, field);
    } else if (in_oneof(field)) {
      *_upb_oneofcase_field(msg, field) = field->number;
    }
    return true;
  }

  upb_Message_Extension* ext = _upb_Message_GetOrCreateExtension(
      msg, _upb_FieldDef_ExtensionMiniTable(f), a);
  if (ext == NULL) return false;
  memcpy(&ext->data, &val, sizeof(val));
  return true;
}
8954 
/* Clears field `f` in `msg`.
 *
 * Extensions are removed via _upb_Message_Clearext(). For regular fields the
 * presence record is reset (hasbit cleared, or oneof case zeroed) and the
 * field's storage is zero-filled. A oneof member that is not the active case
 * is left completely untouched, since the storage belongs to another field. */
void upb_Message_ClearField(upb_Message* msg, const upb_FieldDef* f) {
  if (upb_FieldDef_IsExtension(f)) {
    _upb_Message_Clearext(msg, _upb_FieldDef_ExtensionMiniTable(f));
    return;
  }

  const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
  char* storage = UPB_PTR_AT(msg, field->offset, char);

  if (field->presence > 0) {
    _upb_clearhas_field(msg, field);
  } else if (in_oneof(field)) {
    uint32_t* active_case = _upb_oneofcase_field(msg, field);
    if (*active_case != field->number) return;
    *active_case = 0;
  }

  memset(storage, 0, get_field_size(field));
}
8973 
/* Clears `msg`, which is of type `m`, by forwarding to _upb_Message_Clear()
 * with the mini-table of `m`. */
void upb_Message_Clear(upb_Message* msg, const upb_MessageDef* m) {
  const upb_MiniTable* layout = upb_MessageDef_MiniTable(m);
  _upb_Message_Clear(msg, layout);
}
8977 
/* Advances an iteration over the populated fields of `msg`.
 *
 * msg:      message being inspected.
 * m:        definition of `msg`; supplies the list of regular fields.
 * ext_pool: when non-NULL, set extensions are reported after the regular
 *           fields and resolved to field definitions through this pool; when
 *           NULL, extensions are not visited.
 * out_f:    receives the definition of the field found.
 * out_val:  receives that field's value.
 * iter:     in/out cursor holding the index of the last position visited.
 *           Callers in this file start it at kUpb_Message_Begin.
 *
 * Returns true and fills out_f / out_val when another field was found, false
 * when the iteration is finished. *iter is updated in both cases.
 *
 * A field with presence is reported when upb_Message_Has() is true. A field
 * without presence is reported when its raw value is non-zero and, for maps
 * and arrays, non-empty. */
bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m,
                      const upb_DefPool* ext_pool, const upb_FieldDef** out_f,
                      upb_MessageValue* out_val, size_t* iter) {
  size_t i = *iter;
  size_t n = upb_MessageDef_FieldCount(m);
  const upb_MessageValue zero = {0};
  /* NOTE(review): ext_pool is in fact used further down, so this marker looks
   * redundant. */
  UPB_UNUSED(ext_pool);

  /* Iterate over normal fields, returning the first one that is set. */
  while (++i < n) {
    const upb_FieldDef* f = upb_MessageDef_Field(m, i);
    upb_MessageValue val = _upb_Message_Getraw(msg, f);

    /* Skip field if unset or empty. */
    if (upb_FieldDef_HasPresence(f)) {
      if (!upb_Message_Has(msg, f)) continue;
    } else {
      upb_MessageValue test = val;
      if (upb_FieldDef_IsString(f) && !upb_FieldDef_IsRepeated(f)) {
        /* Clear string pointer, only size matters (ptr could be non-NULL). */
        test.str_val.data = NULL;
      }
      /* Continue if NULL or 0. */
      if (memcmp(&test, &zero, sizeof(test)) == 0) continue;

      /* Continue on empty array or map. */
      if (upb_FieldDef_IsMap(f)) {
        if (upb_Map_Size(test.map_val) == 0) continue;
      } else if (upb_FieldDef_IsRepeated(f)) {
        if (upb_Array_Size(test.array_val) == 0) continue;
      }
    }

    *out_val = val;
    *out_f = f;
    *iter = i;
    return true;
  }

  if (ext_pool) {
    /* Return any extensions that are set. */
    size_t count;
    const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count);
    /* Cursor positions n, n+1, ... address the extensions; they are walked
     * from the last stored entry back to the first. */
    if (i - n < count) {
      ext += count - 1 - (i - n);
      memcpy(out_val, &ext->data, sizeof(*out_val));
      *out_f = _upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext);
      *iter = i;
      return true;
    }
  }

  *iter = i;
  return false;
}
9033 
/* Recursively discards unknown fields from `msg` (of type `m`) and from every
 * sub-message reachable through its set fields, including map values and
 * repeated elements.
 *
 * depth: remaining recursion budget. It is decremented on entry; when the
 *        decrement reaches exactly 0 the call returns false without touching
 *        `msg`.
 *
 * Returns true if the whole tree was processed, false if the depth budget was
 * exhausted anywhere below. Siblings are still processed after a failure.
 * Extensions are not visited (a NULL extension pool is passed to the
 * iterator). */
bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
                                 int depth) {
  size_t iter = kUpb_Message_Begin;
  const upb_FieldDef* f;
  upb_MessageValue val;
  bool ret = true;

  if (--depth == 0) return false;

  _upb_Message_DiscardUnknown_shallow(msg);

  while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) {
    const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
    if (!subm) continue; /* Not a message-typed field: nothing to recurse. */

    if (upb_FieldDef_IsMap(f)) {
      /* For maps `subm` is the map-entry type; recurse only when the entry's
       * value field is itself a message. */
      const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(
          subm, kUpb_MapEntry_ValueFieldNumber);
      const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f);
      upb_Map* map = (upb_Map*)val.map_val;
      size_t map_iter = kUpb_Map_Begin;

      if (!val_m) continue;

      while (upb_MapIterator_Next(map, &map_iter)) {
        upb_MessageValue map_val = upb_MapIterator_Value(map, map_iter);
        if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m,
                                         depth)) {
          ret = false;
        }
      }
    } else if (upb_FieldDef_IsRepeated(f)) {
      const upb_Array* arr = val.array_val;
      size_t i, n = upb_Array_Size(arr);
      for (i = 0; i < n; i++) {
        upb_MessageValue elem = upb_Array_Get(arr, i);
        if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm,
                                         depth)) {
          ret = false;
        }
      }
    } else {
      if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm,
                                       depth)) {
        ret = false;
      }
    }
  }

  return ret;
}
9083 
/* Public entry point: discards unknown fields from `msg` (of type `m`) and its
 * sub-messages by forwarding to _upb_Message_DiscardUnknown() with `maxdepth`
 * as the initial depth budget. Returns that function's result. */
bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
                                int maxdepth) {
  return _upb_Message_DiscardUnknown(msg, m, maxdepth);
}
9088 
9089 /** upb/decode.c ************************************************************/
9090 
9091 #include <setjmp.h>
9092 #include <string.h>
9093 
9094 
9095 /* Must be last. */
9096 
/* Maps descriptor type -> elem_size_lg2, i.e. log2 of the size in bytes of
 * one array element of that type. Indexed by descriptor type number; row 0 is
 * the invalid type, whose -1 is stored as 255 in the uint8_t table.
 * NOTE(review): UPB_SIZE(a, b) presumably picks the 32-bit or 64-bit
 * alternative -- confirm against its definition. */
static const uint8_t desctype_to_elem_size_lg2[] = {
    -1,             /* invalid descriptor type */
    3,              /* DOUBLE */
    2,              /* FLOAT */
    3,              /* INT64 */
    3,              /* UINT64 */
    2,              /* INT32 */
    3,              /* FIXED64 */
    2,              /* FIXED32 */
    0,              /* BOOL */
    UPB_SIZE(3, 4), /* STRING */
    UPB_SIZE(2, 3), /* GROUP */
    UPB_SIZE(2, 3), /* MESSAGE */
    UPB_SIZE(3, 4), /* BYTES */
    2,              /* UINT32 */
    2,              /* ENUM */
    2,              /* SFIXED32 */
    3,              /* SFIXED64 */
    2,              /* SINT32 */
    3,              /* SINT64 */
};
9119 
/* Maps descriptor type -> upb map size. Indexed by descriptor type number;
 * row 0 is the invalid type, whose -1 is stored as 255 in the uint8_t table.
 * Numeric types list their size in bytes, message/group use the pointer size,
 * and string/bytes use the UPB_MAPTYPE_STRING marker instead of a size. */
static const uint8_t desctype_to_mapsize[] = {
    -1,                 /* invalid descriptor type */
    8,                  /* DOUBLE */
    4,                  /* FLOAT */
    8,                  /* INT64 */
    8,                  /* UINT64 */
    4,                  /* INT32 */
    8,                  /* FIXED64 */
    4,                  /* FIXED32 */
    1,                  /* BOOL */
    UPB_MAPTYPE_STRING, /* STRING */
    sizeof(void*),      /* GROUP */
    sizeof(void*),      /* MESSAGE */
    UPB_MAPTYPE_STRING, /* BYTES */
    4,                  /* UINT32 */
    4,                  /* ENUM */
    4,                  /* SFIXED32 */
    8,                  /* SFIXED64 */
    4,                  /* SINT32 */
    8,                  /* SINT64 */
};
9142 
9143 static const unsigned FIXED32_OK_MASK = (1 << kUpb_FieldType_Float) |
9144                                         (1 << kUpb_FieldType_Fixed32) |
9145                                         (1 << kUpb_FieldType_SFixed32);
9146 
9147 static const unsigned FIXED64_OK_MASK = (1 << kUpb_FieldType_Double) |
9148                                         (1 << kUpb_FieldType_Fixed64) |
9149                                         (1 << kUpb_FieldType_SFixed64);
9150 
/* Fake field type for MessageSet items; it occupies row 19 of the op tables,
 * right after the 18 real descriptor types. */
#define TYPE_MSGSET_ITEM 19
/* NOTE(review): appears to be the distance between a type's scalar row and
 * its repeated row in delim_ops -- confirm at the use site. */
#define TYPE_COUNT 19

/* Op: an action to be performed for a wire-type/field-type combination.
 * Macro arguments and negative constants are parenthesised so the macros
 * expand safely inside larger expressions. */
#define OP_UNKNOWN (-1) /* Unknown field. */
#define OP_MSGSET_ITEM (-2)
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_ENUM 1
#define OP_STRING 4
#define OP_BYTES 5
#define OP_SUBMSG 6
/* Scalar fields use only ops above. Repeated fields can use any op.  */
#define OP_FIXPCK_LG2(n) ((n) + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) ((n) + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
#define OP_PACKED_ENUM 13
9167 
/* Op to perform when a varint wire value arrives, indexed by descriptor type
 * (row 0 = field not found, row 19 = the fake MSGSET_ITEM type). Types that
 * are not varint-encoded map to OP_UNKNOWN. */
static const int8_t varint_ops[] = {
    OP_UNKNOWN,       /* field not found */
    OP_UNKNOWN,       /* DOUBLE */
    OP_UNKNOWN,       /* FLOAT */
    OP_SCALAR_LG2(3), /* INT64 */
    OP_SCALAR_LG2(3), /* UINT64 */
    OP_SCALAR_LG2(2), /* INT32 */
    OP_UNKNOWN,       /* FIXED64 */
    OP_UNKNOWN,       /* FIXED32 */
    OP_SCALAR_LG2(0), /* BOOL */
    OP_UNKNOWN,       /* STRING */
    OP_UNKNOWN,       /* GROUP */
    OP_UNKNOWN,       /* MESSAGE */
    OP_UNKNOWN,       /* BYTES */
    OP_SCALAR_LG2(2), /* UINT32 */
    OP_ENUM,          /* ENUM */
    OP_UNKNOWN,       /* SFIXED32 */
    OP_UNKNOWN,       /* SFIXED64 */
    OP_SCALAR_LG2(2), /* SINT32 */
    OP_SCALAR_LG2(3), /* SINT64 */
    OP_UNKNOWN,       /* MSGSET_ITEM */
};
9190 
/* Op to perform when a length-delimited wire value arrives. The table has two
 * halves: rows 0-19 apply to non-repeated fields (indexed by descriptor type,
 * row 19 = the fake MSGSET_ITEM type) and the rows from 20 on repeat the real
 * descriptor types for repeated fields, where packed encodings are allowed. */
static const int8_t delim_ops[] = {
    /* For non-repeated field type. */
    OP_UNKNOWN, /* field not found */
    OP_UNKNOWN, /* DOUBLE */
    OP_UNKNOWN, /* FLOAT */
    OP_UNKNOWN, /* INT64 */
    OP_UNKNOWN, /* UINT64 */
    OP_UNKNOWN, /* INT32 */
    OP_UNKNOWN, /* FIXED64 */
    OP_UNKNOWN, /* FIXED32 */
    OP_UNKNOWN, /* BOOL */
    OP_STRING,  /* STRING */
    OP_UNKNOWN, /* GROUP */
    OP_SUBMSG,  /* MESSAGE */
    OP_BYTES,   /* BYTES */
    OP_UNKNOWN, /* UINT32 */
    OP_UNKNOWN, /* ENUM */
    OP_UNKNOWN, /* SFIXED32 */
    OP_UNKNOWN, /* SFIXED64 */
    OP_UNKNOWN, /* SINT32 */
    OP_UNKNOWN, /* SINT64 */
    OP_UNKNOWN, /* MSGSET_ITEM */
    /* For repeated field type. */
    OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
    OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
    OP_VARPCK_LG2(3), /* REPEATED INT64 */
    OP_VARPCK_LG2(3), /* REPEATED UINT64 */
    OP_VARPCK_LG2(2), /* REPEATED INT32 */
    OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
    OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
    OP_VARPCK_LG2(0), /* REPEATED BOOL */
    OP_STRING,        /* REPEATED STRING */
    OP_SUBMSG,        /* REPEATED GROUP */
    OP_SUBMSG,        /* REPEATED MESSAGE */
    OP_BYTES,         /* REPEATED BYTES */
    OP_VARPCK_LG2(2), /* REPEATED UINT32 */
    OP_PACKED_ENUM,   /* REPEATED ENUM */
    OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
    OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
    OP_VARPCK_LG2(2), /* REPEATED SINT32 */
    OP_VARPCK_LG2(3), /* REPEATED SINT64 */
    /* Omitting MSGSET_*, because we never emit a repeated msgset type */
};
9234 
/* A single value read off the wire, viewed in whichever form the field needs.
 * Varints are decoded into uint64_val and then narrowed in place by
 * decode_munge(); for length-delimited data `size` holds the payload length
 * in bytes. */
typedef union {
  bool bool_val;
  uint32_t uint32_val;
  uint64_t uint64_val;
  uint32_t size;
} wireval;
9241 
/* Forward declaration: the sub-message and group helpers below call
 * decode_msg() to decode a nested message, so it must be visible before
 * its definition. */
static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
                              const upb_MiniTable* layout);
9244 
/* Aborts decoding by long-jumping through d->err with `status`, which must not
 * be kUpb_DecodeStatus_Ok (asserted). Marked UPB_NORETURN; the void* return
 * type only exists so callers can write `return decode_err(...)` from
 * functions that return a pointer. */
UPB_NORETURN static void* decode_err(upb_Decoder* d, upb_DecodeStatus status) {
  assert(status != kUpb_DecodeStatus_Ok);
  UPB_LONGJMP(d->err, status);
}
9249 
/* Externally visible counterpart of decode_err(): long-jumps through d->err
 * with `status`, which must not be kUpb_DecodeStatus_Ok (asserted). The
 * trailing `return NULL` only satisfies the const char* return type.
 * NOTE(review): presumably called from the fast-path decoder -- confirm. */
const char* fastdecode_err(upb_Decoder* d, int status) {
  assert(status != kUpb_DecodeStatus_Ok);
  UPB_LONGJMP(d->err, status);
  return NULL;
}
/* Checks the `len` bytes at `buf` with decode_verifyutf8_inl(); if the check
 * fails, aborts decoding with kUpb_DecodeStatus_BadUtf8. */
static void decode_verifyutf8(upb_Decoder* d, const char* buf, int len) {
  if (decode_verifyutf8_inl(buf, len)) return;
  decode_err(d, kUpb_DecodeStatus_BadUtf8);
}
9259 
/* Makes sure `arr` has room for `elem` more elements beyond arr->len.
 *
 * Returns true if the array had to be reallocated (so pointers into its
 * storage must be recomputed), false if the existing capacity was enough.
 * Aborts decoding with kUpb_DecodeStatus_OutOfMemory if growing fails. */
static bool decode_reserve(upb_Decoder* d, upb_Array* arr, size_t elem) {
  if (arr->size - arr->len >= elem) return false;

  if (!_upb_array_realloc(arr, arr->len + elem, &d->arena)) {
    decode_err(d, kUpb_DecodeStatus_OutOfMemory);
  }
  return true;
}
9267 
/* Result of decode_longvarint64(): the decoded value plus the position just
 * past the varint. ptr == NULL signals that decoding failed. */
typedef struct {
  const char* ptr;
  uint64_t val;
} decode_vret;
9272 
/* Slow path for varints longer than one byte.
 *
 * `ptr` points at the first byte of the varint and `val` is that first byte's
 * value with its continuation bit still set. Each following byte is folded in
 * as `(byte - 1) << (7 * i)`: the `- 1` lands on bit 7*i, exactly where the
 * previous byte's continuation bit sits, so it cancels that bit while the new
 * payload is added.
 *
 * Returns the decoded value and a pointer just past the varint, or {NULL, 0}
 * if no terminating byte is found within 10 bytes. */
UPB_NOINLINE
static decode_vret decode_longvarint64(const char* ptr, uint64_t val) {
  decode_vret ret = {NULL, 0};
  uint64_t byte;
  int i;
  for (i = 1; i < 10; i++) {
    byte = (uint8_t)ptr[i];
    val += (byte - 1) << (i * 7);
    if (!(byte & 0x80)) {
      /* Last byte of the varint: no continuation bit left to cancel. */
      ret.ptr = ptr + i + 1;
      ret.val = val;
      return ret;
    }
  }
  return ret;
}
9289 
9290 UPB_FORCEINLINE
decode_varint64(upb_Decoder * d,const char * ptr,uint64_t * val)9291 static const char* decode_varint64(upb_Decoder* d, const char* ptr,
9292                                    uint64_t* val) {
9293   uint64_t byte = (uint8_t)*ptr;
9294   if (UPB_LIKELY((byte & 0x80) == 0)) {
9295     *val = byte;
9296     return ptr + 1;
9297   } else {
9298     decode_vret res = decode_longvarint64(ptr, byte);
9299     if (!res.ptr) return decode_err(d, kUpb_DecodeStatus_Malformed);
9300     *val = res.val;
9301     return res.ptr;
9302   }
9303 }
9304 
9305 UPB_FORCEINLINE
decode_tag(upb_Decoder * d,const char * ptr,uint32_t * val)9306 static const char* decode_tag(upb_Decoder* d, const char* ptr, uint32_t* val) {
9307   uint64_t byte = (uint8_t)*ptr;
9308   if (UPB_LIKELY((byte & 0x80) == 0)) {
9309     *val = byte;
9310     return ptr + 1;
9311   } else {
9312     const char* start = ptr;
9313     decode_vret res = decode_longvarint64(ptr, byte);
9314     if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
9315       return decode_err(d, kUpb_DecodeStatus_Malformed);
9316     }
9317     *val = res.val;
9318     return res.ptr;
9319   }
9320 }
9321 
9322 UPB_FORCEINLINE
upb_Decoder_DecodeSize(upb_Decoder * d,const char * ptr,uint32_t * size)9323 static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr,
9324                                           uint32_t* size) {
9325   uint64_t size64;
9326   ptr = decode_varint64(d, ptr, &size64);
9327   if (size64 >= INT32_MAX || ptr - d->end + (int)size64 > d->limit) {
9328     decode_err(d, kUpb_DecodeStatus_Malformed);
9329   }
9330   *size = size64;
9331   return ptr;
9332 }
9333 
/* Prepares a 32-bit value for the 4-byte memcpy performed by the next stage.
 * On little-endian hosts nothing is done. Otherwise the value held in
 * uint64_val is stored, narrowed to 32 bits, into uint32_val. */
static void decode_munge_int32(wireval* val) {
  if (_upb_IsLittleEndian()) return;

  /* The next stage will memcpy(dst, &val, 4) */
  val->uint32_val = val->uint64_val;
}
9340 
/* Converts a freshly decoded varint (in val->uint64_val) into the in-memory
 * form required by descriptor type `type`:
 *   - bool: normalised to 0/1;
 *   - sint32 / sint64: zig-zag decoded;
 *   - int32 / uint32 / enum: passed to decode_munge_int32().
 * All other types are left unchanged. */
static void decode_munge(int type, wireval* val) {
  switch (type) {
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_UInt32:
    case kUpb_FieldType_Enum:
      decode_munge_int32(val);
      return;
    case kUpb_FieldType_Bool:
      val->bool_val = val->uint64_val != 0;
      return;
    case kUpb_FieldType_SInt32: {
      uint32_t zigzag = val->uint64_val;
      val->uint32_val = (zigzag >> 1) ^ -(int32_t)(zigzag & 1);
      return;
    }
    case kUpb_FieldType_SInt64: {
      uint64_t zigzag = val->uint64_val;
      val->uint64_val = (zigzag >> 1) ^ -(int64_t)(zigzag & 1);
      return;
    }
    default:
      return;
  }
}
9363 
decode_newsubmsg(upb_Decoder * d,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field)9364 static upb_Message* decode_newsubmsg(upb_Decoder* d,
9365                                      const upb_MiniTable_Sub* subs,
9366                                      const upb_MiniTable_Field* field) {
9367   const upb_MiniTable* subl = subs[field->submsg_index].submsg;
9368   upb_Message* msg = _upb_Message_New_inl(subl, &d->arena);
9369   if (!msg) decode_err(d, kUpb_DecodeStatus_OutOfMemory);
9370   return msg;
9371 }
9372 
9373 UPB_NOINLINE
decode_isdonefallback(upb_Decoder * d,const char * ptr,int overrun)9374 const char* decode_isdonefallback(upb_Decoder* d, const char* ptr,
9375                                   int overrun) {
9376   int status;
9377   ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
9378   if (ptr == NULL) {
9379     return decode_err(d, status);
9380   }
9381   return ptr;
9382 }
9383 
/* Reads a `size`-byte string starting at `ptr` into `*str` and returns the
 * position just past it.
 *
 * With kUpb_DecodeOption_AliasString the view points directly into the input
 * buffer; otherwise the bytes are copied into the decoder's arena. Aborts
 * decoding with kUpb_DecodeStatus_OutOfMemory if that allocation fails. */
static const char* decode_readstr(upb_Decoder* d, const char* ptr, int size,
                                  upb_StringView* str) {
  const char* data = ptr;

  if (!(d->options & kUpb_DecodeOption_AliasString)) {
    char* copy = upb_Arena_Malloc(&d->arena, size);
    if (!copy) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    memcpy(copy, ptr, size);
    data = copy;
  }

  str->data = data;
  str->size = size;
  return ptr + size;
}
9397 
9398 UPB_FORCEINLINE
decode_tosubmsg2(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable * subl,int size)9399 static const char* decode_tosubmsg2(upb_Decoder* d, const char* ptr,
9400                                     upb_Message* submsg,
9401                                     const upb_MiniTable* subl, int size) {
9402   int saved_delta = decode_pushlimit(d, ptr, size);
9403   if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
9404   ptr = decode_msg(d, ptr, submsg, subl);
9405   if (d->end_group != DECODE_NOGROUP)
9406     return decode_err(d, kUpb_DecodeStatus_Malformed);
9407   decode_poplimit(d, ptr, saved_delta);
9408   d->depth++;
9409   return ptr;
9410 }
9411 
9412 UPB_FORCEINLINE
decode_tosubmsg(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field,int size)9413 static const char* decode_tosubmsg(upb_Decoder* d, const char* ptr,
9414                                    upb_Message* submsg,
9415                                    const upb_MiniTable_Sub* subs,
9416                                    const upb_MiniTable_Field* field, int size) {
9417   return decode_tosubmsg2(d, ptr, submsg, subs[field->submsg_index].submsg,
9418                           size);
9419 }
9420 
9421 UPB_FORCEINLINE
decode_group(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable * subl,uint32_t number)9422 static const char* decode_group(upb_Decoder* d, const char* ptr,
9423                                 upb_Message* submsg, const upb_MiniTable* subl,
9424                                 uint32_t number) {
9425   if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
9426   if (decode_isdone(d, &ptr)) {
9427     return decode_err(d, kUpb_DecodeStatus_Malformed);
9428   }
9429   ptr = decode_msg(d, ptr, submsg, subl);
9430   if (d->end_group != number) return decode_err(d, kUpb_DecodeStatus_Malformed);
9431   d->end_group = DECODE_NOGROUP;
9432   d->depth++;
9433   return ptr;
9434 }
9435 
9436 UPB_FORCEINLINE
decode_togroup(upb_Decoder * d,const char * ptr,upb_Message * submsg,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field)9437 static const char* decode_togroup(upb_Decoder* d, const char* ptr,
9438                                   upb_Message* submsg,
9439                                   const upb_MiniTable_Sub* subs,
9440                                   const upb_MiniTable_Field* field) {
9441   const upb_MiniTable* subl = subs[field->submsg_index].submsg;
9442   return decode_group(d, ptr, submsg, subl, field->number);
9443 }
9444 
/* Writes `val` as a varint starting at `ptr` and returns the position just
 * past the last byte written. Between 1 and 5 bytes are produced; the caller
 * must provide that much space. */
static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) {
  for (;;) {
    uint8_t byte = val & 0x7fU;
    val >>= 7;
    if (val == 0) {
      /* Final byte: no continuation bit. */
      *(ptr++) = byte;
      return ptr;
    }
    byte |= 0x80U;
    *(ptr++) = byte;
  }
}
9454 
/* Appends two varints, `val1` followed by `val2`, to the unknown-field data
 * of `msg`. Aborts decoding with kUpb_DecodeStatus_OutOfMemory if the data
 * cannot be added. */
static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
                                         uint32_t val1, uint32_t val2) {
  char buf[20];
  char* end = upb_Decoder_EncodeVarint32(val1, buf);
  end = upb_Decoder_EncodeVarint32(val2, end);

  if (_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) return;
  decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
9466 
9467 UPB_NOINLINE
decode_checkenum_slow(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable_Enum * e,const upb_MiniTable_Field * field,uint32_t v)9468 static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr,
9469                                   upb_Message* msg, const upb_MiniTable_Enum* e,
9470                                   const upb_MiniTable_Field* field,
9471                                   uint32_t v) {
9472   // OPT: binary search long lists?
9473   int n = e->value_count;
9474   for (int i = 0; i < n; i++) {
9475     if ((uint32_t)e->values[i] == v) return true;
9476   }
9477 
9478   // Unrecognized enum goes into unknown fields.
9479   // For packed fields the tag could be arbitrarily far in the past, so we
9480   // just re-encode the tag and value here.
9481   uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
9482   upb_Message* unknown_msg =
9483       field->mode & kUpb_LabelFlags_IsExtension ? d->unknown_msg : msg;
9484   upb_Decode_AddUnknownVarints(d, unknown_msg, tag, v);
9485   return false;
9486 }
9487 
9488 UPB_FORCEINLINE
decode_checkenum(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable_Enum * e,const upb_MiniTable_Field * field,wireval * val)9489 static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg,
9490                              const upb_MiniTable_Enum* e,
9491                              const upb_MiniTable_Field* field, wireval* val) {
9492   uint32_t v = val->uint32_val;
9493 
9494   if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
9495 
9496   return decode_checkenum_slow(d, ptr, msg, e, field, v);
9497 }
9498 
9499 UPB_NOINLINE
decode_enum_toarray(upb_Decoder * d,const char * ptr,upb_Message * msg,upb_Array * arr,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field,wireval * val)9500 static const char* decode_enum_toarray(upb_Decoder* d, const char* ptr,
9501                                        upb_Message* msg, upb_Array* arr,
9502                                        const upb_MiniTable_Sub* subs,
9503                                        const upb_MiniTable_Field* field,
9504                                        wireval* val) {
9505   const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
9506   if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
9507   void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
9508   arr->len++;
9509   memcpy(mem, val, 4);
9510   return ptr;
9511 }
9512 
9513 UPB_FORCEINLINE
decode_fixed_packed(upb_Decoder * d,const char * ptr,upb_Array * arr,wireval * val,const upb_MiniTable_Field * field,int lg2)9514 static const char* decode_fixed_packed(upb_Decoder* d, const char* ptr,
9515                                        upb_Array* arr, wireval* val,
9516                                        const upb_MiniTable_Field* field,
9517                                        int lg2) {
9518   int mask = (1 << lg2) - 1;
9519   size_t count = val->size >> lg2;
9520   if ((val->size & mask) != 0) {
9521     // Length isn't a round multiple of elem size.
9522     return decode_err(d, kUpb_DecodeStatus_Malformed);
9523   }
9524   decode_reserve(d, arr, count);
9525   void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
9526   arr->len += count;
9527   // Note: if/when the decoder supports multi-buffer input, we will need to
9528   // handle buffer seams here.
9529   if (_upb_IsLittleEndian()) {
9530     memcpy(mem, ptr, val->size);
9531     ptr += val->size;
9532   } else {
9533     const char* end = ptr + val->size;
9534     char* dst = mem;
9535     while (ptr < end) {
9536       if (lg2 == 2) {
9537         uint32_t val;
9538         memcpy(&val, ptr, sizeof(val));
9539         val = _upb_BigEndian_Swap32(val);
9540         memcpy(dst, &val, sizeof(val));
9541       } else {
9542         UPB_ASSERT(lg2 == 3);
9543         uint64_t val;
9544         memcpy(&val, ptr, sizeof(val));
9545         val = _upb_BigEndian_Swap64(val);
9546         memcpy(dst, &val, sizeof(val));
9547       }
9548       ptr += 1 << lg2;
9549       dst += 1 << lg2;
9550     }
9551   }
9552 
9553   return ptr;
9554 }
9555 
9556 UPB_FORCEINLINE
decode_varint_packed(upb_Decoder * d,const char * ptr,upb_Array * arr,wireval * val,const upb_MiniTable_Field * field,int lg2)9557 static const char* decode_varint_packed(upb_Decoder* d, const char* ptr,
9558                                         upb_Array* arr, wireval* val,
9559                                         const upb_MiniTable_Field* field,
9560                                         int lg2) {
9561   int scale = 1 << lg2;
9562   int saved_limit = decode_pushlimit(d, ptr, val->size);
9563   char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
9564   while (!decode_isdone(d, &ptr)) {
9565     wireval elem;
9566     ptr = decode_varint64(d, ptr, &elem.uint64_val);
9567     decode_munge(field->descriptortype, &elem);
9568     if (decode_reserve(d, arr, 1)) {
9569       out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
9570     }
9571     arr->len++;
9572     memcpy(out, &elem, scale);
9573     out += scale;
9574   }
9575   decode_poplimit(d, ptr, saved_limit);
9576   return ptr;
9577 }
9578 
9579 UPB_NOINLINE
decode_enum_packed(upb_Decoder * d,const char * ptr,upb_Message * msg,upb_Array * arr,const upb_MiniTable_Sub * subs,const upb_MiniTable_Field * field,wireval * val)9580 static const char* decode_enum_packed(upb_Decoder* d, const char* ptr,
9581                                       upb_Message* msg, upb_Array* arr,
9582                                       const upb_MiniTable_Sub* subs,
9583                                       const upb_MiniTable_Field* field,
9584                                       wireval* val) {
9585   const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
9586   int saved_limit = decode_pushlimit(d, ptr, val->size);
9587   char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
9588   while (!decode_isdone(d, &ptr)) {
9589     wireval elem;
9590     ptr = decode_varint64(d, ptr, &elem.uint64_val);
9591     decode_munge_int32(&elem);
9592     if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
9593       continue;
9594     }
9595     if (decode_reserve(d, arr, 1)) {
9596       out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
9597     }
9598     arr->len++;
9599     memcpy(out, &elem, 4);
9600     out += 4;
9601   }
9602   decode_poplimit(d, ptr, saved_limit);
9603   return ptr;
9604 }
9605 
/* Decodes one wire occurrence of a repeated field and appends the result to
 * the field's upb_Array, creating the array on first use.
 *
 *   d      decoder state (arena, limits, error jump buffer).
 *   ptr    read position just after the value header already parsed by the
 *          caller.
 *   msg    message that owns the field; the array pointer is stored at
 *          msg + field->offset.
 *   subs   sub-table array, indexed by field->submsg_index by the helpers.
 *   field  mini-table entry of the field being decoded.
 *   val    decoded wire value: the scalar payload, or val->size for
 *          length-delimited data.
 *   op     operation selected by the caller (one of the OP_* values).
 *
 * Returns the read position after any payload consumed here.  Jumps out via
 * decode_err() with kUpb_DecodeStatus_OutOfMemory if the array cannot be
 * created. */
static const char* decode_toarray(upb_Decoder* d, const char* ptr,
                                  upb_Message* msg,
                                  const upb_MiniTable_Sub* subs,
                                  const upb_MiniTable_Field* field,
                                  wireval* val, int op) {
  upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void);
  upb_Array* arr = *arrp;
  void* mem;

  if (arr) {
    /* Make room for at least one more element.  The return value is not
     * needed here because element addresses are computed afterwards. */
    decode_reserve(d, arr, 1);
  } else {
    /* First occurrence: create the array with the element size (log2) that
     * matches the field's descriptor type. */
    size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype];
    arr = _upb_Array_New(&d->arena, 4, lg2);
    if (!arr) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    *arrp = arr;
  }

  switch (op) {
    case OP_SCALAR_LG2(0):
    case OP_SCALAR_LG2(2):
    case OP_SCALAR_LG2(3):
      /* Append scalar value.  `op` itself is used as the log2 of the element
       * size, both for the offset and for the number of bytes copied. */
      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
      arr->len++;
      memcpy(mem, val, 1 << op);
      return ptr;
    case OP_STRING:
      decode_verifyutf8(d, ptr, val->size);
      /* Fallthrough. */
    case OP_BYTES: {
      /* Append bytes. */
      upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->len;
      arr->len++;
      return decode_readstr(d, ptr, val->size, str);
    }
    case OP_SUBMSG: {
      /* Append submessage / group.  The new message pointer is stored in the
       * array before its contents are parsed. */
      upb_Message* submsg = decode_newsubmsg(d, subs, field);
      *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void*), upb_Message*) =
          submsg;
      arr->len++;
      if (UPB_UNLIKELY(field->descriptortype == kUpb_FieldType_Group)) {
        return decode_togroup(d, ptr, submsg, subs, field);
      } else {
        return decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
      }
    }
    case OP_FIXPCK_LG2(2):
    case OP_FIXPCK_LG2(3):
      /* Packed fixed-width elements; the last argument is the log2 element
       * size recovered from the op value. */
      return decode_fixed_packed(d, ptr, arr, val, field,
                                 op - OP_FIXPCK_LG2(0));
    case OP_VARPCK_LG2(0):
    case OP_VARPCK_LG2(2):
    case OP_VARPCK_LG2(3):
      /* Packed varint elements; the last argument is the log2 element size
       * recovered from the op value. */
      return decode_varint_packed(d, ptr, arr, val, field,
                                  op - OP_VARPCK_LG2(0));
    case OP_ENUM:
      return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
    case OP_PACKED_ENUM:
      return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
    default:
      UPB_UNREACHABLE();
  }
}
9671 
/* Decodes one map entry of map field `field` and inserts it into the field's
 * upb_Map, creating the map on first use.
 *
 *   d      decoder state.
 *   ptr    read position at the start of the entry payload.
 *   msg    message that owns the map; the map pointer lives at
 *          msg + field->offset.
 *   subs   sub-table array; subs[field->submsg_index].submsg is the map-entry
 *          mini-table.
 *   field  mini-table entry of the map field.
 *   val    decoded wire value; val->size is the entry's byte length.
 *
 * The entry is parsed into a temporary upb_MapEntry.  If parsing left unknown
 * fields on the entry, the whole entry is re-emitted (tag, length, payload)
 * into msg's unknown fields instead of being inserted into the map.
 *
 * Returns the read position after the entry.  Jumps out via decode_err() with
 * kUpb_DecodeStatus_OutOfMemory when an allocation fails. */
static const char* decode_tomap(upb_Decoder* d, const char* ptr,
                                upb_Message* msg, const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* field,
                                wireval* val) {
  upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*);
  upb_Map* map = *map_p;
  upb_MapEntry ent;
  const upb_MiniTable* entry = subs[field->submsg_index].submsg;

  if (!map) {
    /* Lazily create map. */
    const upb_MiniTable_Field* key_field = &entry->fields[0];
    const upb_MiniTable_Field* val_field = &entry->fields[1];
    char key_size = desctype_to_mapsize[key_field->descriptortype];
    char val_size = desctype_to_mapsize[val_field->descriptortype];
    UPB_ASSERT(key_field->offset == 0);
    UPB_ASSERT(val_field->offset == sizeof(upb_StringView));
    map = _upb_Map_New(&d->arena, key_size, val_size);
    /* The map is dereferenced below (map->key_size), so a failed allocation
     * must be reported here, mirroring the array path in decode_toarray(). */
    if (!map) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    *map_p = map;
  }

  /* Parse map entry. */
  memset(&ent, 0, sizeof(ent));

  if (entry->fields[1].descriptortype == kUpb_FieldType_Message ||
      entry->fields[1].descriptortype == kUpb_FieldType_Group) {
    /* Create proactively to handle the case where it doesn't appear. */
    upb_Message* value_msg =
        _upb_Message_New(entry->subs[0].submsg, &d->arena);
    /* Guard against a NULL result so that a missing message is never parsed
     * into or stored as a map value. */
    if (!value_msg) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    ent.v.val = upb_value_ptr(value_msg);
  }

  const char* start = ptr;
  ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size);
  /* Check whether parsing the entry produced any unknown fields. */
  size_t size;
  upb_Message_GetUnknown(&ent.k, &size);
  if (size != 0) {
    /* Preserve the entire entry as an unknown field of the parent message:
     * first the tag and length varints, then the raw payload bytes. */
    uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
    upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start));
    if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
      decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    }
  } else {
    if (_upb_Map_Insert(map, &ent.k, map->key_size, &ent.v, map->val_size,
                        &d->arena) == _kUpb_MapInsertStatus_OutOfMemory) {
      decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    }
  }
  return ptr;
}
9722 
/* Stores one decoded value into a non-repeated field of `msg`.
 *
 *   d      decoder state.
 *   ptr    read position just after the value header already parsed by the
 *          caller.
 *   msg    message to store into; the field lives at msg + field->offset.
 *   subs   sub-table array, indexed by field->submsg_index.
 *   field  mini-table entry of the field.
 *   val    decoded wire value: the scalar payload, or val->size for
 *          length-delimited data.
 *   op     operation selected by the caller (one of the OP_* values).
 *
 * Returns the read position after any payload consumed here. */
static const char* decode_tomsg(upb_Decoder* d, const char* ptr,
                                upb_Message* msg, const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* field, wireval* val,
                                int op) {
  void* mem = UPB_PTR_AT(msg, field->offset, void);
  int type = field->descriptortype;

  /* Enum values rejected by decode_checkenum() leave the field and its
   * presence information untouched. */
  if (UPB_UNLIKELY(op == OP_ENUM) &&
      !decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
                        val)) {
    return ptr;
  }

  /* Set presence if necessary. */
  if (field->presence > 0) {
    _upb_sethas_field(msg, field);
  } else if (field->presence < 0) {
    /* Oneof case */
    uint32_t* oneof_case = _upb_oneofcase_field(msg, field);
    if (op == OP_SUBMSG && *oneof_case != field->number) {
      /* A different oneof member currently occupies this storage.  Clear the
       * slot so the OP_SUBMSG branch below sees a NULL pointer and allocates
       * a fresh sub-message instead of reusing the stale bytes. */
      memset(mem, 0, sizeof(void*));
    }
    *oneof_case = field->number;
  }

  /* Store into message. */
  switch (op) {
    case OP_SUBMSG: {
      /* Reuse an existing sub-message if one is already present; otherwise
       * create it before parsing into it. */
      upb_Message** submsgp = mem;
      upb_Message* submsg = *submsgp;
      if (!submsg) {
        submsg = decode_newsubmsg(d, subs, field);
        *submsgp = submsg;
      }
      if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) {
        ptr = decode_togroup(d, ptr, submsg, subs, field);
      } else {
        ptr = decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
      }
      break;
    }
    case OP_STRING:
      decode_verifyutf8(d, ptr, val->size);
      /* Fallthrough. */
    case OP_BYTES:
      return decode_readstr(d, ptr, val->size, mem);
    case OP_SCALAR_LG2(3):
      /* 8-byte scalar. */
      memcpy(mem, val, 8);
      break;
    case OP_ENUM:
    case OP_SCALAR_LG2(2):
      /* 4-byte scalar (enums are stored as 4 bytes as well). */
      memcpy(mem, val, 4);
      break;
    case OP_SCALAR_LG2(0):
      /* 1-byte scalar. */
      memcpy(mem, val, 1);
      break;
    default:
      UPB_UNREACHABLE();
  }

  return ptr;
}
9785 
9786 UPB_NOINLINE
decode_checkrequired(upb_Decoder * d,const char * ptr,const upb_Message * msg,const upb_MiniTable * l)9787 const char* decode_checkrequired(upb_Decoder* d, const char* ptr,
9788                                  const upb_Message* msg,
9789                                  const upb_MiniTable* l) {
9790   assert(l->required_count);
9791   if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
9792     return ptr;
9793   }
9794   uint64_t msg_head;
9795   memcpy(&msg_head, msg, 8);
9796   msg_head = _upb_BigEndian_Swap64(msg_head);
9797   if (upb_MiniTable_requiredmask(l) & ~msg_head) {
9798     d->missing_required = true;
9799   }
9800   return ptr;
9801 }
9802 
9803 UPB_FORCEINLINE
decode_tryfastdispatch(upb_Decoder * d,const char ** ptr,upb_Message * msg,const upb_MiniTable * layout)9804 static bool decode_tryfastdispatch(upb_Decoder* d, const char** ptr,
9805                                    upb_Message* msg,
9806                                    const upb_MiniTable* layout) {
9807 #if UPB_FASTTABLE
9808   if (layout && layout->table_mask != (unsigned char)-1) {
9809     uint16_t tag = fastdecode_loadtag(*ptr);
9810     intptr_t table = decode_totable(layout);
9811     *ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag);
9812     return true;
9813   }
9814 #endif
9815   return false;
9816 }
9817 
/* Skips over the value of a field whose tag has already been read.
 *
 *   d    decoder state.
 *   ptr  read position just after the tag.
 *   tag  the decoded tag: (field number << 3) | wire type.
 *
 * Returns the read position after the value.  A wire type not handled below
 * is reported through decode_err() with kUpb_DecodeStatus_Malformed. */
static const char* upb_Decoder_SkipField(upb_Decoder* d, const char* ptr,
                                         uint32_t tag) {
  const int kind = tag & 7;

  if (kind == kUpb_WireType_Varint) {
    uint64_t discarded;
    return decode_varint64(d, ptr, &discarded);
  }
  if (kind == kUpb_WireType_64Bit) {
    return ptr + 8;
  }
  if (kind == kUpb_WireType_32Bit) {
    return ptr + 4;
  }
  if (kind == kUpb_WireType_Delimited) {
    uint32_t payload_len;
    ptr = upb_Decoder_DecodeSize(d, ptr, &payload_len);
    return ptr + payload_len;
  }
  if (kind == kUpb_WireType_StartGroup) {
    /* Parse the nested group without a destination message. */
    const int group_number = tag >> 3;
    return decode_group(d, ptr, NULL, NULL, group_number);
  }

  return decode_err(d, kUpb_DecodeStatus_Malformed);
}
9842 
/* Wire tags matched while decoding a MessageSet item.  Each value is
 * (field number << 3) | wire type. */
enum {
  /* Field 1, start-group: opens an item. */
  kStartItemTag = ((1 << 3) | kUpb_WireType_StartGroup),
  /* Field 1, end-group: closes an item. */
  kEndItemTag = ((1 << 3) | kUpb_WireType_EndGroup),
  /* Field 2, varint: the item's type id. */
  kTypeIdTag = ((2 << 3) | kUpb_WireType_Varint),
  /* Field 3, length-delimited: the item's message payload. */
  kMessageTag = ((3 << 3) | kUpb_WireType_Delimited),
};
9849 
/* Decodes a MessageSet payload whose type id resolved to a registered
 * extension and stores the resulting message in that extension of `msg`.
 *
 *   d        decoder state; its arena, extension registry and options are
 *            reused for the nested decode.
 *   msg      message receiving the extension.
 *   item_mt  extension mini-table found for the item's type id.
 *   data     start of the payload bytes.
 *   size     payload length in bytes.
 *
 * The sub-message pointer is written into the extension slot before a failed
 * nested decode is reported.  Errors are raised through decode_err(). */
static void upb_Decoder_AddKnownMessageSetItem(
    upb_Decoder* d, upb_Message* msg, const upb_MiniTable_Extension* item_mt,
    const char* data, uint32_t size) {
  upb_Message_Extension* slot =
      _upb_Message_GetOrCreateExtension(msg, item_mt, &d->arena);
  if (UPB_UNLIKELY(slot == NULL)) {
    decode_err(d, kUpb_DecodeStatus_OutOfMemory);
  }

  upb_Message* payload_msg =
      decode_newsubmsg(d, &slot->ext->sub, &slot->ext->field);
  const upb_DecodeStatus nested_status =
      upb_Decode(data, size, payload_msg, item_mt->sub.submsg, d->extreg,
                 d->options, &d->arena);

  memcpy(&slot->data, &payload_msg, sizeof(payload_msg));

  if (nested_status != kUpb_DecodeStatus_Ok) {
    decode_err(d, nested_status);
  }
}
9862 
/* Preserves a MessageSet item with an unrecognised type id by appending a
 * re-encoded copy of it to msg's unknown fields.
 *
 * The copy is written in three pieces:
 *   1. start-item tag, type-id tag, type id, message tag, payload length;
 *   2. the payload bytes themselves (message_data / message_size);
 *   3. the end-item tag.
 * Pieces 1 and 3 are built in a local scratch buffer.  If any append fails,
 * decode_err() is called with kUpb_DecodeStatus_OutOfMemory. */
static void upb_Decoder_AddUnknownMessageSetItem(upb_Decoder* d,
                                                 upb_Message* msg,
                                                 uint32_t type_id,
                                                 const char* message_data,
                                                 uint32_t message_size) {
  char scratch[60];

  /* Header: everything that precedes the payload. */
  char* cursor = scratch;
  cursor = upb_Decoder_EncodeVarint32(kStartItemTag, cursor);
  cursor = upb_Decoder_EncodeVarint32(kTypeIdTag, cursor);
  cursor = upb_Decoder_EncodeVarint32(type_id, cursor);
  cursor = upb_Decoder_EncodeVarint32(kMessageTag, cursor);
  cursor = upb_Decoder_EncodeVarint32(message_size, cursor);
  char* const trailer = cursor;

  /* Trailer: the closing tag that follows the payload. */
  cursor = upb_Decoder_EncodeVarint32(kEndItemTag, cursor);
  char* const trailer_end = cursor;

  bool appended =
      _upb_Message_AddUnknown(msg, scratch, trailer - scratch, &d->arena);
  appended = appended && _upb_Message_AddUnknown(msg, message_data,
                                                 message_size, &d->arena);
  appended = appended && _upb_Message_AddUnknown(
                             msg, trailer, trailer_end - trailer, &d->arena);
  if (!appended) {
    decode_err(d, kUpb_DecodeStatus_OutOfMemory);
  }
}
9886 
/* Adds one MessageSet item to `msg`.
 *
 * The type id is looked up in the decoder's extension registry for `layout`.
 * A hit is decoded as a known extension; a miss is preserved in the unknown
 * fields of `msg`. */
static void upb_Decoder_AddMessageSetItem(upb_Decoder* d, upb_Message* msg,
                                          const upb_MiniTable* layout,
                                          uint32_t type_id, const char* data,
                                          uint32_t size) {
  const upb_MiniTable_Extension* registered =
      _upb_extreg_get(d->extreg, layout, type_id);

  if (registered == NULL) {
    upb_Decoder_AddUnknownMessageSetItem(d, msg, type_id, data, size);
    return;
  }
  upb_Decoder_AddKnownMessageSetItem(d, msg, registered, data, size);
}
9899 
/* Decodes the body of a MessageSet item group (the start-item tag has already
 * been consumed by the caller) and adds the item to `msg`.
 *
 * The item consists of a type id and a message payload, which may arrive in
 * either order.  Only the first occurrence of each is used; later duplicates
 * are parsed and ignored.  Any other field inside the item is skipped.
 *
 * Returns the read position after the end-item tag.  If the input is
 * exhausted before that tag is seen, decode_err() is called with
 * kUpb_DecodeStatus_Malformed. */
static const char* upb_Decoder_DecodeMessageSetItem(
    upb_Decoder* d, const char* ptr, upb_Message* msg,
    const upb_MiniTable* layout) {
  uint32_t type_id = 0;
  /* Payload seen before the type id; consumed once the id arrives. */
  upb_StringView preserved = {NULL, 0};
  /* Tracks which of the two item parts have been seen so far. */
  typedef enum {
    kUpb_HaveId = 1 << 0,
    kUpb_HavePayload = 1 << 1,
  } StateMask;
  StateMask state_mask = 0;
  while (!decode_isdone(d, &ptr)) {
    uint32_t tag;
    ptr = decode_tag(d, ptr, &tag);
    switch (tag) {
      case kEndItemTag:
        return ptr;
      case kTypeIdTag: {
        uint64_t tmp;
        ptr = decode_varint64(d, ptr, &tmp);
        if (state_mask & kUpb_HaveId) break;  // Ignore dup.
        state_mask |= kUpb_HaveId;
        type_id = tmp;
        if (state_mask & kUpb_HavePayload) {
          /* The payload came first; now that the id is known, add it. */
          upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, preserved.data,
                                        preserved.size);
        }
        break;
      }
      case kMessageTag: {
        uint32_t size;
        ptr = upb_Decoder_DecodeSize(d, ptr, &size);
        const char* data = ptr;
        /* Always step over the payload, even if it ends up being ignored. */
        ptr += size;
        if (state_mask & kUpb_HavePayload) break;  // Ignore dup.
        state_mask |= kUpb_HavePayload;
        if (state_mask & kUpb_HaveId) {
          upb_Decoder_AddMessageSetItem(d, msg, layout, type_id, data, size);
        } else {
          // Out of order, we must preserve the payload.
          preserved.data = data;
          preserved.size = size;
        }
        break;
      }
      default:
        // We do not preserve unexpected fields inside a message set item.
        ptr = upb_Decoder_SkipField(d, ptr, tag);
        break;
    }
  }
  /* Ran out of input without seeing the end-item tag. */
  decode_err(d, kUpb_DecodeStatus_Malformed);
}
9952 
/* Finds the mini-table field entry for `field_number` in layout `l`.
 *
 *   d                 decoder state; d->extreg is consulted for extensions.
 *   l                 message layout, or NULL.
 *   field_number      field number taken from the wire tag.
 *   last_field_index  in/out hint: index where the previous lookup succeeded.
 *                     Updated whenever a regular field is found.
 *
 * Lookup order: dense fields (direct index), then a linear scan of the
 * remaining fields, then the extension registry (for extendable messages) or
 * the MessageSet item pseudo-field (for MessageSets).
 *
 * Never returns NULL: when nothing matches, or when `l` is NULL, a pointer to
 * a zero-initialised placeholder field is returned. */
static const upb_MiniTable_Field* decode_findfield(upb_Decoder* d,
                                                   const upb_MiniTable* l,
                                                   uint32_t field_number,
                                                   int* last_field_index) {
  /* Placeholder returned for fields that are not found. */
  static upb_MiniTable_Field none = {0, 0, 0, 0, 0, 0};
  if (l == NULL) return &none;

  size_t idx = ((size_t)field_number) - 1;  // 0 wraps to SIZE_MAX
  if (idx < l->dense_below) {
    /* Fastest case: index into dense fields. */
    goto found;
  }

  if (l->dense_below < l->field_count) {
    /* Linear search non-dense fields. Resume scanning from last_field_index
     * since fields are usually in order. */
    int last = *last_field_index;
    for (idx = last; idx < l->field_count; idx++) {
      if (l->fields[idx].number == field_number) {
        goto found;
      }
    }

    /* Not found after the hint: wrap around and scan the non-dense fields
     * that precede it. */
    for (idx = l->dense_below; idx < last; idx++) {
      if (l->fields[idx].number == field_number) {
        goto found;
      }
    }
  }

  if (d->extreg) {
    switch (l->ext) {
      case kUpb_ExtMode_Extendable: {
        const upb_MiniTable_Extension* ext =
            _upb_extreg_get(d->extreg, l, field_number);
        if (ext) return &ext->field;
        break;
      }
      case kUpb_ExtMode_IsMessageSet:
        if (field_number == _UPB_MSGSET_ITEM) {
          /* Pseudo-field whose descriptor type marks a MessageSet item. */
          static upb_MiniTable_Field item = {0, 0, 0, 0, TYPE_MSGSET_ITEM, 0};
          return &item;
        }
        break;
    }
  }

  return &none; /* Unknown field. */

found:
  UPB_ASSERT(l->fields[idx].number == field_number);
  *last_field_index = idx;
  return &l->fields[idx];
}
10007 
/* Reads the value header for one field from the wire and selects the
 * operation used to store it.
 *
 *   d          decoder state.
 *   ptr        read position just after the tag.
 *   field      mini-table entry returned by decode_findfield().
 *   wire_type  wire type taken from the tag.
 *   val        out: decoded scalar, or val->size for length-delimited data,
 *              or the field number for a start-group.
 *   op         out: the selected OP_* value; OP_UNKNOWN when the wire type
 *              does not fit the field's descriptor type.
 *
 * Returns the read position after the bytes consumed here.  For
 * length-delimited data only the length prefix is consumed; for groups
 * nothing is consumed.  Any wire type not handled below is reported through
 * decode_err() with kUpb_DecodeStatus_Malformed. */
UPB_FORCEINLINE
static const char* decode_wireval(upb_Decoder* d, const char* ptr,
                                  const upb_MiniTable_Field* field,
                                  int wire_type, wireval* val, int* op) {
  switch (wire_type) {
    case kUpb_WireType_Varint:
      ptr = decode_varint64(d, ptr, &val->uint64_val);
      *op = varint_ops[field->descriptortype];
      decode_munge(field->descriptortype, val);
      return ptr;
    case kUpb_WireType_32Bit:
      memcpy(&val->uint32_val, ptr, 4);
      val->uint32_val = _upb_BigEndian_Swap32(val->uint32_val);
      *op = OP_SCALAR_LG2(2);
      /* Only descriptor types listed in FIXED32_OK_MASK accept this wire
       * type; anything else is treated as an unknown field. */
      if (((1 << field->descriptortype) & FIXED32_OK_MASK) == 0) {
        *op = OP_UNKNOWN;
      }
      return ptr + 4;
    case kUpb_WireType_64Bit:
      memcpy(&val->uint64_val, ptr, 8);
      val->uint64_val = _upb_BigEndian_Swap64(val->uint64_val);
      *op = OP_SCALAR_LG2(3);
      /* Same check as above, against FIXED64_OK_MASK. */
      if (((1 << field->descriptortype) & FIXED64_OK_MASK) == 0) {
        *op = OP_UNKNOWN;
      }
      return ptr + 8;
    case kUpb_WireType_Delimited: {
      /* delim_ops has a second bank of TYPE_COUNT entries that is used for
       * array fields. */
      int ndx = field->descriptortype;
      if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += TYPE_COUNT;
      ptr = upb_Decoder_DecodeSize(d, ptr, &val->size);
      *op = delim_ops[ndx];
      return ptr;
    }
    case kUpb_WireType_StartGroup:
      val->uint32_val = field->number;
      if (field->descriptortype == kUpb_FieldType_Group) {
        *op = OP_SUBMSG;
      } else if (field->descriptortype == TYPE_MSGSET_ITEM) {
        *op = OP_MSGSET_ITEM;
      } else {
        *op = OP_UNKNOWN;
      }
      return ptr;
    default:
      break;
  }
  return decode_err(d, kUpb_DecodeStatus_Malformed);
}
10056 
10057 UPB_FORCEINLINE
decode_known(upb_Decoder * d,const char * ptr,upb_Message * msg,const upb_MiniTable * layout,const upb_MiniTable_Field * field,int op,wireval * val)10058 static const char* decode_known(upb_Decoder* d, const char* ptr,
10059                                 upb_Message* msg, const upb_MiniTable* layout,
10060                                 const upb_MiniTable_Field* field, int op,
10061                                 wireval* val) {
10062   const upb_MiniTable_Sub* subs = layout->subs;
10063   uint8_t mode = field->mode;
10064 
10065   if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) {
10066     const upb_MiniTable_Extension* ext_layout =
10067         (const upb_MiniTable_Extension*)field;
10068     upb_Message_Extension* ext =
10069         _upb_Message_GetOrCreateExtension(msg, ext_layout, &d->arena);
10070     if (UPB_UNLIKELY(!ext)) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
10071     d->unknown_msg = msg;
10072     msg = &ext->data;
10073     subs = &ext->ext->sub;
10074   }
10075 
10076   switch (mode & kUpb_FieldMode_Mask) {
10077     case kUpb_FieldMode_Array:
10078       return decode_toarray(d, ptr, msg, subs, field, val, op);
10079     case kUpb_FieldMode_Map:
10080       return decode_tomap(d, ptr, msg, subs, field, val);
10081     case kUpb_FieldMode_Scalar:
10082       return decode_tomsg(d, ptr, msg, subs, field, val, op);
10083     default:
10084       UPB_UNREACHABLE();
10085   }
10086 }
10087 
/* Walks backwards from `ptr` over the bytes of a varint whose decoded value
 * is known to be `val`, and returns a pointer to the varint's first byte.
 *
 * At least one byte is always consumed.  Each step moves one byte towards the
 * start of the buffer and folds that byte's low 7 bits into the accumulated
 * value; the walk stops as soon as the accumulated value equals `val`.  The
 * caller must guarantee that such a varint really ends immediately before
 * `ptr`, otherwise the walk does not stop inside the buffer. */
static const char* decode_reverse_skip_varint(const char* ptr, uint32_t val) {
  uint32_t rebuilt = 0;
  for (;;) {
    --ptr;
    rebuilt = (rebuilt << 7) | (uint32_t)(*ptr & 0x7f);
    if (rebuilt == val) {
      return ptr;
    }
  }
}
10097 
/* Handles a field that is not stored in a known slot: skips its value and,
 * when `msg` is non-NULL, appends the field's raw bytes (tag included) to
 * msg's unknown fields.
 *
 *   d             decoder state.
 *   ptr           read position after the value header read by
 *                 decode_wireval().
 *   msg           message to receive the unknown bytes, or NULL to discard.
 *   field_number  field number from the tag; 0 is rejected as malformed.
 *   wire_type     wire type from the tag.
 *   val           value header decoded by decode_wireval() (val.size is used
 *                 for length-delimited data).
 *
 * Returns the read position after the field.  Errors are raised through
 * decode_err(). */
static const char* decode_unknown(upb_Decoder* d, const char* ptr,
                                  upb_Message* msg, int field_number,
                                  int wire_type, wireval val) {
  if (field_number == 0) return decode_err(d, kUpb_DecodeStatus_Malformed);

  // Since unknown fields are the uncommon case, we do a little extra work here
  // to walk backwards through the buffer to find the field start.  This frees
  // up a register in the fast paths (when the field is known), which leads to
  // significant speedups in benchmarks.
  const char* start = ptr;

  /* Step over the payload of length-delimited data; `start` still points just
   * after the length prefix. */
  if (wire_type == kUpb_WireType_Delimited) ptr += val.size;
  if (msg) {
    /* Move `start` back to the first byte after the tag. */
    switch (wire_type) {
      case kUpb_WireType_Varint:
      case kUpb_WireType_Delimited:
        /* Step onto the last varint byte, then keep going while the
         * preceding byte has its continuation bit (0x80) set. */
        start--;
        while (start[-1] & 0x80) start--;
        break;
      case kUpb_WireType_32Bit:
        start -= 4;
        break;
      case kUpb_WireType_64Bit:
        start -= 8;
        break;
      default:
        break;
    }

    assert(start == d->debug_valstart);
    /* Rebuild the tag value so its bytes can be walked over in reverse. */
    uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
    start = decode_reverse_skip_varint(start, tag);
    assert(start == d->debug_tagstart);

    if (wire_type == kUpb_WireType_StartGroup) {
      /* Publish the start of the unknown data in the decoder state while the
       * group is parsed, then read it back afterwards.
       * NOTE(review): presumably decode_group() may update d->unknown when
       * the input buffer is switched - confirm. */
      d->unknown = start;
      d->unknown_msg = msg;
      ptr = decode_group(d, ptr, NULL, NULL, field_number);
      start = d->unknown;
      d->unknown = NULL;
    }
    if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
      return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
    }
  } else if (wire_type == kUpb_WireType_StartGroup) {
    /* Nothing to preserve, but the group still has to be parsed past. */
    ptr = decode_group(d, ptr, NULL, NULL, field_number);
  }
  return ptr;
}
10147 
/* Main (non-table-driven) decode loop: parses fields into `msg` according to
 * `layout` until the current limit is reached, an end-group tag is seen, or
 * the fast decoder takes over.
 *
 *   d       decoder state.
 *   ptr     read position at the start of a field.
 *   msg     destination message.
 *   layout  mini-table describing msg; may be NULL (the final required-field
 *           check and decode_findfield() both handle NULL).
 *
 * Returns the read position where parsing stopped.  On an end-group tag the
 * group's field number is stored in d->end_group and the function returns
 * immediately, without the required-field check. */
UPB_NOINLINE
static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
                              const upb_MiniTable* layout) {
  /* Lookup hint shared across iterations; see decode_findfield(). */
  int last_field_index = 0;

#if UPB_FASTTABLE
  // The first time we want to skip fast dispatch, because we may have just been
  // invoked by the fast parser to handle a case that it bailed on.
  if (!decode_isdone(d, &ptr)) goto nofast;
#endif

  while (!decode_isdone(d, &ptr)) {
    uint32_t tag;
    const upb_MiniTable_Field* field;
    int field_number;
    int wire_type;
    wireval val;
    int op;

    /* If the fast decoder accepts the message, it has parsed from here on and
     * ptr already holds the position it returned. */
    if (decode_tryfastdispatch(d, &ptr, msg, layout)) break;

#if UPB_FASTTABLE
  /* Entry point for the first iteration (jumped to from above). */
  nofast:
#endif

#ifndef NDEBUG
    d->debug_tagstart = ptr;
#endif

    UPB_ASSERT(ptr < d->limit_ptr);
    ptr = decode_tag(d, ptr, &tag);
    field_number = tag >> 3;
    wire_type = tag & 7;

#ifndef NDEBUG
    d->debug_valstart = ptr;
#endif

    if (wire_type == kUpb_WireType_EndGroup) {
      d->end_group = field_number;
      return ptr;
    }

    field = decode_findfield(d, layout, field_number, &last_field_index);
    ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);

    /* Non-negative ops store into a known field; negative ops are the
     * special cases handled below. */
    if (op >= 0) {
      ptr = decode_known(d, ptr, msg, layout, field, op, &val);
    } else {
      switch (op) {
        case OP_UNKNOWN:
          ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val);
          break;
        case OP_MSGSET_ITEM:
          ptr = upb_Decoder_DecodeMessageSetItem(d, ptr, msg, layout);
          break;
      }
    }
  }

  return UPB_UNLIKELY(layout && layout->required_count)
             ? decode_checkrequired(d, ptr, msg, layout)
             : ptr;
}
10212 
/* Fallback entry used by the table-driven decoder: merges the pending
 * `hasbits` into the first 32-bit word of `msg` and continues parsing with
 * the generic decode_msg() loop, using the layout recovered from `table`.
 *
 * `data` is unused.  Returns whatever decode_msg() returns. */
const char* fastdecode_generic(struct upb_Decoder* d, const char* ptr,
                               upb_Message* msg, intptr_t table,
                               uint64_t hasbits, uint64_t data) {
  uint32_t* const first_word = (uint32_t*)msg;

  (void)data;
  *first_word |= hasbits;

  const upb_MiniTable* layout = decode_totablep(table);
  return decode_msg(d, ptr, msg, layout);
}
10220 
decode_top(struct upb_Decoder * d,const char * buf,void * msg,const upb_MiniTable * l)10221 static upb_DecodeStatus decode_top(struct upb_Decoder* d, const char* buf,
10222                                    void* msg, const upb_MiniTable* l) {
10223   if (!decode_tryfastdispatch(d, &buf, msg, l)) {
10224     decode_msg(d, buf, msg, l);
10225   }
10226   if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed;
10227   if (d->missing_required) return kUpb_DecodeStatus_MissingRequired;
10228   return kUpb_DecodeStatus_Ok;
10229 }
10230 
/* Public entry point: decodes `size` bytes at `buf` into `msg`.
 *
 *   buf      serialized input.
 *   size     number of input bytes.
 *   msg      destination message.
 *   l        mini-table describing msg.
 *   extreg   extension registry used to resolve extensions; may be NULL
 *            (decode_findfield() checks it before use).
 *   options  the low 16 bits are stored as decoder option flags; the high 16
 *            bits, when non-zero, give the depth limit (default 64).
 *   arena    arena used for all allocations made while decoding.
 *
 * Returns kUpb_DecodeStatus_Ok on success, or the status passed to the error
 * jump / produced by decode_top() otherwise. */
upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
                            const upb_MiniTable* l,
                            const upb_ExtensionRegistry* extreg, int options,
                            upb_Arena* arena) {
  upb_Decoder state;
  unsigned depth = (unsigned)options >> 16;

  if (size <= 16) {
    /* Small inputs are copied into the decoder's zero-filled patch buffer and
     * parsed from there. */
    memset(&state.patch, 0, 32);
    if (size) memcpy(&state.patch, buf, size);
    buf = state.patch;
    state.end = buf + size;
    state.limit = 0;
    options &= ~kUpb_DecodeOption_AliasString;  // Can't alias patch buf.
  } else {
    /* NOTE(review): the end pointer is placed 16 bytes before the real end
     * and the limit set to 16, presumably so reads may run up to 16 bytes
     * past `end` without per-byte bounds checks - confirm. */
    state.end = buf + size - 16;
    state.limit = 16;
  }

  state.extreg = extreg;
  state.limit_ptr = state.end;
  state.unknown = NULL;
  state.depth = depth ? depth : 64;
  state.end_group = DECODE_NOGROUP;
  state.options = (uint16_t)options;
  state.missing_required = false;
  /* Work on a local copy of the arena's allocation state; `parent` keeps a
   * link back to the caller's arena. */
  state.arena.head = arena->head;
  state.arena.last_size = arena->last_size;
  state.arena.cleanup_metadata = arena->cleanup_metadata;
  state.arena.parent = arena;

  /* The initial pass yields kUpb_DecodeStatus_Ok and runs the decode; a jump
   * through state.err resumes here with the failing status instead. */
  upb_DecodeStatus status = UPB_SETJMP(state.err);
  if (UPB_LIKELY(status == kUpb_DecodeStatus_Ok)) {
    status = decode_top(&state, buf, msg, l);
  }

  /* Publish the updated allocation state back to the caller's arena, on
   * success and on failure alike. */
  arena->head.ptr = state.arena.head.ptr;
  arena->head.end = state.arena.head.end;
  arena->cleanup_metadata = state.arena.cleanup_metadata;
  return status;
}
10272 
10273 #undef OP_UNKNOWN
10274 #undef OP_SKIP
10275 #undef OP_SCALAR_LG2
10276 #undef OP_FIXPCK_LG2
10277 #undef OP_VARPCK_LG2
10278 #undef OP_STRING
10279 #undef OP_BYTES
10280 #undef OP_SUBMSG
10281 
10282 /** upb/encode.c ************************************************************/
10283 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
10284 
10285 
10286 #include <setjmp.h>
10287 #include <string.h>
10288 
10289 
10290 /* Must be last. */
10291 
10292 #define UPB_PB_VARINT_MAX_LEN 10
10293 
/* Writes `val` as a base-128 varint to `buf`, least-significant group first,
 * and returns the number of bytes written (1 to 10 for a 64-bit value).
 *
 * Every byte except the last has its high bit set.  `buf` must have room for
 * the full encoding. */
UPB_NOINLINE
static size_t encode_varint64(uint64_t val, char* buf) {
  size_t written = 0;

  /* Emit continuation bytes while more than 7 bits remain. */
  while (val >= 0x80U) {
    const uint8_t group = (uint8_t)((val & 0x7fU) | 0x80U);
    buf[written++] = group;
    val >>= 7;
  }

  /* Final byte: remaining bits, high bit clear. */
  const uint8_t last = (uint8_t)val;
  buf[written++] = last;
  return written;
}
10305 
/* ZigZag-encodes a signed 32-bit value so that values of small magnitude map
 * to small unsigned values: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The sign mask is built with a comparison rather than `n >> 31`, because
 * right-shifting a negative signed value is implementation-defined in C. */
static uint32_t encode_zz32(int32_t n) {
  const uint32_t sign_mask = (n < 0) ? UINT32_MAX : 0u;
  return ((uint32_t)n << 1) ^ sign_mask;
}
/* ZigZag-encodes a signed 64-bit value so that values of small magnitude map
 * to small unsigned values: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The sign mask is built with a comparison rather than `n >> 63`, because
 * right-shifting a negative signed value is implementation-defined in C. */
static uint64_t encode_zz64(int64_t n) {
  const uint64_t sign_mask = (n < 0) ? UINT64_MAX : 0u;
  return ((uint64_t)n << 1) ^ sign_mask;
}
10312 
/* State shared by the encode_* helpers while serializing one message. */
typedef struct {
  /* Jump target used by encode_err() to abandon encoding. */
  jmp_buf err;
  /* Allocator handed to upb_realloc() when the output buffer grows. */
  upb_alloc* alloc;
  /* Output buffer is [buf, limit).  Output is produced back-to-front: ptr
   * moves downwards from limit as bytes are reserved, so the bytes written so
   * far occupy [ptr, limit). */
  char *buf, *ptr, *limit;
  /* NOTE(review): presumably the caller's encode option flags - not used by
   * the helpers visible here; confirm. */
  int options;
  /* NOTE(review): presumably the remaining nesting depth budget - confirm. */
  int depth;
  /* NOTE(review): presumably scratch state for emitting map entries in
   * sorted order - confirm. */
  _upb_mapsorter sorter;
} upb_encstate;
10321 
/* Rounds `bytes` up to a power of two, with a minimum result of 128.
 *
 * If `bytes` is larger than the greatest power of two representable in
 * size_t, no such power exists; `bytes` itself is returned in that case
 * instead of doubling past the range of size_t (which would wrap to zero and
 * never terminate). */
static size_t upb_roundup_pow2(size_t bytes) {
  /* Largest power of two that fits in size_t. */
  const size_t max_pow2 = ((size_t)-1 >> 1) + 1;
  size_t ret = 128;
  while (ret < bytes) {
    if (ret >= max_pow2) return bytes; /* Doubling again would overflow. */
    ret *= 2;
  }
  return ret;
}
10329 
encode_err(upb_encstate * e)10330 UPB_NORETURN static void encode_err(upb_encstate* e) { UPB_LONGJMP(e->err, 1); }
10331 
10332 UPB_NOINLINE
encode_growbuffer(upb_encstate * e,size_t bytes)10333 static void encode_growbuffer(upb_encstate* e, size_t bytes) {
10334   size_t old_size = e->limit - e->buf;
10335   size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
10336   char* new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
10337 
10338   if (!new_buf) encode_err(e);
10339 
10340   /* We want previous data at the end, realloc() put it at the beginning. */
10341   if (old_size > 0) {
10342     memmove(new_buf + new_size - old_size, e->buf, old_size);
10343   }
10344 
10345   e->ptr = new_buf + new_size - (e->limit - e->ptr);
10346   e->limit = new_buf + new_size;
10347   e->buf = new_buf;
10348 
10349   e->ptr -= bytes;
10350 }
10351 
10352 /* Call to ensure that at least "bytes" bytes are available for writing at
10353  * e->ptr.  Returns false if the bytes could not be allocated. */
10354 UPB_FORCEINLINE
encode_reserve(upb_encstate * e,size_t bytes)10355 static void encode_reserve(upb_encstate* e, size_t bytes) {
10356   if ((size_t)(e->ptr - e->buf) < bytes) {
10357     encode_growbuffer(e, bytes);
10358     return;
10359   }
10360 
10361   e->ptr -= bytes;
10362 }
10363 
10364 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_encstate * e,const void * data,size_t len)10365 static void encode_bytes(upb_encstate* e, const void* data, size_t len) {
10366   if (len == 0) return; /* memcpy() with zero size is UB */
10367   encode_reserve(e, len);
10368   memcpy(e->ptr, data, len);
10369 }
10370 
/* Emits a fixed-width 64-bit value.  The value is passed through
 * _upb_BigEndian_Swap64() and its 8 bytes are then written to the output. */
static void encode_fixed64(upb_encstate* e, uint64_t val) {
  const uint64_t wire_value = _upb_BigEndian_Swap64(val);
  encode_bytes(e, &wire_value, sizeof(wire_value));
}
10375 
/* Emits a fixed-width 32-bit value.  The value is passed through
 * _upb_BigEndian_Swap32() and its 4 bytes are then written to the output. */
static void encode_fixed32(upb_encstate* e, uint32_t val) {
  const uint32_t wire_value = _upb_BigEndian_Swap32(val);
  encode_bytes(e, &wire_value, sizeof(wire_value));
}
10380 
10381 UPB_NOINLINE
encode_longvarint(upb_encstate * e,uint64_t val)10382 static void encode_longvarint(upb_encstate* e, uint64_t val) {
10383   size_t len;
10384   char* start;
10385 
10386   encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
10387   len = encode_varint64(val, e->ptr);
10388   start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
10389   memmove(start, e->ptr, len);
10390   e->ptr = start;
10391 }
10392 
10393 UPB_FORCEINLINE
encode_varint(upb_encstate * e,uint64_t val)10394 static void encode_varint(upb_encstate* e, uint64_t val) {
10395   if (val < 128 && e->ptr != e->buf) {
10396     --e->ptr;
10397     *e->ptr = val;
10398   } else {
10399     encode_longvarint(e, val);
10400   }
10401 }
10402 
/* Emits a double as a fixed 64-bit value holding the double's object
 * representation (copied bit-for-bit with memcpy). */
static void encode_double(upb_encstate* e, double d) {
  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));

  uint64_t raw_bits;
  memcpy(&raw_bits, &d, sizeof(raw_bits));
  encode_fixed64(e, raw_bits);
}
10409 
/* Prepends a float by copying its object representation into a uint32_t and
 * writing that with encode_fixed32(). */
static void encode_float(upb_encstate* e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed32(e, bits);
}
10416 
/* Prepends a field tag: the field number shifted left by three bits, OR'd
 * with the wire type, written as a varint. */
static void encode_tag(upb_encstate* e, uint32_t field_number,
                       uint8_t wire_type) {
  const uint32_t tag = (field_number << 3) | wire_type;
  encode_varint(e, tag);
}
10421 
/* Prepends every element of a fixed-width array (elem_size is 4 or 8).
 *
 * When no per-element tag is requested (tag == 0) and _upb_IsLittleEndian()
 * is true, the whole array is written with a single encode_bytes() call.
 * Otherwise the elements are visited from last to first; each one is passed
 * through the matching _upb_BigEndian_Swap helper and written, followed by
 * the tag varint when "tag" is non-zero.
 *
 * The element loop does not handle an empty array; the visible caller
 * (encode_array) returns early when arr->len is 0. */
static void encode_fixedarray(upb_encstate* e, const upb_Array* arr,
                              size_t elem_size, uint32_t tag) {
  const size_t total = arr->len * elem_size;
  const char* base = _upb_array_constptr(arr);

  if (!tag && _upb_IsLittleEndian()) {
    encode_bytes(e, base, total);
    return;
  }

  const char* cur = base + total - elem_size;
  for (;;) {
    if (elem_size == 4) {
      uint32_t val;
      memcpy(&val, cur, sizeof(val));
      val = _upb_BigEndian_Swap32(val);
      encode_bytes(e, &val, elem_size);
    } else {
      UPB_ASSERT(elem_size == 8);
      uint64_t val;
      memcpy(&val, cur, sizeof(val));
      val = _upb_BigEndian_Swap64(val);
      encode_bytes(e, &val, elem_size);
    }

    if (tag) encode_varint(e, tag);
    if (cur == base) break;
    cur -= elem_size;
  }
}
10451 
10452 static void encode_message(upb_encstate* e, const upb_Message* msg,
10453                            const upb_MiniTable* m, size_t* size);
10454 
/* Encodes one non-repeated value located at "_field_mem", described by "f".
 *
 * The payload is written first and the field tag last, because the buffer is
 * filled from back to front.  For group and message fields a NULL
 * sub-message pointer means nothing is written at all (no tag either).
 * e->depth is decremented while a sub-message is being encoded and
 * encode_err() is called when it reaches zero. */
static void encode_scalar(upb_encstate* e, const void* _field_mem,
                          const upb_MiniTable_Sub* subs,
                          const upb_MiniTable_Field* f) {
  const char* src = _field_mem;
  int wire_type;

/* Loads a value of type "ctype" from src into "v", encodes "expr" (which may
 * refer to v) with encode_<enc>() and records the wire type. */
#define EMIT(ctype, enc, wtype, expr) \
  {                                   \
    ctype v = *(ctype*)src;           \
    encode_##enc(e, expr);            \
    wire_type = wtype;                \
  }                                   \
  break

  switch (f->descriptortype) {
    case kUpb_FieldType_Double:
      EMIT(double, double, kUpb_WireType_64Bit, v);
    case kUpb_FieldType_Float:
      EMIT(float, float, kUpb_WireType_32Bit, v);
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_UInt64:
      EMIT(uint64_t, varint, kUpb_WireType_Varint, v);
    case kUpb_FieldType_UInt32:
      EMIT(uint32_t, varint, kUpb_WireType_Varint, v);
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_Enum:
      /* Widened through int64_t so negative values are sign-extended. */
      EMIT(int32_t, varint, kUpb_WireType_Varint, (int64_t)v);
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_Fixed64:
      EMIT(uint64_t, fixed64, kUpb_WireType_64Bit, v);
    case kUpb_FieldType_Fixed32:
    case kUpb_FieldType_SFixed32:
      EMIT(uint32_t, fixed32, kUpb_WireType_32Bit, v);
    case kUpb_FieldType_Bool:
      EMIT(bool, varint, kUpb_WireType_Varint, v);
    case kUpb_FieldType_SInt32:
      EMIT(int32_t, varint, kUpb_WireType_Varint, encode_zz32(v));
    case kUpb_FieldType_SInt64:
      EMIT(int64_t, varint, kUpb_WireType_Varint, encode_zz64(v));
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes: {
      upb_StringView str = *(upb_StringView*)src;
      encode_bytes(e, str.data, str.size);
      encode_varint(e, str.size);
      wire_type = kUpb_WireType_Delimited;
      break;
    }
    case kUpb_FieldType_Group: {
      size_t sub_size;
      void* child = *(void**)src;
      const upb_MiniTable* child_table = subs[f->submsg_index].submsg;
      if (child == NULL) return;
      if (--e->depth == 0) encode_err(e);
      /* The end-group tag goes in first, so it lands after the body. */
      encode_tag(e, f->number, kUpb_WireType_EndGroup);
      encode_message(e, child, child_table, &sub_size);
      wire_type = kUpb_WireType_StartGroup;
      e->depth++;
      break;
    }
    case kUpb_FieldType_Message: {
      size_t sub_size;
      void* child = *(void**)src;
      const upb_MiniTable* child_table = subs[f->submsg_index].submsg;
      if (child == NULL) return;
      if (--e->depth == 0) encode_err(e);
      encode_message(e, child, child_table, &sub_size);
      encode_varint(e, sub_size);
      wire_type = kUpb_WireType_Delimited;
      e->depth++;
      break;
    }
    default:
      UPB_UNREACHABLE();
  }
#undef EMIT

  encode_tag(e, f->number, wire_type);
}
10537 
/* Encodes the repeated field "f" of "msg".
 *
 * Nothing is written when the array pointer is NULL or the array is empty.
 * Elements are visited from last to first because the output buffer is
 * filled from back to front.
 *
 * For numeric element types: when the field is packed, the elements are
 * written without individual tags and then a single length varint plus a
 * delimited tag are written in front of them; otherwise each element gets
 * its own tag.  String, bytes, group and message elements always get their
 * own tag and return directly from the switch.
 *
 * e->depth is decremented while group/message elements are encoded, and
 * encode_err() is called when it reaches zero. */
static void encode_array(upb_encstate* e, const upb_Message* msg,
                         const upb_MiniTable_Sub* subs,
                         const upb_MiniTable_Field* f) {
  const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*);
  bool packed = f->mode & kUpb_LabelFlags_IsPacked;
  /* Bytes already in the buffer; used below to compute the packed length. */
  size_t pre_len = e->limit - e->ptr;

  if (arr == NULL || arr->len == 0) {
    return;
  }

/* Walks the array backwards as "ctype" elements, writing "encode" (an
 * expression in terms of ptr) as a varint, plus a tag unless packed. */
#define VARINT_CASE(ctype, encode)                                       \
  {                                                                      \
    const ctype* start = _upb_array_constptr(arr);                       \
    const ctype* ptr = start + arr->len;                                 \
    uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \
    do {                                                                 \
      ptr--;                                                             \
      encode_varint(e, encode);                                          \
      if (tag) encode_varint(e, tag);                                    \
    } while (ptr != start);                                              \
  }                                                                      \
  break;

/* Per-element tag for fixed-width arrays; 0 means "no per-element tag". */
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))

  switch (f->descriptortype) {
    case kUpb_FieldType_Double:
      encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit));
      break;
    case kUpb_FieldType_Float:
      encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit));
      break;
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_Fixed64:
      encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit));
      break;
    case kUpb_FieldType_Fixed32:
    case kUpb_FieldType_SFixed32:
      encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit));
      break;
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_UInt64:
      VARINT_CASE(uint64_t, *ptr);
    case kUpb_FieldType_UInt32:
      VARINT_CASE(uint32_t, *ptr);
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_Enum:
      VARINT_CASE(int32_t, (int64_t)*ptr);
    case kUpb_FieldType_Bool:
      VARINT_CASE(bool, *ptr);
    case kUpb_FieldType_SInt32:
      VARINT_CASE(int32_t, encode_zz32(*ptr));
    case kUpb_FieldType_SInt64:
      VARINT_CASE(int64_t, encode_zz64(*ptr));
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes: {
      const upb_StringView* start = _upb_array_constptr(arr);
      const upb_StringView* ptr = start + arr->len;
      do {
        ptr--;
        encode_bytes(e, ptr->data, ptr->size);
        encode_varint(e, ptr->size);
        encode_tag(e, f->number, kUpb_WireType_Delimited);
      } while (ptr != start);
      return;
    }
    case kUpb_FieldType_Group: {
      const void* const* start = _upb_array_constptr(arr);
      const void* const* ptr = start + arr->len;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        encode_tag(e, f->number, kUpb_WireType_EndGroup);
        encode_message(e, *ptr, subm, &size);
        encode_tag(e, f->number, kUpb_WireType_StartGroup);
      } while (ptr != start);
      e->depth++;
      return;
    }
    case kUpb_FieldType_Message: {
      const void* const* start = _upb_array_constptr(arr);
      const void* const* ptr = start + arr->len;
      const upb_MiniTable* subm = subs[f->submsg_index].submsg;
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        encode_message(e, *ptr, subm, &size);
        encode_varint(e, size);
        encode_tag(e, f->number, kUpb_WireType_Delimited);
      } while (ptr != start);
      e->depth++;
      return;
    }
  }
#undef VARINT_CASE
/* TAG is local to this function; do not let it leak into later code. */
#undef TAG

  if (packed) {
    /* Length of everything written by the switch above. */
    encode_varint(e, e->limit - e->ptr - pre_len);
    encode_tag(e, f->number, kUpb_WireType_Delimited);
  }
}
10643 
/* Encodes a single map entry as a length-delimited field numbered "number".
 * "layout" describes the entry: fields[0] is used for the key and fields[1]
 * for the value.  The value is written before the key because output is
 * produced back to front. */
static void encode_mapentry(upb_encstate* e, uint32_t number,
                            const upb_MiniTable* layout,
                            const upb_MapEntry* ent) {
  const upb_MiniTable_Field* key_desc = &layout->fields[0];
  const upb_MiniTable_Field* value_desc = &layout->fields[1];
  const size_t before = e->limit - e->ptr;

  encode_scalar(e, &ent->v, layout->subs, value_desc);
  encode_scalar(e, &ent->k, layout->subs, key_desc);

  const size_t entry_len = (e->limit - e->ptr) - before;
  encode_varint(e, entry_len);
  encode_tag(e, number, kUpb_WireType_Delimited);
}
10657 
/* Encodes the map field "f" of "msg", one map entry per table element.
 *
 * A NULL map pointer encodes nothing.  When kUpb_Encode_Deterministic is set
 * in e->options, the entries are first sorted by key through e->sorter;
 * otherwise they are emitted in table iteration order.
 *
 * If the sorter cannot set up the sorted view, encoding is aborted through
 * encode_err() instead of iterating over an unusable sorter state. */
static void encode_map(upb_encstate* e, const upb_Message* msg,
                       const upb_MiniTable_Sub* subs,
                       const upb_MiniTable_Field* f) {
  const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*);
  const upb_MiniTable* layout = subs[f->submsg_index].submsg;
  UPB_ASSERT(layout->field_count == 2);

  if (map == NULL) return;

  if (e->options & kUpb_Encode_Deterministic) {
    _upb_sortedmap sorted;
    if (!_upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype,
                                map, &sorted)) {
      /* _upb_mapsorter_pushmap() reports failure by returning false. */
      encode_err(e);
    }
    upb_MapEntry ent;
    while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
      encode_mapentry(e, f->number, layout, &ent);
    }
    _upb_mapsorter_popmap(&e->sorter, &sorted);
  } else {
    upb_strtable_iter i;
    upb_strtable_begin(&i, &map->table);
    for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
      upb_StringView key = upb_strtable_iter_key(&i);
      const upb_value val = upb_strtable_iter_value(&i);
      upb_MapEntry ent;
      _upb_map_fromkey(key, &ent.k, map->key_size);
      _upb_map_fromvalue(val, &ent.v, map->val_size);
      encode_mapentry(e, f->number, layout, &ent);
    }
  }
}
10689 
/* Decides whether field "f" of "msg" has to be written.
 *
 *   f->presence > 0:  proto2 presence; the hasbit decides.
 *   f->presence < 0:  the field is in a oneof; it is written when the oneof
 *                     case equals f->number.
 *   f->presence == 0: proto3 presence or map/array; the field is written
 *                     when its storage is non-zero (for a string view: when
 *                     its size is non-zero).
 *
 * "e" and "subs" are not used by this function. */
static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
                                const upb_MiniTable_Sub* subs,
                                const upb_MiniTable_Field* f) {
  if (f->presence > 0) {
    /* Proto2 presence: hasbit. */
    return _upb_hasbit_field(msg, f);
  }
  if (f->presence < 0) {
    /* Field is in a oneof. */
    return _upb_getoneofcase_field(msg, f) == f->number;
  }

  /* Proto3 presence or map/array. */
  const void* mem = UPB_PTR_AT(msg, f->offset, void);
  switch (f->mode >> kUpb_FieldRep_Shift) {
    case kUpb_FieldRep_1Byte: {
      char byte;
      memcpy(&byte, mem, 1);
      return byte != 0;
    }
#if UINTPTR_MAX == 0xffffffff
    case kUpb_FieldRep_Pointer:
#endif
    case kUpb_FieldRep_4Byte: {
      uint32_t word;
      memcpy(&word, mem, 4);
      return word != 0;
    }
#if UINTPTR_MAX != 0xffffffff
    case kUpb_FieldRep_Pointer:
#endif
    case kUpb_FieldRep_8Byte: {
      uint64_t dword;
      memcpy(&dword, mem, 8);
      return dword != 0;
    }
    case kUpb_FieldRep_StringView: {
      const upb_StringView* view = (const upb_StringView*)mem;
      return view->size != 0;
    }
    default:
      UPB_UNREACHABLE();
  }
}
10733 
/* Dispatches on the field mode reported by upb_FieldMode_Get(): arrays go to
 * encode_array(), maps to encode_map(), and scalars to encode_scalar() with
 * a pointer to the field's storage inside "msg". */
static void encode_field(upb_encstate* e, const upb_Message* msg,
                         const upb_MiniTable_Sub* subs,
                         const upb_MiniTable_Field* field) {
  const int mode = upb_FieldMode_Get(field);

  if (mode == kUpb_FieldMode_Array) {
    encode_array(e, msg, subs, field);
  } else if (mode == kUpb_FieldMode_Map) {
    encode_map(e, msg, subs, field);
  } else if (mode == kUpb_FieldMode_Scalar) {
    encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field);
  } else {
    UPB_UNREACHABLE();
  }
}
10751 
10752 /* message MessageSet {
10753  *   repeated group Item = 1 {
10754  *     required int32 type_id = 2;
10755  *     required string message = 3;
10756  *   }
10757  * } */
encode_msgset_item(upb_encstate * e,const upb_Message_Extension * ext)10758 static void encode_msgset_item(upb_encstate* e,
10759                                const upb_Message_Extension* ext) {
10760   size_t size;
10761   encode_tag(e, 1, kUpb_WireType_EndGroup);
10762   encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size);
10763   encode_varint(e, size);
10764   encode_tag(e, 3, kUpb_WireType_Delimited);
10765   encode_varint(e, ext->ext->field.number);
10766   encode_tag(e, 2, kUpb_WireType_Varint);
10767   encode_tag(e, 1, kUpb_WireType_StartGroup);
10768 }
10769 
/* Encodes "msg" (described by mini-table "m") and stores in *size the number
 * of bytes this call added to the buffer.
 *
 * Order of emission (back to front): unknown fields, then extensions, then
 * the regular fields from the last table entry to the first.
 *
 * With kUpb_Encode_CheckRequired set and m->required_count non-zero, the
 * first 8 bytes of the message are compared against
 * upb_MiniTable_requiredmask(m); encode_err() is called if a required bit is
 * not set.  With kUpb_Encode_SkipUnknown set, unknown fields are omitted. */
static void encode_message(upb_encstate* e, const upb_Message* msg,
                           const upb_MiniTable* m, size_t* size) {
  const size_t start_len = e->limit - e->ptr;

  if ((e->options & kUpb_Encode_CheckRequired) && m->required_count) {
    uint64_t head_bits;
    memcpy(&head_bits, msg, 8);
    head_bits = _upb_BigEndian_Swap64(head_bits);
    if (upb_MiniTable_requiredmask(m) & ~head_bits) {
      encode_err(e);
    }
  }

  if (!(e->options & kUpb_Encode_SkipUnknown)) {
    size_t unknown_len;
    const char* unknown_data = upb_Message_GetUnknown(msg, &unknown_len);
    if (unknown_data) {
      encode_bytes(e, unknown_data, unknown_len);
    }
  }

  if (m->ext != kUpb_ExtMode_NonExtendable) {
    /* All extensions are encoded together.  Unlike C++, no attempt is made
     * to keep them in field number order relative to normal fields or even
     * to each other. */
    size_t ext_count;
    const upb_Message_Extension* exts = _upb_Message_Getexts(msg, &ext_count);
    for (size_t i = 0; i < ext_count; i++) {
      const upb_Message_Extension* ext = &exts[i];
      if (UPB_UNLIKELY(m->ext == kUpb_ExtMode_IsMessageSet)) {
        encode_msgset_item(e, ext);
      } else {
        encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field);
      }
    }
  }

  /* Regular fields, last table entry first. */
  for (size_t i = m->field_count; i > 0; i--) {
    const upb_MiniTable_Field* f = &m->fields[i - 1];
    if (encode_shouldencode(e, msg, m->subs, f)) {
      encode_field(e, msg, m->subs, f);
    }
  }

  *size = (e->limit - e->ptr) - start_len;
}
10823 
/* Serializes "msg" (described by mini-table "l") using memory from "arena".
 *
 * options: encoder option flags; the bits above bit 15 hold the depth limit,
 *          and 64 is used when that value is 0.
 * size:    receives the number of encoded bytes (0 on failure).
 *
 * Returns a pointer to the first encoded byte.  For an empty encoding a
 * pointer to a static byte is returned, so the result is non-NULL whenever
 * encoding succeeded.  Returns NULL when encoding was aborted through the
 * e.err jump buffer. */
char* upb_Encode(const void* msg, const upb_MiniTable* l, int options,
                 upb_Arena* arena, size_t* size) {
  upb_encstate e;
  const unsigned depth_limit = (unsigned)options >> 16;
  char* ret = NULL;

  e.alloc = upb_Arena_Alloc(arena);
  e.buf = NULL;
  e.limit = NULL;
  e.ptr = NULL;
  e.depth = depth_limit ? depth_limit : 64;
  e.options = options;
  _upb_mapsorter_init(&e.sorter);

  if (UPB_SETJMP(e.err) == 0) {
    encode_message(&e, msg, l, size);
    *size = e.limit - e.ptr;
    if (*size != 0) {
      UPB_ASSERT(e.ptr);
      ret = e.ptr;
    } else {
      static char empty;
      ret = &empty;
    }
  } else {
    /* Reached when the encoder jumps back through e.err. */
    *size = 0;
    ret = NULL;
  }

  _upb_mapsorter_destroy(&e.sorter);
  return ret;
}
10856 
10857 /** upb/msg.c ************************************************************/
10858 
10859 
10860 /** upb_Message ***************************************************************/
10861 
10862 static const size_t overhead = sizeof(upb_Message_InternalData);
10863 
upb_Message_Getinternal_const(const upb_Message * msg)10864 static const upb_Message_Internal* upb_Message_Getinternal_const(
10865     const upb_Message* msg) {
10866   ptrdiff_t size = sizeof(upb_Message_Internal);
10867   return (upb_Message_Internal*)((char*)msg - size);
10868 }
10869 
_upb_Message_New(const upb_MiniTable * l,upb_Arena * a)10870 upb_Message* _upb_Message_New(const upb_MiniTable* l, upb_Arena* a) {
10871   return _upb_Message_New_inl(l, a);
10872 }
10873 
/* Zeroes the message described by "l": upb_msg_sizeof(l) bytes are cleared,
 * starting at the upb_Message_Internal record that precedes "msg" in memory.
 *
 * The start address is obtained through upb_Message_Getinternal() rather than
 * by adding the unsigned quantity -sizeof(upb_Message_Internal) to the
 * pointer, which relied on pointer-arithmetic wraparound. */
void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l) {
  void* mem = upb_Message_Getinternal(msg);
  memset(mem, 0, upb_msg_sizeof(l));
}
10878 
/* Ensures that the internal data block of "msg" has at least "need" free
 * bytes between the end of the unknown-field bytes (unknown_end) and the
 * start of the extension area (ext_begin).
 *
 * If there is no block yet, one is allocated (at least 128 bytes).  If the
 * free gap is too small, the block is reallocated to a larger size and the
 * extension area is moved so that it again ends at the end of the block.
 *
 * Returns false if the arena allocation fails, true otherwise. */
static bool realloc_internal(upb_Message* msg, size_t need, upb_Arena* arena) {
  upb_Message_Internal* in = upb_Message_Getinternal(msg);
  upb_Message_InternalData* cur = in->internal;

  if (cur == NULL) {
    /* No internal data, allocate from scratch. */
    size_t size = UPB_MAX(128, _upb_Log2CeilingSize(need + overhead));
    upb_Message_InternalData* fresh = upb_Arena_Malloc(arena, size);
    if (fresh == NULL) return false;
    fresh->size = size;
    fresh->unknown_end = overhead;
    fresh->ext_begin = size;
    in->internal = fresh;
  } else if (cur->ext_begin - cur->unknown_end < need) {
    /* Internal data is too small, reallocate. */
    size_t new_size = _upb_Log2CeilingSize(cur->size + need);
    size_t ext_bytes = cur->size - cur->ext_begin;
    size_t new_ext_begin = new_size - ext_bytes;
    upb_Message_InternalData* grown =
        upb_Arena_Realloc(arena, cur, cur->size, new_size);
    if (grown == NULL) return false;
    if (ext_bytes != 0) {
      /* Extension data must be moved to the end of the larger block. */
      char* bytes = (char*)grown;
      memmove(bytes + new_ext_begin, bytes + grown->ext_begin, ext_bytes);
    }
    grown->ext_begin = new_ext_begin;
    grown->size = new_size;
    in->internal = grown;
  }

  UPB_ASSERT(in->internal->ext_begin - in->internal->unknown_end >= need);
  return true;
}
10910 
/* Appends "len" bytes from "data" to the unknown-field bytes of "msg",
 * growing the internal data block if needed.  Returns false if that growth
 * fails, in which case nothing is appended. */
bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
                             upb_Arena* arena) {
  if (!realloc_internal(msg, len, arena)) {
    return false;
  }

  upb_Message_InternalData* store = upb_Message_Getinternal(msg)->internal;
  char* dst = UPB_PTR_AT(store, store->unknown_end, char);
  memcpy(dst, data, len);
  store->unknown_end += len;
  return true;
}
10919 
/* Drops the unknown-field bytes stored directly on "msg" by resetting
 * unknown_end to the header size.  Does nothing when the message has no
 * internal data block. */
void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) {
  upb_Message_InternalData* store = upb_Message_Getinternal(msg)->internal;
  if (store == NULL) return;
  store->unknown_end = overhead;
}
10926 
upb_Message_GetUnknown(const upb_Message * msg,size_t * len)10927 const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) {
10928   const upb_Message_Internal* in = upb_Message_Getinternal_const(msg);
10929   if (in->internal) {
10930     *len = in->internal->unknown_end - overhead;
10931     return (char*)(in->internal + 1);
10932   } else {
10933     *len = 0;
10934     return NULL;
10935   }
10936 }
10937 
_upb_Message_Getexts(const upb_Message * msg,size_t * count)10938 const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg,
10939                                                   size_t* count) {
10940   const upb_Message_Internal* in = upb_Message_Getinternal_const(msg);
10941   if (in->internal) {
10942     *count = (in->internal->size - in->internal->ext_begin) /
10943              sizeof(upb_Message_Extension);
10944     return UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
10945   } else {
10946     *count = 0;
10947     return NULL;
10948   }
10949 }
10950 
_upb_Message_Getext(const upb_Message * msg,const upb_MiniTable_Extension * e)10951 const upb_Message_Extension* _upb_Message_Getext(
10952     const upb_Message* msg, const upb_MiniTable_Extension* e) {
10953   size_t n;
10954   const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &n);
10955 
10956   /* For now we use linear search exclusively to find extensions. If this
10957    * becomes an issue due to messages with lots of extensions, we can introduce
10958    * a table of some sort. */
10959   for (size_t i = 0; i < n; i++) {
10960     if (ext[i].ext == e) {
10961       return &ext[i];
10962     }
10963   }
10964 
10965   return NULL;
10966 }
10967 
/* Removes the extension described by "ext_l" from "msg", if present.  The
 * record at the start of the extension area is copied over the removed one,
 * and the area is then shrunk by one record from the front. */
void _upb_Message_Clearext(upb_Message* msg,
                           const upb_MiniTable_Extension* ext_l) {
  upb_Message_Internal* in = upb_Message_Getinternal(msg);
  if (in->internal == NULL) return;

  const upb_Message_Extension* first =
      UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
  upb_Message_Extension* victim =
      (upb_Message_Extension*)_upb_Message_Getext(msg, ext_l);
  if (victim == NULL) return;

  *victim = *first;
  in->internal->ext_begin += sizeof(upb_Message_Extension);
}
10981 
_upb_Message_GetOrCreateExtension(upb_Message * msg,const upb_MiniTable_Extension * e,upb_Arena * arena)10982 upb_Message_Extension* _upb_Message_GetOrCreateExtension(
10983     upb_Message* msg, const upb_MiniTable_Extension* e, upb_Arena* arena) {
10984   upb_Message_Extension* ext =
10985       (upb_Message_Extension*)_upb_Message_Getext(msg, e);
10986   if (ext) return ext;
10987   if (!realloc_internal(msg, sizeof(upb_Message_Extension), arena)) return NULL;
10988   upb_Message_Internal* in = upb_Message_Getinternal(msg);
10989   in->internal->ext_begin -= sizeof(upb_Message_Extension);
10990   ext = UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
10991   memset(ext, 0, sizeof(upb_Message_Extension));
10992   ext->ext = e;
10993   return ext;
10994 }
10995 
/* Returns the number of extension records stored on "msg", as reported by
 * _upb_Message_Getexts(). */
size_t upb_Message_ExtensionCount(const upb_Message* msg) {
  size_t n;
  (void)_upb_Message_Getexts(msg, &n);
  return n;
}
11001 
11002 /** upb_Array *****************************************************************/
11003 
/* Grows the storage of "arr" so that it can hold at least "min_size"
 * elements.  The new capacity starts at max(current capacity, 4) and is
 * doubled until it reaches min_size.  The element size (as a log2) is taken
 * from the low three bits of arr->data.
 *
 * Returns false, leaving "arr" untouched, when the arena reallocation fails
 * or when the requested capacity or its byte size cannot be represented in a
 * size_t.  Without the overflow checks, doubling would wrap to 0 and loop
 * forever for a min_size above SIZE_MAX / 2. */
bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena) {
  size_t new_size = UPB_MAX(arr->size, 4);
  int elem_size_lg2 = arr->data & 7;
  size_t old_bytes = arr->size << elem_size_lg2;
  size_t new_bytes;
  void* ptr = _upb_array_ptr(arr);

  /* Double the capacity until it is large enough. */
  while (new_size < min_size) {
    if (new_size > SIZE_MAX / 2) return false; /* doubling would wrap */
    new_size *= 2;
  }

  /* The byte count must not overflow either. */
  if (new_size > (SIZE_MAX >> elem_size_lg2)) return false;

  new_bytes = new_size << elem_size_lg2;
  ptr = upb_Arena_Realloc(arena, ptr, old_bytes, new_bytes);

  if (!ptr) {
    return false;
  }

  arr->data = _upb_tag_arrptr(ptr, elem_size_lg2);
  arr->size = new_size;
  return true;
}
11025 
/* Returns *arr_ptr, first creating an array (initial capacity 4, the given
 * element size log2) and storing it in *arr_ptr when it is NULL.  Returns
 * NULL if that creation fails; *arr_ptr is then left unchanged. */
static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2,
                                    upb_Arena* arena) {
  if (*arr_ptr != NULL) {
    return *arr_ptr;
  }

  upb_Array* created = _upb_Array_New(arena, 4, elem_size_lg2);
  if (created == NULL) return NULL;

  *arr_ptr = created;
  return created;
}
11036 
/* Resizes the array in *arr_ptr to "size" elements, creating the array first
 * if *arr_ptr is NULL.  Returns the array's data pointer, or NULL if either
 * the creation or the resize fails. */
void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size,
                                 int elem_size_lg2, upb_Arena* arena) {
  upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);

  if (arr == NULL) return NULL;
  if (!_upb_Array_Resize(arr, size, arena)) return NULL;

  return _upb_array_ptr(arr);
}
11043 
/* Appends one element, copied from "value" (1 << elem_size_lg2 bytes), to
 * the array in *arr_ptr, creating the array first if *arr_ptr is NULL.
 * Returns false if the array cannot be created or grown. */
bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value,
                                int elem_size_lg2, upb_Arena* arena) {
  upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
  if (arr == NULL) return false;

  /* Index of the new element, captured before the resize. */
  const size_t index = arr->len;
  if (!_upb_Array_Resize(arr, index + 1, arena)) return false;

  char* slot = (char*)_upb_array_ptr(arr) + (index << elem_size_lg2);
  memcpy(slot, value, 1 << elem_size_lg2);
  return true;
}
11059 
11060 /** upb_Map *******************************************************************/
11061 
_upb_Map_New(upb_Arena * a,size_t key_size,size_t value_size)11062 upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) {
11063   upb_Map* map = upb_Arena_Malloc(a, sizeof(upb_Map));
11064 
11065   if (!map) {
11066     return NULL;
11067   }
11068 
11069   upb_strtable_init(&map->table, 4, a);
11070   map->key_size = key_size;
11071   map->val_size = value_size;
11072 
11073   return map;
11074 }
11075 
_upb_mapsorter_getkeys(const void * _a,const void * _b,void * a_key,void * b_key,size_t size)11076 static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key,
11077                                    void* b_key, size_t size) {
11078   const upb_tabent* const* a = _a;
11079   const upb_tabent* const* b = _b;
11080   upb_StringView a_tabkey = upb_tabstrview((*a)->key);
11081   upb_StringView b_tabkey = upb_tabstrview((*b)->key);
11082   _upb_map_fromkey(a_tabkey, a_key, size);
11083   _upb_map_fromkey(b_tabkey, b_key, size);
11084 }
11085 
/* Three-way integer comparison: -1 if a < b, 0 if a == b, 1 if a > b.
 * Both arguments are evaluated more than once. */
#define UPB_COMPARE_INTEGERS(a, b) (((a) > (b)) - ((a) < (b)))
11087 
/* qsort() comparator for table entries whose keys are signed 64-bit
 * integers; orders by ascending key. */
static int _upb_mapsorter_cmpi64(const void* _a, const void* _b) {
  int64_t lhs;
  int64_t rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 8);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11093 
/* qsort() comparator for table entries whose keys are unsigned 64-bit
 * integers; orders by ascending key. */
static int _upb_mapsorter_cmpu64(const void* _a, const void* _b) {
  uint64_t lhs;
  uint64_t rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 8);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11099 
/* qsort() comparator for table entries whose keys are signed 32-bit
 * integers; orders by ascending key. */
static int _upb_mapsorter_cmpi32(const void* _a, const void* _b) {
  int32_t lhs;
  int32_t rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 4);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11105 
/* qsort() comparator for table entries whose keys are unsigned 32-bit
 * integers; orders by ascending key. */
static int _upb_mapsorter_cmpu32(const void* _a, const void* _b) {
  uint32_t lhs;
  uint32_t rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 4);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11111 
/* qsort() comparator for table entries whose keys are bools (one byte is
 * read per key); false orders before true. */
static int _upb_mapsorter_cmpbool(const void* _a, const void* _b) {
  bool lhs;
  bool rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 1);
  return UPB_COMPARE_INTEGERS(lhs, rhs);
}
11117 
_upb_mapsorter_cmpstr(const void * _a,const void * _b)11118 static int _upb_mapsorter_cmpstr(const void* _a, const void* _b) {
11119   upb_StringView a, b;
11120   _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
11121   size_t common_size = UPB_MIN(a.size, b.size);
11122   int cmp = memcmp(a.data, b.data, common_size);
11123   if (cmp) return -cmp;
11124   return UPB_COMPARE_INTEGERS(a.size, b.size);
11125 }
11126 
11127 #undef UPB_COMPARE_INTEGERS
11128 
/* Pushes a sorted view of "map" onto sorter "s".
 *
 * Pointers to all non-empty table entries of the map are appended to
 * s->entries (growing that array if required) and sorted with qsort() using
 * a comparator chosen by "key_type".  "sorted" receives the index range
 * [start, end) of this view inside s->entries, with pos == start.
 *
 * Returns false if s->entries cannot be grown.  In that case s->entries,
 * s->cap and s->size keep their previous values, so the sorter still owns
 * its old buffer and remains in a consistent state. */
bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type,
                            const upb_Map* map, _upb_sortedmap* sorted) {
  int map_size = _upb_Map_Size(map);
  sorted->start = s->size;
  sorted->pos = sorted->start;
  sorted->end = sorted->start + map_size;

  /* Grow s->entries if necessary. */
  if (sorted->end > s->cap) {
    const size_t new_cap = _upb_Log2CeilingSize(sorted->end);
    /* Keep the old pointer until realloc() is known to have succeeded;
     * overwriting it directly would lose the old buffer on failure. */
    void* grown = realloc(s->entries, new_cap * sizeof(*s->entries));
    if (!grown) return false;
    s->entries = grown;
    s->cap = new_cap;
  }

  s->size = sorted->end;

  /* Copy non-empty entries from the table to s->entries. */
  upb_tabent const** dst = &s->entries[sorted->start];
  const upb_tabent* src = map->table.t.entries;
  const upb_tabent* end = src + upb_table_size(&map->table.t);
  for (; src < end; src++) {
    if (!upb_tabent_isempty(src)) {
      *dst = src;
      dst++;
    }
  }
  UPB_ASSERT(dst == &s->entries[sorted->end]);

  /* Sort entries according to the key type. */

  int (*compar)(const void*, const void*);

  switch (key_type) {
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_SInt64:
      compar = _upb_mapsorter_cmpi64;
      break;
    case kUpb_FieldType_UInt64:
    case kUpb_FieldType_Fixed64:
      compar = _upb_mapsorter_cmpu64;
      break;
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_SInt32:
    case kUpb_FieldType_SFixed32:
    case kUpb_FieldType_Enum:
      compar = _upb_mapsorter_cmpi32;
      break;
    case kUpb_FieldType_UInt32:
    case kUpb_FieldType_Fixed32:
      compar = _upb_mapsorter_cmpu32;
      break;
    case kUpb_FieldType_Bool:
      compar = _upb_mapsorter_cmpbool;
      break;
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes:
      compar = _upb_mapsorter_cmpstr;
      break;
    default:
      UPB_UNREACHABLE();
  }

  qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
  return true;
}
11195 
11196 /** upb_ExtensionRegistry *****************************************************/
11197 
11198 struct upb_ExtensionRegistry {
11199   upb_Arena* arena;
11200   upb_strtable exts; /* Key is upb_MiniTable* concatenated with fieldnum. */
11201 };
11202 
11203 #define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t))
11204 
extreg_key(char * buf,const upb_MiniTable * l,uint32_t fieldnum)11205 static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) {
11206   memcpy(buf, &l, sizeof(l));
11207   memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum));
11208 }
11209 
upb_ExtensionRegistry_New(upb_Arena * arena)11210 upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) {
11211   upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r));
11212   if (!r) return NULL;
11213   r->arena = arena;
11214   if (!upb_strtable_init(&r->exts, 8, arena)) return NULL;
11215   return r;
11216 }
11217 
/* Registers `count` extensions from the array `e`.
 *
 * The operation is all-or-nothing: if any extension is already registered
 * (same extendee and field number) or an insert fails, every entry added by
 * this call is removed again and false is returned. */
bool _upb_extreg_add(upb_ExtensionRegistry* r,
                     const upb_MiniTable_Extension** e, size_t count) {
  char key[EXTREG_KEY_SIZE];
  const upb_MiniTable_Extension** const first = e;
  const upb_MiniTable_Extension** const limit = UPB_PTRADD(e, count);
  const upb_MiniTable_Extension** it;

  for (it = first; it < limit; it++) {
    const upb_MiniTable_Extension* ext = *it;
    upb_value existing;
    extreg_key(key, ext->extendee, ext->field.number);

    /* A hit on lookup means a duplicate; only attempt the insert on a miss. */
    if (upb_strtable_lookup2(&r->exts, key, EXTREG_KEY_SIZE, &existing) ||
        !upb_strtable_insert(&r->exts, key, EXTREG_KEY_SIZE,
                             upb_value_constptr(ext), r->arena)) {
      /* Back out the entries previously added: [first, it). */
      const upb_MiniTable_Extension** undo;
      for (undo = first; undo < it; undo++) {
        const upb_MiniTable_Extension* added = *undo;
        extreg_key(key, added->extendee, added->field.number);
        upb_strtable_remove2(&r->exts, key, EXTREG_KEY_SIZE, NULL);
      }
      return false;
    }
  }
  return true;
}
11246 
_upb_extreg_get(const upb_ExtensionRegistry * r,const upb_MiniTable * l,uint32_t num)11247 const upb_MiniTable_Extension* _upb_extreg_get(const upb_ExtensionRegistry* r,
11248                                                const upb_MiniTable* l,
11249                                                uint32_t num) {
11250   char buf[EXTREG_KEY_SIZE];
11251   upb_value v;
11252   extreg_key(buf, l, num);
11253   if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) {
11254     return upb_value_getconstptr(v);
11255   } else {
11256     return NULL;
11257   }
11258 }
11259 
11260 /** upb/table.c ************************************************************/
11261 /*
11262  * upb_table Implementation
11263  *
11264  * Implementation is heavily inspired by Lua's ltable.c.
11265  */
11266 
11267 #include <string.h>
11268 
11269 
11270 /* Must be last. */
11271 
/* Largest log2 value log2ceil() will return; 1 << 16 is 64k. */
#define UPB_MAXARRSIZE 16

/* Element count of a true array (not a pointer).  From Chromium. */
#define ARRAY_SIZE(x) \
  ((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))

/* Fraction of a hash table's slots that may be occupied before the table is
 * considered full (see init() and isfull()). */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
11284 
/* Returns true if v has at most one bit set.  Note that 0 is reported as a
 * power of two. */
static bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  return (v & (v - 1)) == 0;
}
11286 
_upb_value_val(uint64_t val)11287 static upb_value _upb_value_val(uint64_t val) {
11288   upb_value ret;
11289   _upb_value_setval(&ret, val);
11290   return ret;
11291 }
11292 
log2ceil(uint64_t v)11293 static int log2ceil(uint64_t v) {
11294   int ret = 0;
11295   bool pow2 = is_pow2(v);
11296   while (v >>= 1) ret++;
11297   ret = pow2 ? ret : ret + 1; /* Ceiling. */
11298   return UPB_MIN(UPB_MAXARRSIZE, ret);
11299 }
11300 
/* Copies `len` bytes from `s` into a new arena allocation and appends a
 * terminating NUL byte.
 *
 * The input is treated as a raw buffer: it need not be NUL-terminated and may
 * contain embedded zero bytes.  Returns NULL if len == SIZE_MAX or if the
 * arena allocation fails. */
char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
  /* Prevent overflow errors: len + 1 below would wrap to 0. */
  if (len == SIZE_MAX) return NULL;

  char* copy = upb_Arena_Malloc(a, len + 1);
  if (copy == NULL) return NULL;

  /* memcpy() requires valid pointers even for a zero-length copy, so skip it
   * for empty input (where `s` may legitimately be NULL). */
  if (len != 0) memcpy(copy, s, len);
  copy[len] = 0;
  return copy;
}
11317 
/* A type to represent the lookup key of either a strtable or an inttable.
 * Exactly one member is meaningful for a given table kind. */
typedef union {
  /* Integer key (see intkey()). */
  uintptr_t num;
  /* String key as pointer plus byte length (see strkey2()). */
  struct {
    const char* str;
    size_t len;
  } str;
} lookupkey_t;
11326 
strkey2(const char * str,size_t len)11327 static lookupkey_t strkey2(const char* str, size_t len) {
11328   lookupkey_t k;
11329   k.str.str = str;
11330   k.str.len = len;
11331   return k;
11332 }
11333 
intkey(uintptr_t key)11334 static lookupkey_t intkey(uintptr_t key) {
11335   lookupkey_t k;
11336   k.num = key;
11337   return k;
11338 }
11339 
11340 typedef uint32_t hashfunc_t(upb_tabkey key);
11341 typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
11342 
11343 /* Base table (shared code) ***************************************************/
11344 
/* Hash for integer keys: the key truncated to its low 32 bits. */
static uint32_t upb_inthash(uintptr_t key) {
  const uint32_t low_bits = (uint32_t)key;
  return low_bits;
}
11346 
upb_getentry(const upb_table * t,uint32_t hash)11347 static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
11348   return t->entries + (hash & t->mask);
11349 }
11350 
/* Returns true if an array-part slot is occupied.  An all-ones 64-bit value
 * marks an empty slot. */
static bool upb_arrhas(upb_tabval key) {
  return key.val != UINT64_MAX;
}
11352 
/* Returns true once the table holds max_count entries, i.e. the next insert
 * must be preceded by a resize. */
static bool isfull(upb_table* t) {
  return t->max_count == t->count;
}
11354 
/* Initializes `t` as an empty table whose slot count is given by size_lg2
 * (via upb_table_size()).  Slots are arena-allocated and zero-filled; a table
 * with zero slots gets entries == NULL.  Returns false on allocation
 * failure. */
static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
  t->count = 0;
  t->size_lg2 = size_lg2;

  const size_t slots = upb_table_size(t);
  t->mask = slots ? slots - 1 : 0;
  t->max_count = slots * MAX_LOAD;

  const size_t bytes = slots * sizeof(upb_tabent);
  if (bytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = upb_Arena_Malloc(a, bytes);
  if (!t->entries) return false;
  memset(t->entries, 0, bytes);
  return true;
}
11372 
/* Finds an empty slot, scanning from the slot after `e` to the end of the
 * table and then wrapping around to the beginning.  Asserts (and returns
 * NULL) if the table has no empty slot. */
static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
  upb_tabent* const first = t->entries;
  upb_tabent* const limit = first + upb_table_size(t);
  upb_tabent* probe;

  for (probe = e + 1; probe < limit; probe++) {
    if (upb_tabent_isempty(probe)) return probe;
  }
  for (probe = first; probe < limit; probe++) {
    if (upb_tabent_isempty(probe)) return probe;
  }
  UPB_ASSERT(false);
  return NULL;
}
11385 
getentry_mutable(upb_table * t,uint32_t hash)11386 static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
11387   return (upb_tabent*)upb_getentry(t, hash);
11388 }
11389 
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)11390 static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
11391                                    uint32_t hash, eqlfunc_t* eql) {
11392   const upb_tabent* e;
11393 
11394   if (t->size_lg2 == 0) return NULL;
11395   e = upb_getentry(t, hash);
11396   if (upb_tabent_isempty(e)) return NULL;
11397   while (1) {
11398     if (eql(e->key, key)) return e;
11399     if ((e = e->next) == NULL) return NULL;
11400   }
11401 }
11402 
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)11403 static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
11404                                      uint32_t hash, eqlfunc_t* eql) {
11405   return (upb_tabent*)findentry(t, key, hash, eql);
11406 }
11407 
/* Looks up `key`.  On a hit, stores the value in *v (when v is non-NULL) and
 * returns true; on a miss returns false and leaves *v untouched. */
static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
                   uint32_t hash, eqlfunc_t* eql) {
  const upb_tabent* hit = findentry(t, key, hash, eql);
  if (hit == NULL) return false;
  if (v != NULL) _upb_value_setval(v, hit->val.val);
  return true;
}
11420 
11421 /* The given key must not already exist in the table. */
insert(upb_table * t,lookupkey_t key,upb_tabkey tabkey,upb_value val,uint32_t hash,hashfunc_t * hashfunc,eqlfunc_t * eql)11422 static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
11423                    upb_value val, uint32_t hash, hashfunc_t* hashfunc,
11424                    eqlfunc_t* eql) {
11425   upb_tabent* mainpos_e;
11426   upb_tabent* our_e;
11427 
11428   UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
11429 
11430   t->count++;
11431   mainpos_e = getentry_mutable(t, hash);
11432   our_e = mainpos_e;
11433 
11434   if (upb_tabent_isempty(mainpos_e)) {
11435     /* Our main position is empty; use it. */
11436     our_e->next = NULL;
11437   } else {
11438     /* Collision. */
11439     upb_tabent* new_e = emptyent(t, mainpos_e);
11440     /* Head of collider's chain. */
11441     upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
11442     if (chain == mainpos_e) {
11443       /* Existing ent is in its main position (it has the same hash as us, and
11444        * is the head of our chain).  Insert to new ent and append to this chain.
11445        */
11446       new_e->next = mainpos_e->next;
11447       mainpos_e->next = new_e;
11448       our_e = new_e;
11449     } else {
11450       /* Existing ent is not in its main position (it is a node in some other
11451        * chain).  This implies that no existing ent in the table has our hash.
11452        * Evict it (updating its chain) and use its ent for head of our chain. */
11453       *new_e = *mainpos_e; /* copies next. */
11454       while (chain->next != mainpos_e) {
11455         chain = (upb_tabent*)chain->next;
11456         UPB_ASSERT(chain);
11457       }
11458       chain->next = new_e;
11459       our_e = mainpos_e;
11460       our_e->next = NULL;
11461     }
11462   }
11463   our_e->key = tabkey;
11464   our_e->val.val = val.val;
11465   UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
11466 }
11467 
/* Removes the entry matching `key`, if any.
 *
 * val:     if non-NULL, receives the removed entry's value.
 * removed: if non-NULL, receives the removed entry's stored key.
 *
 * Returns true if an entry was removed, false if the key was not present
 * (including when the table has no slots at all). */
static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
               upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
  /* A table initialized with size_lg2 == 0 has entries == NULL, so there is
   * no slot to inspect.  findentry() has the same guard. */
  if (t->size_lg2 == 0) return false;

  upb_tabent* chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val);
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Pull the second node into the head slot so the chain still starts at
       * its main position, then free the slot it came from. */
      upb_tabent* move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0; /* Make the slot empty. */
    } else {
      chain->key = 0; /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove; unlink it from its predecessor. */
      upb_tabent* rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val);
      if (removed) *removed = rm->key;
      rm->key = 0; /* Make the slot empty. */
      chain->next = rm->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}
11506 
next(const upb_table * t,size_t i)11507 static size_t next(const upb_table* t, size_t i) {
11508   do {
11509     if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
11510   } while (upb_tabent_isempty(&t->entries[i]));
11511 
11512   return i;
11513 }
11514 
/* Returns the index of the first non-empty slot.  Starting from SIZE_MAX
 * makes next() wrap to index 0 on its first increment. */
static size_t begin(const upb_table* t) {
  return next(t, SIZE_MAX);
}
11516 
11517 /* upb_strtable ***************************************************************/
11518 
11519 /* A simple "subclass" of upb_table that only adds a hash function for strings.
11520  */
11521 
strcopy(lookupkey_t k2,upb_Arena * a)11522 static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
11523   uint32_t len = (uint32_t)k2.str.len;
11524   char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
11525   if (str == NULL) return 0;
11526   memcpy(str, &len, sizeof(uint32_t));
11527   if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
11528   str[sizeof(uint32_t) + k2.str.len] = '\0';
11529   return (uintptr_t)str;
11530 }
11531 
11532 /* Adapted from ABSL's wyhash. */
11533 
/* Reads 8 bytes starting at p into a uint64_t in host byte order.  p need not
 * be aligned. */
static uint64_t UnalignedLoad64(const void* p) {
  uint64_t word;
  memcpy(&word, p, sizeof(word));
  return word;
}
11539 
/* Reads 4 bytes starting at p into a uint32_t in host byte order.  p need not
 * be aligned. */
static uint32_t UnalignedLoad32(const void* p) {
  uint32_t word;
  memcpy(&word, p, sizeof(word));
  return word;
}
11545 
11546 #if defined(_MSC_VER) && defined(_M_X64)
11547 #include <intrin.h>
11548 #endif
11549 
/* Computes v0 * v1, returning the low 64 bits of the result and storing the
 * high 64 bits in |*out_high|.  The portable fallback does not propagate the
 * carry out of the low half, so its high word can differ from the exact
 * product. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
  const __uint128_t product = (__uint128_t)v0 * v1;
  *out_high = (uint64_t)(product >> 64);
  return (uint64_t)product;
#elif defined(_MSC_VER) && defined(_M_X64)
  return _umul128(v0, v1, out_high);
#else
  const uint64_t v0_hi = v0 >> 32;
  const uint64_t v0_lo = v0 & 0xffffffff;
  const uint64_t v1_hi = v1 >> 32;
  const uint64_t v1_lo = v1 & 0xffffffff;
  const uint64_t cross_a = v0_hi * v1_lo;
  const uint64_t cross_b = v0_lo * v1_hi;
  // Omit carry bit, for mixing we do not care about exact numerical precision.
  *out_high = v0_hi * v1_hi + (cross_a >> 32) + (cross_b >> 32);
  return v0_lo * v1_lo + ((cross_a << 32) + (cross_b << 32));
#endif
}
11576 
/* Multiplies v0 by v1 and folds the two 64-bit halves of the product
 * together with XOR. */
static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
  uint64_t upper;
  const uint64_t lower = upb_umul128(v0, v1, &upper);
  return lower ^ upper;
}
11582 
/* Hashes `len` bytes at `data` to a 64-bit value.
 *
 * seed: mixed into the initial state.
 * salt: array of constants; indexes 0 through 4 are read. */
static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
                       const uint64_t salt[]) {
  const uint8_t* cursor = (const uint8_t*)data;
  const uint64_t total_len = (uint64_t)len;
  uint64_t state = seed ^ salt[0];

  if (len > 64) {
    // More than 64 bytes: consume 64-byte chunks, feeding the first half of
    // each chunk into `state` and the second half into an independent copy,
    // then combine the two.
    uint64_t state2 = state;

    do {
      const uint64_t w0 = UnalignedLoad64(cursor);
      const uint64_t w1 = UnalignedLoad64(cursor + 8);
      const uint64_t w2 = UnalignedLoad64(cursor + 16);
      const uint64_t w3 = UnalignedLoad64(cursor + 24);
      const uint64_t w4 = UnalignedLoad64(cursor + 32);
      const uint64_t w5 = UnalignedLoad64(cursor + 40);
      const uint64_t w6 = UnalignedLoad64(cursor + 48);
      const uint64_t w7 = UnalignedLoad64(cursor + 56);

      const uint64_t m0 = WyhashMix(w0 ^ salt[1], w1 ^ state);
      const uint64_t m1 = WyhashMix(w2 ^ salt[2], w3 ^ state);
      state = (m0 ^ m1);

      const uint64_t m2 = WyhashMix(w4 ^ salt[3], w5 ^ state2);
      const uint64_t m3 = WyhashMix(w6 ^ salt[4], w7 ^ state2);
      state2 = (m2 ^ m3);

      cursor += 64;
      len -= 64;
    } while (len > 64);

    state ^= state2;
  }

  // At most 64 bytes remain; consume them 16 at a time while more than 16
  // are left.
  while (len > 16) {
    const uint64_t w0 = UnalignedLoad64(cursor);
    const uint64_t w1 = UnalignedLoad64(cursor + 8);

    state = WyhashMix(w0 ^ salt[1], w1 ^ state);

    cursor += 16;
    len -= 16;
  }

  // At most 16 bytes remain.  With no bytes left both words stay zero.
  uint64_t a = 0;
  uint64_t b = 0;
  if (len > 8) {
    // 9..16 bytes: A is the first 64 bits and B the last 64 bits of the
    // remainder.  They overlap in the middle when fewer than 16 bytes remain.
    a = UnalignedLoad64(cursor);
    b = UnalignedLoad64(cursor + len - 8);
  } else if (len > 3) {
    // 4..8 bytes: A is the first 32 bits and B the last 32 bits.
    a = UnalignedLoad32(cursor);
    b = UnalignedLoad32(cursor + len - 4);
  } else if (len > 0) {
    // 1..3 bytes: pack the first, middle and last byte into A.
    a = ((cursor[0] << 16) | (cursor[len >> 1] << 8) | cursor[len - 1]);
  }

  const uint64_t w = WyhashMix(a ^ salt[1], b ^ state);
  const uint64_t z = salt[1] ^ total_len;
  return WyhashMix(w, z);
}
11661 
/* Salt constants passed to Wyhash() by _upb_Hash(). */
const uint64_t kWyhashSalt[5] = {
    0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL, 0xA4093822299F31D0ULL,
    0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
};

/* Hashes `n` bytes at `p` with the given seed.  The 64-bit Wyhash result is
 * truncated to its low 32 bits. */
uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
  const uint64_t wide = Wyhash(p, n, seed, kWyhashSalt);
  return (uint32_t)wide;
}
11670 
/* _upb_Hash() with a seed of 0. */
static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
  const uint64_t no_seed = 0;
  return _upb_Hash(p, n, no_seed);
}
11674 
/* hashfunc_t for string tables: hashes the bytes of a stored string key. */
static uint32_t strhash(upb_tabkey key) {
  uint32_t stored_len;
  char* stored = upb_tabstr(key, &stored_len);
  return _upb_Hash_NoSeed(stored, stored_len);
}
11680 
/* eqlfunc_t for string tables: true when the stored key k1 and the lookup
 * key k2 have the same length and the same bytes. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t stored_len;
  char* stored = upb_tabstr(k1, &stored_len);
  if (stored_len != k2.str.len) return false;
  if (stored_len == 0) return true; /* Two empty keys; nothing to compare. */
  return memcmp(stored, k2.str.str, stored_len) == 0;
}
11686 
/* Initializes `t` as an empty string table sized for about `expected_size`
 * entries.  Returns false on allocation failure. */
bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
  // Scale by 1204/1024, an approximate reciprocal of MAX_LOAD (0.85) with a
  // pow2 denominator.
  const size_t need_entries = (expected_size + 1) * 1204 / 1024;
  UPB_ASSERT(need_entries >= expected_size * 0.85);
  const int size_lg2 = _upb_Log2Ceiling(need_entries);
  return init(&t->t, size_lg2, a);
}
11695 
/* Removes all entries from `t` by zeroing every slot and resetting the
 * count.  The slot array itself is kept. */
void upb_strtable_clear(upb_strtable* t) {
  size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
  t->t.count = 0;
  /* A table with no slots has entries == NULL (see init()); memset() must
   * not be handed a null pointer even for a zero-length fill. */
  if (bytes != 0) memset((char*)t->t.entries, 0, bytes);
}
11701 
/* Rebuilds `t` as a table whose slot count is given by size_lg2, re-inserting
 * every existing entry.
 *
 * Returns false if allocating the new table or re-inserting any entry fails;
 * in that case `t` is left exactly as it was. */
bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
  upb_strtable new_table;
  upb_strtable_iter i;

  if (!init(&new_table.t, size_lg2, a)) return false;
  upb_strtable_begin(&i, t);
  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    upb_StringView key = upb_strtable_iter_key(&i);
    /* An insert can fail on allocation failure.  Ignoring that would drop the
     * entry silently while still reporting success. */
    if (!upb_strtable_insert(&new_table, key.data, key.size,
                             upb_strtable_iter_value(&i), a)) {
      return false;
    }
  }
  *t = new_table;
  return true;
}
11716 
/* Inserts (k, len) -> v.  The key bytes are copied into arena memory.  The
 * key must not already be present (insert() asserts this).
 *
 * Returns false if growing the table or copying the key fails. */
bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
                         upb_value v, upb_Arena* a) {
  /* Need to resize?  New table of double the size, add old elements to it. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
    return false;
  }

  const lookupkey_t lookup_key = strkey2(k, len);
  const upb_tabkey stored_key = strcopy(lookup_key, a);
  if (stored_key == 0) return false;

  const uint32_t hash =
      _upb_Hash_NoSeed(lookup_key.str.str, lookup_key.str.len);
  insert(&t->t, lookup_key, stored_key, v, hash, &strhash, &streql);
  return true;
}
11738 
/* Looks up the key (key, len).  On a hit stores the value in *v (when v is
 * non-NULL) and returns true; otherwise returns false. */
bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
                          upb_value* v) {
  const lookupkey_t wanted = strkey2(key, len);
  const uint32_t hash = _upb_Hash_NoSeed(key, len);
  return lookup(&t->t, wanted, v, hash, &streql);
}
11744 
/* Removes the key (key, len).  On success stores the removed value in *val
 * (when val is non-NULL) and returns true; returns false if the key is not
 * present. */
bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
                          upb_value* val) {
  upb_tabkey discarded_key; /* Required by rm(); not used afterwards. */
  const lookupkey_t wanted = strkey2(key, len);
  const uint32_t hash = _upb_Hash_NoSeed(key, len);
  return rm(&t->t, wanted, val, &discarded_key, hash, &streql);
}
11751 
11752 /* Iteration */
11753 
/* Positions iterator `i` at the first non-empty slot of `t`. */
void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
  const size_t first = begin(&t->t);
  i->t = t;
  i->index = first;
}
11758 
/* Advances iterator `i` to the next non-empty slot. */
void upb_strtable_next(upb_strtable_iter* i) {
  const upb_table* table = &i->t->t;
  i->index = next(table, i->index);
}
11762 
upb_strtable_done(const upb_strtable_iter * i)11763 bool upb_strtable_done(const upb_strtable_iter* i) {
11764   if (!i->t) return true;
11765   return i->index >= upb_table_size(&i->t->t) ||
11766          upb_tabent_isempty(str_tabent(i));
11767 }
11768 
upb_strtable_iter_key(const upb_strtable_iter * i)11769 upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
11770   upb_StringView key;
11771   uint32_t len;
11772   UPB_ASSERT(!upb_strtable_done(i));
11773   key.data = upb_tabstr(str_tabent(i)->key, &len);
11774   key.size = len;
11775   return key;
11776 }
11777 
upb_strtable_iter_value(const upb_strtable_iter * i)11778 upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
11779   UPB_ASSERT(!upb_strtable_done(i));
11780   return _upb_value_val(str_tabent(i)->val.val);
11781 }
11782 
/* Puts the iterator into a state for which upb_strtable_done() is true. */
void upb_strtable_iter_setdone(upb_strtable_iter* i) {
  i->index = SIZE_MAX;
  i->t = NULL;
}
11787 
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)11788 bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
11789                                const upb_strtable_iter* i2) {
11790   if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
11791   return i1->t == i2->t && i1->index == i2->index;
11792 }
11793 
11794 /* upb_inttable ***************************************************************/
11795 
11796 /* For inttables we use a hybrid structure where small keys are kept in an
11797  * array and large keys are put in the hash table. */
11798 
/* hashfunc_t for integer tables; forwards to upb_inthash(). */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
11800 
/* eqlfunc_t for integer tables: stored key equals the lookup key's number. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  return k2.num == k1;
}
11802 
mutable_array(upb_inttable * t)11803 static upb_tabval* mutable_array(upb_inttable* t) {
11804   return (upb_tabval*)t->array;
11805 }
11806 
inttable_val(upb_inttable * t,uintptr_t key)11807 static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
11808   if (key < t->array_size) {
11809     return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
11810   } else {
11811     upb_tabent* e =
11812         findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
11813     return e ? &e->val : NULL;
11814   }
11815 }
11816 
inttable_val_const(const upb_inttable * t,uintptr_t key)11817 static const upb_tabval* inttable_val_const(const upb_inttable* t,
11818                                             uintptr_t key) {
11819   return inttable_val((upb_inttable*)t, key);
11820 }
11821 
upb_inttable_count(const upb_inttable * t)11822 size_t upb_inttable_count(const upb_inttable* t) {
11823   return t->t.count + t->array_count;
11824 }
11825 
/* Consistency check, compiled in only when UPB_DEBUG_TABLE is defined and
 * NDEBUG is not: every key the iterator yields must be found by lookup, and
 * the number of keys iterated must equal upb_inttable_count(). */
static void check(upb_inttable* t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    size_t seen = 0;
    upb_inttable_iter it;
    upb_inttable_begin(&it, t);
    for (; !upb_inttable_done(&it); upb_inttable_next(&it), seen++) {
      UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
    }
    UPB_ASSERT(seen == upb_inttable_count(t));
  }
#endif
}
11841 
/* Initializes `t` with an array part of `asize` slots (at least 1) and a hash
 * part whose slot count is given by hsize_lg2.  All array slots start out
 * empty.  Returns false on allocation failure. */
bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
                            upb_Arena* a) {
  size_t array_bytes;

  if (!init(&t->t, hsize_lg2, a)) return false;
  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;
  /* Size the allocation by the array's own element type rather than by
   * upb_value, so the two cannot drift apart. */
  array_bytes = t->array_size * sizeof(*t->array);
  t->array = upb_Arena_Malloc(a, array_bytes);
  if (!t->array) {
    return false;
  }
  /* All-ones bytes are what upb_arrhas() treats as an empty slot. */
  memset(mutable_array(t), 0xff, array_bytes);
  check(t);
  return true;
}
11860 
/* Initializes `t` with default sizing: no requested array slots and a hash
 * part of size_lg2 4. */
bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
  const size_t array_slots = 0;
  const int hash_size_lg2 = 4;
  return upb_inttable_sizedinit(t, array_slots, hash_size_lg2, a);
}
11864 
/* Inserts key -> val.  The key must not already be present, and val must not
 * be the all-ones pattern that marks an empty array slot (both asserted).
 *
 * Returns false only if the hash part needed to grow and allocation failed. */
bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
                         upb_Arena* a) {
  upb_tabval incoming;
  incoming.val = val.val;
  UPB_ASSERT(
      upb_arrhas(incoming)); /* This will reject (uint64_t)-1.  Fix this. */

  if (key < t->array_size) {
    /* Small key: goes straight into the array part. */
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
    check(t);
    return true;
  }

  if (isfull(&t->t)) {
    /* Need to resize the hash part, but we re-use the array part. */
    upb_table grown;
    size_t slot;

    if (!init(&grown, t->t.size_lg2 + 1, a)) {
      return false;
    }

    for (slot = begin(&t->t); slot < upb_table_size(&t->t);
         slot = next(&t->t, slot)) {
      const upb_tabent* old = &t->t.entries[slot];
      upb_value old_val;

      _upb_value_setval(&old_val, old->val.val);
      insert(&grown, intkey(old->key), old->key, old_val,
             upb_inthash(old->key), &inthash, &inteql);
    }

    UPB_ASSERT(t->t.count == grown.count);

    t->t = grown;
  }

  insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  check(t);
  return true;
}
11905 
/* Looks up `key`.  On a hit stores the value in *v (when v is non-NULL) and
 * returns true; otherwise returns false. */
bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
  const upb_tabval* found = inttable_val_const(t, key);
  if (found == NULL) return false;
  if (v != NULL) _upb_value_setval(v, found->val);
  return true;
}
11912 
/* Overwrites the value stored for an existing key.  Returns false (and
 * changes nothing) if the key is not present. */
bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
  upb_tabval* slot = inttable_val(t, key);
  if (slot == NULL) return false;
  slot->val = val.val;
  return true;
}
11919 
/* Removes `key`.  On success stores the removed value in *val (when val is
 * non-NULL) and returns true; returns false if the key is not present. */
bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
  bool removed = false;

  if (key >= t->array_size) {
    /* Large key: lives in the hash part. */
    removed = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  } else if (upb_arrhas(t->array[key])) {
    /* Small key present in the array part: report it, then mark the slot
     * empty. */
    upb_tabval vacant = UPB_TABVALUE_EMPTY_INIT;
    t->array_count--;
    if (val) {
      _upb_value_setval(val, t->array[key].val);
    }
    mutable_array(t)[key] = vacant;
    removed = true;
  }

  check(t);
  return removed;
}
11940 
/* Rebuilds `t` with an array part and hash part sized for its current
 * contents, choosing the array size from a power-of-two histogram of the keys
 * and the MIN_DENSITY threshold.
 *
 * This is best-effort: if allocating or filling the replacement table fails,
 * `t` is left unchanged. */
void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    /* On allocation failure new_t is not usable; keep the existing table
     * rather than inserting into (and then adopting) a half-built one. */
    if (!upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a)) return;
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      /* A failed insert would lose this entry; abandon the compaction. */
      if (!upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i), a)) {
        return;
      }
    }
    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  *t = new_t;
}
11996 
11997 /* Iteration. */
11998 
int_tabent(const upb_inttable_iter * i)11999 static const upb_tabent* int_tabent(const upb_inttable_iter* i) {
12000   UPB_ASSERT(!i->array_part);
12001   return &i->t->t.entries[i->index];
12002 }
12003 
int_arrent(const upb_inttable_iter * i)12004 static upb_tabval int_arrent(const upb_inttable_iter* i) {
12005   UPB_ASSERT(i->array_part);
12006   return i->t->array[i->index];
12007 }
12008 
/* Points iterator |i| at table |t|: the iterator is placed one position
 * before the array part (index -1) and then advanced once with
 * upb_inttable_next(). */
void upb_inttable_begin(upb_inttable_iter* i, const upb_inttable* t) {
  i->array_part = true;
  i->index = -1;
  i->t = t;
  upb_inttable_next(i);
}
12015 
/* Advances |iter| by one position.
 *
 * In the hash part the new index comes from next(). In the array part the
 * index moves forward to the following occupied slot; if none is left, the
 * iterator switches to the hash part and takes its index from begin(). */
void upb_inttable_next(upb_inttable_iter* iter) {
  const upb_inttable* tbl = iter->t;

  if (!iter->array_part) {
    iter->index = next(&tbl->t, iter->index);
    return;
  }

  for (++iter->index; iter->index < tbl->array_size; ++iter->index) {
    if (upb_arrhas(int_arrent(iter))) {
      return;
    }
  }

  /* Array part exhausted: continue in the hash part. */
  iter->array_part = false;
  iter->index = begin(&tbl->t);
}
12030 
/* Advances the index-based iterator |*iter| over |t|.
 *
 * |*iter| encodes the position of the element returned last: -1 before the
 * first element, a value in [0, array_size) for an array slot, and
 * array_size + hash index for a hash-part entry.
 *
 * On success stores the next entry's key in |*key| and value in |*val|,
 * updates |*iter| and returns true. Returns false when there are no more
 * entries; |*key|, |*val| and |*iter| are left untouched in that case.
 *
 * The array bounds tests are done on size_t values so that the -1 start
 * position is not mis-compared against the array size, and the cursor is
 * backed up after the array scan so that the hash scan starts at hash slot 0
 * instead of skipping it. */
bool upb_inttable_next2(const upb_inttable* t, uintptr_t* key, upb_value* val,
                        intptr_t* iter) {
  intptr_t i = *iter;

  /* True for -1 and for every array position, false for hash positions. */
  if ((size_t)(i + 1) <= (size_t)t->array_size) {
    while ((size_t)++i < (size_t)t->array_size) {
      upb_tabval ent = t->array[i];
      if (upb_arrhas(ent)) {
        *key = i;
        *val = _upb_value_val(ent.val);
        *iter = i;
        return true;
      }
    }
    /* The loop left i == array_size. Step back to array_size - 1 so that the
     * subtraction below yields -1, i.e. "before the first hash slot". */
    i--;
  }

  /* next() returns the first occupied hash index strictly after its
   * argument. */
  size_t tab_idx = next(&t->t, i - t->array_size);
  if (tab_idx < upb_table_size(&t->t)) {
    upb_tabent* ent = &t->t.entries[tab_idx];
    *key = ent->key;
    *val = _upb_value_val(ent->val.val);
    *iter = tab_idx + t->array_size;
    return true;
  }

  return false;
}
12057 
/* Removes the entry at iterator position |*iter| from |t|. |*iter| itself is
 * not modified.
 *
 * Array positions are cleared by writing -1 into the slot value. For hash
 * positions the entry is unlinked from the entry that points at it (if any)
 * and then blanked.
 *
 * NOTE(review): if no other entry links to the removed one but its own |next|
 * is non-NULL, the remainder of that chain is not re-linked anywhere; confirm
 * this is acceptable for the callers. */
void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
  intptr_t pos = *iter;

  if (pos < t->array_size) {
    t->array_count--;
    mutable_array(t)[pos].val = -1;
    return;
  }

  upb_tabent* victim = &t->t.entries[pos - t->array_size];
  upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];

  /* Linear scan for the first entry whose |next| is the victim. */
  upb_tabent* e = t->t.entries;
  while (e != end && e->next != victim) {
    e++;
  }
  if (e != end) {
    e->next = victim->next;
  }

  t->t.count--;
  victim->key = 0;
  victim->next = NULL;
}
12085 
/* Advances the index-based iterator |*iter| over string table |t|.
 *
 * Asks next() for the entry following |*iter|. If that index is inside the
 * table, fills |*key| (data and size) and |*val| from the entry, stores the
 * index in |*iter| and returns true. Otherwise returns false and leaves the
 * outputs untouched. */
bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
                        upb_value* val, intptr_t* iter) {
  size_t idx = next(&t->t, *iter);
  if (idx >= upb_table_size(&t->t)) {
    return false;
  }

  upb_tabent* ent = &t->t.entries[idx];
  uint32_t len;
  key->data = upb_tabstr(ent->key, &len);
  key->size = len;
  *val = _upb_value_val(ent->val.val);
  *iter = idx;
  return true;
}
12101 
/* Removes the entry at iterator position |*iter| from string table |t|.
 * |*iter| itself is not modified.
 *
 * The entry is unlinked from the entry that points at it (if any), the table
 * count is decremented and the entry's key and |next| are cleared.
 *
 * NOTE(review): if no other entry links to the removed one but its own |next|
 * is non-NULL, the remainder of that chain is not re-linked anywhere; confirm
 * this is acceptable for the callers. */
void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
  intptr_t pos = *iter;
  upb_tabent* victim = &t->t.entries[pos];
  upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];

  /* Linear scan for the first entry whose |next| is the victim. */
  upb_tabent* e = t->t.entries;
  while (e != end && e->next != victim) {
    e++;
  }
  if (e != end) {
    e->next = victim->next;
  }

  t->t.count--;
  victim->key = 0;
  victim->next = NULL;
}
12124 
upb_inttable_done(const upb_inttable_iter * i)12125 bool upb_inttable_done(const upb_inttable_iter* i) {
12126   if (!i->t) return true;
12127   if (i->array_part) {
12128     return i->index >= i->t->array_size || !upb_arrhas(int_arrent(i));
12129   } else {
12130     return i->index >= upb_table_size(&i->t->t) ||
12131            upb_tabent_isempty(int_tabent(i));
12132   }
12133 }
12134 
upb_inttable_iter_key(const upb_inttable_iter * i)12135 uintptr_t upb_inttable_iter_key(const upb_inttable_iter* i) {
12136   UPB_ASSERT(!upb_inttable_done(i));
12137   return i->array_part ? i->index : int_tabent(i)->key;
12138 }
12139 
upb_inttable_iter_value(const upb_inttable_iter * i)12140 upb_value upb_inttable_iter_value(const upb_inttable_iter* i) {
12141   UPB_ASSERT(!upb_inttable_done(i));
12142   return _upb_value_val(i->array_part ? i->t->array[i->index].val
12143                                       : int_tabent(i)->val.val);
12144 }
12145 
/* Puts |i| into the "done" state: no table, index SIZE_MAX, hash part.
 * upb_inttable_done() reports true for an iterator with a NULL table. */
void upb_inttable_iter_setdone(upb_inttable_iter* i) {
  i->array_part = false;
  i->index = SIZE_MAX;
  i->t = NULL;
}
12151 
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)12152 bool upb_inttable_iter_isequal(const upb_inttable_iter* i1,
12153                                const upb_inttable_iter* i2) {
12154   if (upb_inttable_done(i1) && upb_inttable_done(i2)) return true;
12155   return i1->t == i2->t && i1->index == i2->index &&
12156          i1->array_part == i2->array_part;
12157 }
12158 
12159 /** upb/upb.c ************************************************************/
12160 #include <errno.h>
12161 #include <float.h>
12162 #include <stdarg.h>
12163 #include <stddef.h>
12164 #include <stdint.h>
12165 #include <stdio.h>
12166 #include <stdlib.h>
12167 #include <string.h>
12168 
12169 
12170 // Must be last.
12171 
12172 /* upb_Status *****************************************************************/
12173 
/* Resets |status| to "ok" with an empty message. A NULL |status| is ignored. */
void upb_Status_Clear(upb_Status* status) {
  if (status == NULL) {
    return;
  }
  status->msg[0] = '\0';
  status->ok = true;
}
12179 
upb_Status_IsOk(const upb_Status * status)12180 bool upb_Status_IsOk(const upb_Status* status) { return status->ok; }
12181 
upb_Status_ErrorMessage(const upb_Status * status)12182 const char* upb_Status_ErrorMessage(const upb_Status* status) {
12183   return status->msg;
12184 }
12185 
/* Marks |status| as failed and copies |msg| into its message buffer,
 * truncating to _kUpb_Status_MaxMessage - 1 characters. The buffer is always
 * NUL-terminated explicitly because strncpy() does not terminate on
 * truncation. A NULL |status| is ignored. */
void upb_Status_SetErrorMessage(upb_Status* status, const char* msg) {
  if (status == NULL) {
    return;
  }
  strncpy(status->msg, msg, _kUpb_Status_MaxMessage - 1);
  status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
  status->ok = false;
}
12192 
/* printf-style variant of the error setter: packages the variadic arguments
 * into a va_list and forwards to upb_Status_VSetErrorFormat(). */
void upb_Status_SetErrorFormat(upb_Status* status, const char* fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  upb_Status_VSetErrorFormat(status, fmt, ap);
  va_end(ap);
}
12199 
/* Marks |status| as failed and replaces its message with the result of
 * formatting |fmt| with |args|, bounded by the size of the message buffer.
 * A NULL |status| is ignored. */
void upb_Status_VSetErrorFormat(upb_Status* status, const char* fmt,
                                va_list args) {
  if (status == NULL) {
    return;
  }
  vsnprintf(status->msg, sizeof(status->msg), fmt, args);
  status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
  status->ok = false;
}
12207 
/* Marks |status| as failed and appends the result of formatting |fmt| with
 * |args| to the message already stored, bounded by the space left in the
 * message buffer. A NULL |status| is ignored. */
void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt,
                                   va_list args) {
  if (status == NULL) {
    return;
  }
  status->ok = false;

  size_t used = strlen(status->msg);
  char* tail = status->msg + used;
  vsnprintf(tail, sizeof(status->msg) - used, fmt, args);
  status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
}
12217 
12218 /* upb_alloc ******************************************************************/
12219 
/* Allocation callback backed by the C heap. A request for size 0 frees |ptr|
 * and returns NULL; any other request is forwarded to realloc(). |alloc| and
 * |oldsize| are not used. */
static void* upb_global_allocfunc(upb_alloc* alloc, void* ptr, size_t oldsize,
                                  size_t size) {
  UPB_UNUSED(alloc);
  UPB_UNUSED(oldsize);
  if (size != 0) {
    return realloc(ptr, size);
  }
  free(ptr);
  return NULL;
}
12231 
/* Extracts the pointer half of a packed cleanup_metadata word by masking off
 * the lowest bit, which carries the has-initial-block flag. */
static uint32_t* upb_cleanup_pointer(uintptr_t cleanup_metadata) {
  const uintptr_t flag_bit = 0x1;
  return (uint32_t*)(cleanup_metadata & ~flag_bit);
}
12235 
/* Reports whether the lowest bit (the has-initial-block flag) of a packed
 * cleanup_metadata word is set. */
static bool upb_cleanup_has_initial_block(uintptr_t cleanup_metadata) {
  return (cleanup_metadata & 0x1) != 0;
}
12239 
/* Packs a cleanup-counter pointer and the has-initial-block flag into one
 * word: the pointer value with the flag OR-ed into the lowest bit. */
static uintptr_t upb_cleanup_metadata(uint32_t* cleanup,
                                      bool has_initial_block) {
  uintptr_t packed = (uintptr_t)cleanup;
  if (has_initial_block) {
    packed |= 0x1;
  }
  return packed;
}
12244 
12245 upb_alloc upb_alloc_global = {&upb_global_allocfunc};
12246 
12247 /* upb_Arena ******************************************************************/
12248 
/* Header placed at the start of every arena block. */
struct mem_block {
  /* Next block in the owning arena's freelist. */
  struct mem_block* next;
  /* Size of the whole block in bytes, as recorded when it was added. */
  uint32_t size;
  /* Number of cleanup_ent records stored at the very end of the block. */
  uint32_t cleanups;
  /* Data follows. */
};
12255 
12256 typedef struct cleanup_ent {
12257   upb_CleanupFunc* cleanup;
12258   void* ud;
12259 } cleanup_ent;
12260 
12261 static const size_t memblock_reserve =
12262     UPB_ALIGN_UP(sizeof(mem_block), UPB_MALLOC_ALIGN);
12263 
/* Follows |parent| links from |a| until reaching the arena that is its own
 * parent, and returns it. While walking, each visited arena is re-pointed at
 * its grandparent (path splitting, see
 *   https://en.wikipedia.org/wiki/Disjoint-set_data_structure). */
static upb_Arena* arena_findroot(upb_Arena* a) {
  for (;;) {
    upb_Arena* up = a->parent;
    if (up == a) {
      return a;
    }
    a->parent = up->parent;
    a = up;
  }
}
12274 
/* Installs the memory at |ptr| (|size| bytes) as the current block of arena
 * |a|, linking it into the freelist of |root|.
 *
 * The block header is written at |ptr|; the arena's allocation window is set
 * to the bytes after the header up to the end of the block; and the arena's
 * cleanup metadata is pointed at this block's cleanup counter while keeping
 * the has-initial-block flag it already had. */
static void upb_Arena_addblock(upb_Arena* a, upb_Arena* root, void* ptr,
                               size_t size) {
  mem_block* block = ptr;

  block->size = (uint32_t)size;
  block->cleanups = 0;

  /* The block is for arena |a|, but is pushed on the freelist of |root|. The
   * first block ever pushed is also remembered as the tail. */
  block->next = root->freelist;
  root->freelist = block;
  a->last_size = block->size;
  if (root->freelist_tail == NULL) {
    root->freelist_tail = block;
  }

  a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char);
  a->head.end = UPB_PTR_AT(block, size, char);

  bool keep_flag = upb_cleanup_has_initial_block(a->cleanup_metadata);
  a->cleanup_metadata = upb_cleanup_metadata(&block->cleanups, keep_flag);

  UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr);
}
12294 
/* Allocates a new block for |a| from the root arena's block allocator and
 * installs it with upb_Arena_addblock(). The block holds the larger of |size|
 * and twice the arena's last block size, plus the header reserve. Returns
 * false if the allocator returns NULL. */
static bool upb_Arena_Allocblock(upb_Arena* a, size_t size) {
  upb_Arena* root = arena_findroot(a);
  size_t payload = UPB_MAX(size, a->last_size * 2);
  size_t block_size = payload + memblock_reserve;

  mem_block* block = upb_malloc(root->block_alloc, block_size);
  if (block == NULL) {
    return false;
  }

  upb_Arena_addblock(a, root, block, block_size);
  return true;
}
12304 
/* Allocation path that first adds a new block able to hold |size| bytes and
 * then retries the allocation through upb_Arena_Malloc(). Returns NULL when
 * the new block cannot be allocated. */
void* _upb_Arena_SlowMalloc(upb_Arena* a, size_t size) {
  bool grew = upb_Arena_Allocblock(a, size);
  if (!grew) {
    return NULL; /* Out of memory. */
  }
  UPB_ASSERT(_upb_ArenaHas(a) >= size);
  return upb_Arena_Malloc(a, size);
}
12310 
/* Allocation callback installed on every arena: converts |alloc| back to the
 * arena that contains it and forwards to upb_Arena_Realloc(). */
static void* upb_Arena_doalloc(upb_alloc* alloc, void* ptr, size_t oldsize,
                               size_t size) {
  /* upb_alloc is the initial member of upb_Arena, so the cast is valid. */
  return upb_Arena_Realloc((upb_Arena*)alloc, ptr, oldsize, size);
}
12316 
12317 /* Public Arena API ***********************************************************/
12318 
arena_initslow(void * mem,size_t n,upb_alloc * alloc)12319 upb_Arena* arena_initslow(void* mem, size_t n, upb_alloc* alloc) {
12320   const size_t first_block_overhead = sizeof(upb_Arena) + memblock_reserve;
12321   upb_Arena* a;
12322 
12323   /* We need to malloc the initial block. */
12324   n = first_block_overhead + 256;
12325   if (!alloc || !(mem = upb_malloc(alloc, n))) {
12326     return NULL;
12327   }
12328 
12329   a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena);
12330   n -= sizeof(*a);
12331 
12332   a->head.alloc.func = &upb_Arena_doalloc;
12333   a->block_alloc = alloc;
12334   a->parent = a;
12335   a->refcount = 1;
12336   a->freelist = NULL;
12337   a->freelist_tail = NULL;
12338   a->cleanup_metadata = upb_cleanup_metadata(NULL, false);
12339 
12340   upb_Arena_addblock(a, a, mem, n);
12341 
12342   return a;
12343 }
12344 
upb_Arena_Init(void * mem,size_t n,upb_alloc * alloc)12345 upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) {
12346   upb_Arena* a;
12347 
12348   if (n) {
12349     /* Align initial pointer up so that we return properly-aligned pointers. */
12350     void* aligned = (void*)UPB_ALIGN_UP((uintptr_t)mem, UPB_MALLOC_ALIGN);
12351     size_t delta = (uintptr_t)aligned - (uintptr_t)mem;
12352     n = delta <= n ? n - delta : 0;
12353     mem = aligned;
12354   }
12355 
12356   /* Round block size down to alignof(*a) since we will allocate the arena
12357    * itself at the end. */
12358   n = UPB_ALIGN_DOWN(n, UPB_ALIGN_OF(upb_Arena));
12359 
12360   if (UPB_UNLIKELY(n < sizeof(upb_Arena))) {
12361     return arena_initslow(mem, n, alloc);
12362   }
12363 
12364   a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena);
12365 
12366   a->head.alloc.func = &upb_Arena_doalloc;
12367   a->block_alloc = alloc;
12368   a->parent = a;
12369   a->refcount = 1;
12370   a->last_size = UPB_MAX(128, n);
12371   a->head.ptr = mem;
12372   a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char);
12373   a->freelist = NULL;
12374   a->cleanup_metadata = upb_cleanup_metadata(NULL, true);
12375 
12376   return a;
12377 }
12378 
/* Releases every block on the freelist of root arena |a|. Before a block is
 * freed, the cleanup records stored at its end are invoked in ascending
 * address order. Asserts that |a| is a root with a refcount of zero. */
static void arena_dofree(upb_Arena* a) {
  UPB_ASSERT(a->parent == a);
  UPB_ASSERT(a->refcount == 0);

  mem_block* block = a->freelist;
  while (block != NULL) {
    /* Read the link first: the block is about to be freed. */
    mem_block* following = block->next;

    if (block->cleanups > 0) {
      cleanup_ent* end = UPB_PTR_AT(block, block->size, void);
      cleanup_ent* ent = end - block->cleanups;

      while (ent < end) {
        ent->cleanup(ent->ud);
        ent++;
      }
    }

    upb_free(a->block_alloc, block);
    block = following;
  }
}
12401 
/* Drops one reference on the root of |a|; when the count reaches zero the
 * root's blocks are released with arena_dofree(). */
void upb_Arena_Free(upb_Arena* a) {
  upb_Arena* root = arena_findroot(a);
  root->refcount--;
  if (root->refcount == 0) {
    arena_dofree(root);
  }
}
12406 
/* Registers |func| to be called with |ud| when the arena's blocks are freed.
 *
 * The record is carved from the end of the current block's allocation window
 * and the block's cleanup counter is incremented. If the arena has no cleanup
 * counter yet, or the current block lacks room for one record, a new block is
 * allocated first. Returns false if that allocation fails. */
bool upb_Arena_AddCleanup(upb_Arena* a, void* ud, upb_CleanupFunc* func) {
  uint32_t* cleanups = upb_cleanup_pointer(a->cleanup_metadata);

  if (cleanups == NULL || _upb_ArenaHas(a) < sizeof(cleanup_ent)) {
    if (!upb_Arena_Allocblock(a, 128)) {
      return false; /* Out of memory. */
    }
    UPB_ASSERT(_upb_ArenaHas(a) >= sizeof(cleanup_ent));
    /* The new block carries its own counter. */
    cleanups = upb_cleanup_pointer(a->cleanup_metadata);
  }

  a->head.end -= sizeof(cleanup_ent);
  cleanup_ent* ent = (cleanup_ent*)a->head.end;
  *cleanups += 1;
  UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent));

  ent->ud = ud;
  ent->cleanup = func;

  return true;
}
12427 
/* Joins the arena groups of |a1| and |a2| under a single root.
 *
 * Returns true if both already share a root or the join succeeded. Returns
 * false, changing nothing, if either root is flagged as having an initial
 * block or if the two roots use different block allocators. */
bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
  upb_Arena* r1 = arena_findroot(a1);
  upb_Arena* r2 = arena_findroot(a2);

  if (r1 == r2) {
    return true; /* Already fused. */
  }

  /* Do not fuse initial blocks since we cannot lifetime extend them. */
  if (upb_cleanup_has_initial_block(r1->cleanup_metadata) ||
      upb_cleanup_has_initial_block(r2->cleanup_metadata)) {
    return false;
  }

  /* Only allow fuse with a common allocator */
  if (r1->block_alloc != r2->block_alloc) {
    return false;
  }

  /* The root with the larger refcount survives; the other is absorbed. On a
   * tie the root of |a1| survives. */
  upb_Arena* keep = r1;
  upb_Arena* absorb = r2;
  if (r1->refcount < r2->refcount) {
    keep = r2;
    absorb = r1;
  }

  /* |keep| takes over the refcount and freelist of |absorb|: the absorbed
   * list is spliced in front of the surviving one. */
  keep->refcount += absorb->refcount;
  if (absorb->freelist_tail) {
    UPB_ASSERT(absorb->freelist_tail->next == NULL);
    absorb->freelist_tail->next = keep->freelist;
    keep->freelist = absorb->freelist;
  }
  absorb->parent = keep;
  return true;
}
12459 
12460 /* Miscellaneous utilities ****************************************************/
12461 
/* Rewrites every ',' in the NUL-terminated string |p| to '.', in place.
 *
 * printf() output depends on the locale and there is no easy, portable way to
 * avoid that. This post-processing step turns "1,2" into "1.2", which is what
 * JSON requires. Arguably a hack, but simple, and the alternatives are more
 * complicated, platform-dependent and/or larger in code size. */
static void upb_FixLocale(char* p) {
  while (*p != '\0') {
    if (*p == ',') {
      *p = '.';
    }
    p++;
  }
}
12472 
_upb_EncodeRoundTripDouble(double val,char * buf,size_t size)12473 void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size) {
12474   assert(size >= kUpb_RoundTripBufferSize);
12475   snprintf(buf, size, "%.*g", DBL_DIG, val);
12476   if (strtod(buf, NULL) != val) {
12477     snprintf(buf, size, "%.*g", DBL_DIG + 2, val);
12478     assert(strtod(buf, NULL) == val);
12479   }
12480   upb_FixLocale(buf);
12481 }
12482 
_upb_EncodeRoundTripFloat(float val,char * buf,size_t size)12483 void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size) {
12484   assert(size >= kUpb_RoundTripBufferSize);
12485   snprintf(buf, size, "%.*g", FLT_DIG, val);
12486   if (strtof(buf, NULL) != val) {
12487     snprintf(buf, size, "%.*g", FLT_DIG + 3, val);
12488     assert(strtof(buf, NULL) == val);
12489   }
12490   upb_FixLocale(buf);
12491 }
12492 
12493 /** upb/port_undef.inc ************************************************************/
12494 /* See port_def.inc.  This should #undef all macros #defined there. */
12495 
12496 #undef UPB_SIZE
12497 #undef UPB_PTR_AT
12498 #undef UPB_READ_ONEOF
12499 #undef UPB_WRITE_ONEOF
12500 #undef UPB_MAPTYPE_STRING
12501 #undef UPB_INLINE
12502 #undef UPB_ALIGN_UP
12503 #undef UPB_ALIGN_DOWN
12504 #undef UPB_ALIGN_MALLOC
12505 #undef UPB_ALIGN_OF
12506 #undef UPB_MALLOC_ALIGN
12507 #undef UPB_LIKELY
12508 #undef UPB_UNLIKELY
12509 #undef UPB_FORCEINLINE
12510 #undef UPB_NOINLINE
12511 #undef UPB_NORETURN
12512 #undef UPB_PRINTF
12513 #undef UPB_MAX
12514 #undef UPB_MIN
12515 #undef UPB_UNUSED
12516 #undef UPB_ASSUME
12517 #undef UPB_ASSERT
12518 #undef UPB_UNREACHABLE
12519 #undef UPB_SETJMP
12520 #undef UPB_LONGJMP
12521 #undef UPB_PTRADD
12522 #undef UPB_MUSTTAIL
12523 #undef UPB_FASTTABLE_SUPPORTED
12524 #undef UPB_FASTTABLE
12525 #undef UPB_FASTTABLE_INIT
12526 #undef UPB_POISON_MEMORY_REGION
12527 #undef UPB_UNPOISON_MEMORY_REGION
12528 #undef UPB_ASAN
12529 #undef UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3
12530