xref: /aosp_15_r20/external/grpc-grpc/third_party/upb/upb/reflection/internal/def_builder.c (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "upb/reflection/internal/def_builder.h"
9 
10 #include <string.h>
11 
12 #include "upb/base/internal/log2.h"
13 #include "upb/base/upcast.h"
14 #include "upb/mem/alloc.h"
15 #include "upb/message/copy.h"
16 #include "upb/reflection/def_pool.h"
17 #include "upb/reflection/def_type.h"
18 #include "upb/reflection/field_def.h"
19 #include "upb/reflection/file_def.h"
20 #include "upb/reflection/internal/strdup2.h"
21 #include "upb/wire/decode.h"
22 
23 // Must be last.
24 #include "upb/port/def.inc"
25 
26 /* The upb core does not generally have a concept of default instances. However
27  * for descriptor options we make an exception since the max size is known and
28  * modest (<200 bytes). All types can share a default instance since it is
29  * initialized to zeroes.
30  *
31  * We have to allocate an extra pointer for upb's internal metadata. */
32 static UPB_ALIGN_AS(8) const
33     char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
34 const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)];
35 
// Returns the portion of `fullname` that follows its last '.', i.e. the short
// (unqualified) name. The result points into `fullname`; nothing is copied.
//
// - NULL input yields NULL.
// - A name without any '.' is returned unchanged.
const char* _upb_DefBuilder_FullToShort(const char* fullname) {
  if (!fullname) return NULL;

  const char* last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
49 
// Abandons the current build by invoking UPB_LONGJMP on the jump buffer stored
// in `ctx->err`, passing the value 1.
void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) {
  UPB_LONGJMP(ctx->err, 1);
}
51 
// Records a printf-style formatted error in `ctx->status`, then bails out of
// the build through _upb_DefBuilder_FailJmp().
void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) {
  va_list args;

  va_start(args, fmt);
  upb_Status_VSetErrorFormat(ctx->status, fmt, args);
  va_end(args);

  _upb_DefBuilder_FailJmp(ctx);
}
59 
// Records an "out of memory" error in `ctx->status`, then bails out of the
// build through _upb_DefBuilder_FailJmp().
void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) {
  static const char kOomMessage[] = "out of memory";

  upb_Status_SetErrorMessage(ctx->status, kOomMessage);
  _upb_DefBuilder_FailJmp(ctx);
}
64 
65 // Verify a relative identifier string. The loop is branchless for speed.
_upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder * ctx,upb_StringView name)66 static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx,
67                                               upb_StringView name) {
68   bool good = name.size > 0;
69 
70   for (size_t i = 0; i < name.size; i++) {
71     const char c = name.data[i];
72     const char d = c | 0x20;  // force lowercase
73     const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_');
74     const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0);
75 
76     good &= is_alpha | is_numer;
77   }
78 
79   if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false);
80 }
81 
// Builds a NUL-terminated fully-qualified name from `prefix` and `name`.
//
// `name` is first validated as a relative identifier. With a non-NULL
// `prefix` the result is "<prefix>.<name>" in storage obtained from
// _upb_DefBuilder_Alloc(); with a NULL `prefix` the result is a copy of
// `name` made by upb_strdup2() on `ctx->arena` (an OOM error is raised if
// that copy fails).
const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx,
                                         const char* prefix,
                                         upb_StringView name) {
  _upb_DefBuilder_CheckIdentNotFull(ctx, name);

  if (!prefix) {
    char* copy = upb_strdup2(name.data, name.size, ctx->arena);
    if (!copy) _upb_DefBuilder_OomErr(ctx);
    return copy;
  }

  // Layout: prefix bytes, '.', name bytes, '\0'  =>  two extra bytes.
  const size_t prefix_len = strlen(prefix);
  char* full = _upb_DefBuilder_Alloc(ctx, prefix_len + name.size + 2);
  char* out = full;

  memcpy(out, prefix, prefix_len);
  out += prefix_len;
  *out++ = '.';
  memcpy(out, name.data, name.size);
  out += name.size;
  *out = '\0';

  return full;
}
101 
// Shortens the logical length `*len` of `base` so that it ends just before the
// last '.' found at an index in [1, *len - 1]. If there is no such '.', the
// length becomes 0.
//
// Returns false (leaving `*len` untouched) only when `*len` is already 0,
// i.e. when there is nothing left to remove. `base` itself is not modified.
static bool remove_component(char* base, size_t* len) {
  size_t pos = *len;
  if (pos == 0) return false;

  // Walk right-to-left; index 0 is never inspected.
  while (--pos > 0) {
    if (base[pos] == '.') {
      *len = pos;
      return true;
    }
  }

  *len = 0;
  return true;
}
115 
// Resolves the symbol `sym` against `ctx->symtab` and returns the definition
// it names, storing the definition's kind in `*type`.
//
// - A symbol beginning with '.' is absolute: the leading '.' is dropped and a
//   single lookup is performed.
// - Otherwise the symbol is relative to the scope `base` (may be NULL): the
//   lookup is tried as "<base>.<sym>", then with trailing components of
//   `base` removed one at a time, and finally as bare "<sym>".
//
// If nothing matches (or `sym` is empty) an error is raised through
// _upb_DefBuilder_Errf(). `from_name_dbg` is not used by this function.
const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx,
                                       const char* from_name_dbg,
                                       const char* base, upb_StringView sym,
                                       upb_deftype_t* type) {
  if (sym.size == 0) goto notfound;
  upb_value v;
  if (sym.data[0] == '.') {
    // Symbols starting with '.' are absolute, so we do a single lookup.
    // Slice to omit the leading '.'
    if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) {
      goto notfound;
    }
  } else {
    // Remove components from base until we find an entry or run out.
    size_t baselen = base ? strlen(base) : 0;

    // Scratch space for "<base>.<sym>": baselen + 1 + sym.size bytes.
    char* tmp = upb_gmalloc(sym.size + baselen + 1);
    // Without this check an allocation failure would be dereferenced by the
    // memcpy() calls below.
    if (!tmp) _upb_DefBuilder_OomErr(ctx);

    bool found = false;
    do {
      char* p = tmp;
      if (baselen) {
        memcpy(p, base, baselen);
        p[baselen] = '.';
        p += baselen + 1;
      }
      memcpy(p, sym.data, sym.size);
      p += sym.size;
      found = _upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v);
    } while (!found && remove_component(tmp, &baselen));

    // Release the scratch buffer before any error path can jump away.
    upb_gfree(tmp);
    if (!found) goto notfound;
  }

  *type = _upb_DefType_Type(v);
  return _upb_DefType_Unpack(v, *type);

notfound:
  _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
                       UPB_STRINGVIEW_ARGS(sym));
}
159 
// Like _upb_DefBuilder_ResolveAny(), but additionally requires the resolved
// definition to be of kind `type`. A non-NULL result of a different kind
// raises a "type mismatch" error that names `from_name_dbg`.
const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx,
                                    const char* from_name_dbg, const char* base,
                                    upb_StringView sym, upb_deftype_t type) {
  upb_deftype_t actual;
  const void* def =
      _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &actual);

  // Nothing to verify for a NULL result; otherwise the kinds must agree.
  if (!def || actual == type) return def;

  _upb_DefBuilder_Errf(ctx,
                       "type mismatch when resolving %s: couldn't find "
                       "name " UPB_STRINGVIEW_FORMAT " with type=%d",
                       from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
  return def;
}
174 
// Sets the ASCII case bit (0x20) of `ch`, which maps 'A'..'Z' onto 'a'..'z'.
// Callers must only trust the result when it is a letter: if the output is a
// letter the input was one too, but for any other input the transformation is
// not meaningful.
static char upb_ascii_lower(char ch) {
  const char kAsciiCaseBit = 0x20;
  return ch | kAsciiCaseBit;
}
179 
// Locale-independent range test: true iff low <= c <= high, comparing the
// values as unsigned bytes. Used instead of the <ctype.h> classifiers
// (isalpha() etc.), whose results depend on the current locale.
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  if (c < low) return false;
  return c <= high;
}
184 
// True iff `c` is an ASCII letter (either case) or an underscore.
static bool upb_isletter(char c) {
  if (c == '_') return true;
  return upb_isbetween(upb_ascii_lower(c), 'a', 'z');
}
189 
// True iff `c` is an ASCII letter, an underscore, or a decimal digit.
static bool upb_isalphanum(char c) {
  const bool is_digit = upb_isbetween(c, '0', '9');
  return is_digit || upb_isletter(c);
}
193 
// Reads one character from the cursor `*src`, bounded by `end`.
//
// On success stores the character in `*ch`, advances `*src` by one and
// returns true. When the cursor is already at `end`, returns false and leaves
// both `*src` and `*ch` untouched.
static bool TryGetChar(const char** src, const char* end, char* ch) {
  const char* cursor = *src;
  if (cursor == end) return false;

  *ch = *cursor;
  *src = cursor + 1;
  return true;
}
200 
// Consumes one hexadecimal digit (0-9, a-f, A-F) from the cursor `*src`.
//
// Returns the digit's value (0..15) and advances the cursor on success.
// Returns -1 without consuming anything when the input is exhausted or the
// next character is not a hex digit.
static int TryGetHexDigit(const char** src, const char* end) {
  char ch;
  if (!TryGetChar(src, end, &ch)) return -1;

  if (ch >= '0' && ch <= '9') return ch - '0';

  const char lowered = upb_ascii_lower(ch);
  if (lowered >= 'a' && lowered <= 'f') return lowered - 'a' + 0xa;

  // Not a hex digit after all: put the character back.
  *src -= 1;
  return -1;
}
214 
// Parses the digits of a "\x" escape starting at `*src` and returns the byte
// they encode. All consecutive hex digits are consumed.
//
// Errors (reported through _upb_DefBuilder_Errf, naming field `f`):
//   - no hex digit follows the "\x";
//   - the value does not fit in 8 bits.
static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx,
                                          const upb_FieldDef* f,
                                          const char** src, const char* end) {
  int hex_digit = TryGetHexDigit(src, end);
  if (hex_digit < 0) {
    _upb_DefBuilder_Errf(
        ctx, "\\x must be followed by at least one hex digit (field='%s')",
        upb_FieldDef_FullName(f));
    return 0;
  }
  unsigned int ret = hex_digit;
  while ((hex_digit = TryGetHexDigit(src, end)) >= 0) {
    ret = (ret << 4) | (unsigned int)hex_digit;
    // Saturate as soon as the value leaves the 8-bit range. Otherwise a long
    // run of digits would shift the high bits out of the accumulator and an
    // oversized escape could wrap around to a value that passes the range
    // check below.
    if (ret > 0xff) ret = 0x100;
  }
  if (ret > 0xff) {
    _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits",
                         upb_FieldDef_FullName(f));
    return 0;
  }
  return ret;
}
236 
TryGetOctalDigit(const char ** src,const char * end)237 static char TryGetOctalDigit(const char** src, const char* end) {
238   char ch;
239   if (!TryGetChar(src, end, &ch)) return -1;
240   if ('0' <= ch && ch <= '7') {
241     return ch - '0';
242   }
243   *src -= 1;  // Char wasn't actually an octal digit.
244   return -1;
245 }
246 
upb_DefBuilder_ParseOctalEscape(upb_DefBuilder * ctx,const upb_FieldDef * f,const char ** src,const char * end)247 static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx,
248                                             const upb_FieldDef* f,
249                                             const char** src, const char* end) {
250   char ch = 0;
251   for (int i = 0; i < 3; i++) {
252     char digit;
253     if ((digit = TryGetOctalDigit(src, end)) >= 0) {
254       ch = (ch << 3) | digit;
255     }
256   }
257   return ch;
258 }
259 
// Decodes one escape sequence. `*src` must point just past the backslash; on
// return it points past the whole sequence. Returns the decoded byte.
//
// Recognised forms: the single-character escapes a b f n r t v \ ' " ?,
// hexadecimal ("x" or "X" followed by hex digits) and octal (up to three
// digits starting with 0-7). Running out of input or meeting any other
// character raises an error through _upb_DefBuilder_Errf().
char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f,
                                 const char** src, const char* end) {
  char code;
  if (!TryGetChar(src, end, &code)) {
    _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s",
                         upb_FieldDef_FullName(f));
    return 0;
  }

  // Numeric escapes are delegated to their own parsers.
  if (code == 'x' || code == 'X') {
    return upb_DefBuilder_ParseHexEscape(ctx, f, src, end);
  }
  if (code >= '0' && code <= '7') {
    // The octal parser expects to see the first digit itself.
    *src -= 1;
    return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end);
  }

  // Single-character escapes.
  switch (code) {
    case 'a':  return '\a';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    case 'v':  return '\v';
    case '\\': return '\\';
    case '\'': return '\'';
    case '\"': return '\"';
    case '?':  return '\?';
    default:
      break;
  }

  _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", code);
}
307 
// Slow-path identifier check, called once the fast path has already decided
// that `name` is invalid. Its job is to work out why and raise a specific
// error through _upb_DefBuilder_Errf():
//   - a '.' at the start of a component, or any '.' when `full` is false;
//   - a component that does not start with a letter or '_';
//   - a character that is not a letter, '_' or digit;
//   - an empty name or an empty trailing component.
// Falling out of the bottom without raising an error is treated as a bug.
void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name,
                                    bool full) {
  // True while the next character would begin a new path component.
  bool at_component_start = true;

  for (size_t pos = 0; pos < name.size; pos++) {
    const char ch = name.data[pos];

    if (ch == '.') {
      if (at_component_start || !full) {
        _upb_DefBuilder_Errf(
            ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")",
            UPB_STRINGVIEW_ARGS(name));
      }
      at_component_start = true;
      continue;
    }

    if (at_component_start) {
      if (!upb_isletter(ch)) {
        _upb_DefBuilder_Errf(ctx,
                             "invalid name: path components must start with a "
                             "letter (" UPB_STRINGVIEW_FORMAT ")",
                             UPB_STRINGVIEW_ARGS(name));
      }
      at_component_start = false;
      continue;
    }

    if (!upb_isalphanum(ch)) {
      _upb_DefBuilder_Errf(
          ctx,
          "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT
          ")",
          UPB_STRINGVIEW_ARGS(name));
    }
  }

  if (at_component_start) {
    _upb_DefBuilder_Errf(ctx,
                         "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")",
                         UPB_STRINGVIEW_ARGS(name));
  }

  // We should never reach this point.
  UPB_ASSERT(false);
}
347 
_upb_DefBuilder_MakeKey(upb_DefBuilder * ctx,const UPB_DESC (FeatureSet *)parent,upb_StringView key)348 upb_StringView _upb_DefBuilder_MakeKey(upb_DefBuilder* ctx,
349                                        const UPB_DESC(FeatureSet*) parent,
350                                        upb_StringView key) {
351   size_t need = key.size + sizeof(void*);
352   if (ctx->tmp_buf_size < need) {
353     ctx->tmp_buf_size = UPB_MAX(64, upb_Log2Ceiling(need));
354     ctx->tmp_buf = upb_Arena_Malloc(ctx->tmp_arena, ctx->tmp_buf_size);
355     if (!ctx->tmp_buf) _upb_DefBuilder_OomErr(ctx);
356   }
357 
358   memcpy(ctx->tmp_buf, &parent, sizeof(void*));
359   memcpy(ctx->tmp_buf + sizeof(void*), key.data, key.size);
360   return upb_StringView_FromDataAndSize(ctx->tmp_buf, need);
361 }
362 
// Looks up the FeatureSet cached for the (`parent`, `key`) pair, creating it
// when absent.
//
// - Cache hit: stores the cached set in `*set` and returns false.
// - Cache miss: stores a deep clone of `parent` (on `ctx->arena`) in `*set`,
//   registers it in `ctx->feature_cache`, and returns true so the caller
//   knows the new set still has to be filled in.
//
// Allocation failures raise an OOM error.
bool _upb_DefBuilder_GetOrCreateFeatureSet(upb_DefBuilder* ctx,
                                           const UPB_DESC(FeatureSet*) parent,
                                           upb_StringView key,
                                           UPB_DESC(FeatureSet**) set) {
  const upb_StringView cache_key = _upb_DefBuilder_MakeKey(ctx, parent, key);

  upb_value cached;
  if (upb_strtable_lookup2(&ctx->feature_cache, cache_key.data, cache_key.size,
                           &cached)) {
    *set = upb_value_getptr(cached);
    return false;
  }

  *set = (UPB_DESC(FeatureSet*))upb_Message_DeepClone(
      UPB_UPCAST(parent), UPB_DESC_MINITABLE(FeatureSet), ctx->arena);
  if (!*set) _upb_DefBuilder_OomErr(ctx);

  const bool inserted =
      upb_strtable_insert(&ctx->feature_cache, cache_key.data, cache_key.size,
                          upb_value_ptr(*set), ctx->tmp_arena);
  if (!inserted) _upb_DefBuilder_OomErr(ctx);

  return true;
}
386 
// Computes the effective FeatureSet for a definition from its `parent`
// features and its own `child` overrides.
//
// - A NULL `child` means "no overrides": `parent` is returned as is.
// - Explicit (non-implicit) overrides are rejected unless the file's syntax
//   is editions.
// - Otherwise the result is obtained from the (parent, serialized child)
//   cache; a freshly created entry starts as a clone of `parent` and has the
//   serialized `child` decoded on top of it.
//
// Serialization or decode failures are reported as OOM errors.
const UPB_DESC(FeatureSet*)
    _upb_DefBuilder_DoResolveFeatures(upb_DefBuilder* ctx,
                                      const UPB_DESC(FeatureSet*) parent,
                                      const UPB_DESC(FeatureSet*) child,
                                      bool is_implicit) {
  assert(parent);
  if (!child) return parent;

  if (!is_implicit && upb_FileDef_Syntax(ctx->file) != kUpb_Syntax_Editions) {
    _upb_DefBuilder_Errf(ctx, "Features can only be specified for editions");
  }

  // The serialized child doubles as the cache key and as the decode input.
  size_t encoded_size;
  const char* encoded =
      UPB_DESC(FeatureSet_serialize)(child, ctx->tmp_arena, &encoded_size);
  if (!encoded) _upb_DefBuilder_OomErr(ctx);

  UPB_DESC(FeatureSet*) merged;
  const upb_StringView cache_key =
      upb_StringView_FromDataAndSize(encoded, encoded_size);
  const bool created =
      _upb_DefBuilder_GetOrCreateFeatureSet(ctx, parent, cache_key, &merged);
  if (!created) return merged;

  // New entry: overlay the child's fields onto the clone of the parent.
  const upb_DecodeStatus status =
      upb_Decode(encoded, encoded_size, UPB_UPCAST(merged),
                 UPB_DESC_MINITABLE(FeatureSet), NULL, 0, ctx->arena);
  if (status != kUpb_DecodeStatus_Ok) _upb_DefBuilder_OomErr(ctx);

  return merged;
}
418