1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "upb/reflection/internal/def_builder.h"
9
10 #include <string.h>
11
12 #include "upb/base/internal/log2.h"
13 #include "upb/base/upcast.h"
14 #include "upb/mem/alloc.h"
15 #include "upb/message/copy.h"
16 #include "upb/reflection/def_pool.h"
17 #include "upb/reflection/def_type.h"
18 #include "upb/reflection/field_def.h"
19 #include "upb/reflection/file_def.h"
20 #include "upb/reflection/internal/strdup2.h"
21 #include "upb/wire/decode.h"
22
23 // Must be last.
24 #include "upb/port/def.inc"
25
26 /* The upb core does not generally have a concept of default instances. However
27 * for descriptor options we make an exception since the max size is known and
28 * modest (<200 bytes). All types can share a default instance since it is
29 * initialized to zeroes.
30 *
31 * We have to allocate an extra pointer for upb's internal metadata. */
32 static UPB_ALIGN_AS(8) const
33 char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
34 const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)];
35
// Returns the last dot-separated component of `fullname`.
//
// The result points into `fullname` itself (one past the final '.'). When the
// name contains no '.', `fullname` is returned unchanged. A NULL input yields
// NULL.
const char* _upb_DefBuilder_FullToShort(const char* fullname) {
  if (!fullname) return NULL;

  const char* last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
49
// Abandons the current build by invoking UPB_LONGJMP on ctx->err with value 1.
// NOTE(review): presumably this never returns to the caller -- confirm against
// the UPB_LONGJMP definition.
void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) {
  UPB_LONGJMP(ctx->err, 1);
}
51
// Records a printf-style formatted error message in ctx->status and then
// fails the build via _upb_DefBuilder_FailJmp().
void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) {
  va_list args;

  va_start(args, fmt);
  upb_Status_VSetErrorFormat(ctx->status, fmt, args);
  va_end(args);

  _upb_DefBuilder_FailJmp(ctx);
}
59
// Records a fixed "out of memory" message in ctx->status and then fails the
// build via _upb_DefBuilder_FailJmp().
void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) {
  upb_Status_SetErrorMessage(ctx->status, "out of memory");

  _upb_DefBuilder_FailJmp(ctx);
}
64
65 // Verify a relative identifier string. The loop is branchless for speed.
_upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder * ctx,upb_StringView name)66 static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx,
67 upb_StringView name) {
68 bool good = name.size > 0;
69
70 for (size_t i = 0; i < name.size; i++) {
71 const char c = name.data[i];
72 const char d = c | 0x20; // force lowercase
73 const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_');
74 const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0);
75
76 good &= is_alpha | is_numer;
77 }
78
79 if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false);
80 }
81
// Validates `name` as a relative identifier and returns a NUL-terminated full
// name for it.
//
// With a non-NULL `prefix` the result is "<prefix>.<name>", allocated through
// _upb_DefBuilder_Alloc(). With a NULL `prefix` the result is a copy of `name`
// made in ctx->arena; a failed copy is reported as out-of-memory.
const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx,
                                         const char* prefix,
                                         upb_StringView name) {
  _upb_DefBuilder_CheckIdentNotFull(ctx, name);

  if (!prefix) {
    char* copy = upb_strdup2(name.data, name.size, ctx->arena);
    if (!copy) _upb_DefBuilder_OomErr(ctx);
    return copy;
  }

  // Room for prefix + '.' + name + '\0'.
  const size_t prefix_len = strlen(prefix);
  char* joined = _upb_DefBuilder_Alloc(ctx, prefix_len + name.size + 2);

  char* out = joined;
  memcpy(out, prefix, prefix_len);
  out += prefix_len;
  *out++ = '.';
  memcpy(out, name.data, name.size);
  out += name.size;
  *out = '\0';

  return joined;
}
101
// Shortens the dotted name `base[0, *len)` by dropping its last component.
//
// On success *len becomes the index of the last '.' found at a position > 0,
// or 0 when there is none, and true is returned. Returns false (leaving *len
// untouched) when the name is already empty.
static bool remove_component(char* base, size_t* len) {
  size_t idx = *len;
  if (idx == 0) return false;

  // Walk backwards over indices *len-1 .. 1; index 0 is never inspected, so
  // falling out of the loop leaves idx == 0.
  while (--idx > 0) {
    if (base[idx] == '.') break;
  }

  *len = idx;
  return true;
}
115
// Resolves the symbol `sym` against ctx->symtab and returns the definition it
// names, storing the definition's type in *type.
//
// - A symbol starting with '.' is absolute: the leading '.' is stripped and a
//   single lookup is performed.
// - Any other symbol is relative to `base` (which may be NULL): the lookup is
//   tried as "<base>.<sym>", then with trailing components of `base` removed
//   one at a time, and finally as bare "<sym>".
//
// An empty or unresolvable symbol is reported through _upb_DefBuilder_Errf().
// `from_name_dbg` is not used by this function.
const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx,
                                       const char* from_name_dbg,
                                       const char* base, upb_StringView sym,
                                       upb_deftype_t* type) {
  if (sym.size == 0) goto notfound;
  upb_value v;
  if (sym.data[0] == '.') {
    // Symbols starting with '.' are absolute, so we do a single lookup.
    // Slice to omit the leading '.'
    if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) {
      goto notfound;
    }
  } else {
    // Remove components from base until we find an entry or run out.
    size_t baselen = base ? strlen(base) : 0;

    // Scratch space for base + '.' + sym. It is not NUL-terminated; lookups
    // are given an explicit length.
    char* tmp = upb_gmalloc(sym.size + baselen + 1);
    // Nothing is held yet, so it is safe to bail out directly on failure.
    if (!tmp) _upb_DefBuilder_OomErr(ctx);

    bool found;
    do {
      char* p = tmp;
      if (baselen) {
        // After the first pass tmp already holds this prefix; re-copying it
        // keeps the loop body uniform.
        memcpy(p, base, baselen);
        p[baselen] = '.';
        p += baselen + 1;
      }
      memcpy(p, sym.data, sym.size);
      p += sym.size;
      found = _upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v);
    } while (!found && remove_component(tmp, &baselen));

    // Release the scratch buffer before any error path can jump away.
    upb_gfree(tmp);
    if (!found) goto notfound;
  }

  *type = _upb_DefType_Type(v);
  return _upb_DefType_Unpack(v, *type);

notfound:
  _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
                       UPB_STRINGVIEW_ARGS(sym));
}
159
// Resolves `sym` via _upb_DefBuilder_ResolveAny() and additionally requires
// the resolved definition to have type `type`. A non-NULL result of a
// different type is reported as an error that names `from_name_dbg`.
const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx,
                                    const char* from_name_dbg, const char* base,
                                    upb_StringView sym, upb_deftype_t type) {
  upb_deftype_t actual_type;
  const void* def =
      _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &actual_type);

  if (!def || actual_type == type) return def;

  _upb_DefBuilder_Errf(ctx,
                       "type mismatch when resolving %s: couldn't find "
                       "name " UPB_STRINGVIEW_FORMAT " with type=%d",
                       from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
  return def;
}
174
// Sets bit 0x20 of `ch`, which in ASCII maps an upper-case letter to its
// lower-case form. Only a letter result is meaningful: if the output is a
// letter then the input was one too, but a non-letter input may be changed
// into some unrelated character.
static char upb_ascii_lower(char ch) {
  const int with_case_bit = ch | 0x20;
  return (char)with_case_bit;
}
179
// Locale-independent range test used in place of the <ctype.h> classifiers
// (isalpha() and friends depend on the current locale, which is not wanted
// here). Returns true when `c` lies in the inclusive range [low, high].
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  return !(c < low || high < c);
}
184
// Returns true when `c` is an ASCII letter (either case) or an underscore.
static bool upb_isletter(char c) {
  if (c == '_') return true;
  return upb_isbetween(upb_ascii_lower(c), 'a', 'z');
}
189
// Returns true when `c` is an ASCII digit, an ASCII letter, or an underscore.
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) return true;
  return upb_isletter(c);
}
193
// Reads one character from the range [*src, end).
//
// On success stores the character in *ch, advances *src by one and returns
// true. Returns false, leaving *src and *ch untouched, when the range is
// already exhausted.
static bool TryGetChar(const char** src, const char* end, char* ch) {
  const char* cursor = *src;
  if (cursor == end) return false;

  *ch = *cursor;
  *src = cursor + 1;
  return true;
}
200
// Tries to consume one hexadecimal digit (0-9, a-f, A-F) from [*src, end).
//
// Returns the digit's value (0..15) and advances *src past it. Returns -1 and
// leaves *src where it was when the input is exhausted or the next character
// is not a hex digit.
static int TryGetHexDigit(const char** src, const char* end) {
  char ch;
  if (!TryGetChar(src, end, &ch)) return -1;

  if (ch >= '0' && ch <= '9') return ch - '0';

  const char lower = upb_ascii_lower(ch);
  if (lower >= 'a' && lower <= 'f') return lower - 'a' + 0xa;

  // Not a hex digit after all: un-read the character.
  *src -= 1;
  return -1;
}
214
// Parses the digits of a "\x" escape, starting at *src (just past the 'x').
//
// Consumes every consecutive hex digit in [*src, end) and returns the value
// as a single byte. Reports an error through _upb_DefBuilder_Errf() (naming
// field `f`) when there is no digit at all or when the value does not fit in
// 8 bits.
static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx,
                                          const upb_FieldDef* f,
                                          const char** src, const char* end) {
  int hex_digit = TryGetHexDigit(src, end);
  if (hex_digit < 0) {
    _upb_DefBuilder_Errf(
        ctx, "\\x must be followed by at least one hex digit (field='%s')",
        upb_FieldDef_FullName(f));
    return 0;
  }

  unsigned int ret = (unsigned int)hex_digit;
  // Sticky: once the value has exceeded one byte it stays invalid, even if
  // the accumulator later wraps around to a small number after many digits.
  bool too_big = false;
  while ((hex_digit = TryGetHexDigit(src, end)) >= 0) {
    ret = (ret << 4) | (unsigned int)hex_digit;
    if (ret > 0xff) too_big = true;
  }

  if (too_big) {
    _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits",
                         upb_FieldDef_FullName(f));
    return 0;
  }
  return (char)ret;
}
236
TryGetOctalDigit(const char ** src,const char * end)237 static char TryGetOctalDigit(const char** src, const char* end) {
238 char ch;
239 if (!TryGetChar(src, end, &ch)) return -1;
240 if ('0' <= ch && ch <= '7') {
241 return ch - '0';
242 }
243 *src -= 1; // Char wasn't actually an octal digit.
244 return -1;
245 }
246
upb_DefBuilder_ParseOctalEscape(upb_DefBuilder * ctx,const upb_FieldDef * f,const char ** src,const char * end)247 static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx,
248 const upb_FieldDef* f,
249 const char** src, const char* end) {
250 char ch = 0;
251 for (int i = 0; i < 3; i++) {
252 char digit;
253 if ((digit = TryGetOctalDigit(src, end)) >= 0) {
254 ch = (ch << 3) | digit;
255 }
256 }
257 return ch;
258 }
259
// Parses one escape sequence whose leading backslash has already been
// consumed; *src points at the character after it.
//
// Handles the single-character escapes (a b f n r t v \ ' " ?), hex escapes
// introduced by 'x' or 'X', and octal escapes introduced by a digit 0-7.
// Returns the byte the escape stands for and advances *src past the sequence.
// An unterminated or unknown escape is reported via _upb_DefBuilder_Errf().
char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f,
                                 const char** src, const char* end) {
  // Index-aligned tables: kEscapeChars[i] denotes the byte kEscapeBytes[i].
  static const char kEscapeChars[] = {'a', 'b',  'f',  'n',  'r', 't',
                                      'v', '\\', '\'', '\"', '?'};
  static const char kEscapeBytes[] = {'\a', '\b', '\f', '\n', '\r', '\t',
                                      '\v', '\\', '\'', '\"', '\?'};

  char ch;
  if (!TryGetChar(src, end, &ch)) {
    _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s",
                         upb_FieldDef_FullName(f));
    return 0;
  }

  for (size_t i = 0; i < sizeof(kEscapeChars); i++) {
    if (ch == kEscapeChars[i]) return kEscapeBytes[i];
  }

  if (ch == 'x' || ch == 'X') {
    return upb_DefBuilder_ParseHexEscape(ctx, f, src, end);
  }

  if ('0' <= ch && ch <= '7') {
    // The digit just read is part of the value: hand it back to the parser.
    *src -= 1;
    return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end);
  }

  _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch);
}
307
// Slow-path identifier check that pinpoints why `name` is invalid and reports
// it via _upb_DefBuilder_Errf().
//
// When `full` is true the name may consist of several '.'-separated
// components; otherwise any '.' is an error. Every component must be
// non-empty, start with a letter or '_', and continue with letters, digits or
// '_'. The function is written on the expectation that the name really is
// invalid: reaching the end without reporting an error trips an assertion.
void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name,
                                    bool full) {
  bool at_component_start = true;

  for (size_t pos = 0; pos < name.size; pos++) {
    const char ch = name.data[pos];

    if (ch == '.') {
      if (at_component_start || !full) {
        _upb_DefBuilder_Errf(
            ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")",
            UPB_STRINGVIEW_ARGS(name));
      }
      at_component_start = true;
      continue;
    }

    if (at_component_start) {
      if (!upb_isletter(ch)) {
        _upb_DefBuilder_Errf(ctx,
                             "invalid name: path components must start with a "
                             "letter (" UPB_STRINGVIEW_FORMAT ")",
                             UPB_STRINGVIEW_ARGS(name));
      }
      at_component_start = false;
      continue;
    }

    if (!upb_isalphanum(ch)) {
      _upb_DefBuilder_Errf(
          ctx,
          "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT
          ")",
          UPB_STRINGVIEW_ARGS(name));
    }
  }

  // Still waiting for a component's first character: the name was empty or
  // ended in '.'.
  if (at_component_start) {
    _upb_DefBuilder_Errf(ctx,
                         "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")",
                         UPB_STRINGVIEW_ARGS(name));
  }

  // We should never reach this point.
  UPB_ASSERT(false);
}
347
_upb_DefBuilder_MakeKey(upb_DefBuilder * ctx,const UPB_DESC (FeatureSet *)parent,upb_StringView key)348 upb_StringView _upb_DefBuilder_MakeKey(upb_DefBuilder* ctx,
349 const UPB_DESC(FeatureSet*) parent,
350 upb_StringView key) {
351 size_t need = key.size + sizeof(void*);
352 if (ctx->tmp_buf_size < need) {
353 ctx->tmp_buf_size = UPB_MAX(64, upb_Log2Ceiling(need));
354 ctx->tmp_buf = upb_Arena_Malloc(ctx->tmp_arena, ctx->tmp_buf_size);
355 if (!ctx->tmp_buf) _upb_DefBuilder_OomErr(ctx);
356 }
357
358 memcpy(ctx->tmp_buf, &parent, sizeof(void*));
359 memcpy(ctx->tmp_buf + sizeof(void*), key.data, key.size);
360 return upb_StringView_FromDataAndSize(ctx->tmp_buf, need);
361 }
362
// Looks up the feature set cached under (parent, key) in ctx->feature_cache,
// creating the entry when it is missing.
//
// On a cache hit *set receives the cached pointer and false is returned. On a
// miss *set receives a deep clone of `parent` allocated in ctx->arena, the
// clone is inserted into the cache, and true is returned. Failure to clone or
// to insert is reported via _upb_DefBuilder_OomErr().
bool _upb_DefBuilder_GetOrCreateFeatureSet(upb_DefBuilder* ctx,
                                           const UPB_DESC(FeatureSet*) parent,
                                           upb_StringView key,
                                           UPB_DESC(FeatureSet**) set) {
  const upb_StringView cache_key = _upb_DefBuilder_MakeKey(ctx, parent, key);

  upb_value entry;
  const bool cached = upb_strtable_lookup2(&ctx->feature_cache, cache_key.data,
                                           cache_key.size, &entry);
  if (cached) {
    *set = upb_value_getptr(entry);
    return false;
  }

  UPB_DESC(FeatureSet*) clone = (UPB_DESC(FeatureSet*))upb_Message_DeepClone(
      UPB_UPCAST(parent), UPB_DESC_MINITABLE(FeatureSet), ctx->arena);
  *set = clone;
  if (!clone) _upb_DefBuilder_OomErr(ctx);

  entry = upb_value_ptr(clone);
  const bool inserted =
      upb_strtable_insert(&ctx->feature_cache, cache_key.data, cache_key.size,
                          entry, ctx->tmp_arena);
  if (!inserted) _upb_DefBuilder_OomErr(ctx);

  return true;
}
386
// Computes the feature set that results from applying `child` on top of
// `parent`.
//
// With a NULL `child`, `parent` is returned as is. Otherwise `child` is
// serialized and the bytes are used, together with `parent`, as a cache key:
// a cached result is returned directly, while a newly created entry (a clone
// of `parent`) has the serialized child decoded into it before being returned.
//
// Explicit (non-implicit) features are an error unless the file's syntax is
// editions. Serialization and decode failures are both reported via
// _upb_DefBuilder_OomErr().
const UPB_DESC(FeatureSet*)
    _upb_DefBuilder_DoResolveFeatures(upb_DefBuilder* ctx,
                                      const UPB_DESC(FeatureSet*) parent,
                                      const UPB_DESC(FeatureSet*) child,
                                      bool is_implicit) {
  assert(parent);
  if (!child) return parent;

  if (!is_implicit) {
    if (upb_FileDef_Syntax(ctx->file) != kUpb_Syntax_Editions) {
      _upb_DefBuilder_Errf(ctx, "Features can only be specified for editions");
    }
  }

  size_t serialized_len;
  const char* serialized =
      UPB_DESC(FeatureSet_serialize)(child, ctx->tmp_arena, &serialized_len);
  if (!serialized) _upb_DefBuilder_OomErr(ctx);

  UPB_DESC(FeatureSet*) merged;
  const upb_StringView cache_key =
      upb_StringView_FromDataAndSize(serialized, serialized_len);
  const bool created =
      _upb_DefBuilder_GetOrCreateFeatureSet(ctx, parent, cache_key, &merged);

  if (created) {
    // Fresh entry: overlay the child's fields onto the clone of the parent.
    const upb_DecodeStatus status =
        upb_Decode(serialized, serialized_len, UPB_UPCAST(merged),
                   UPB_DESC_MINITABLE(FeatureSet), NULL, 0, ctx->arena);
    if (status != kUpb_DecodeStatus_Ok) _upb_DefBuilder_OomErr(ctx);
  }

  return merged;
}
418