xref: /aosp_15_r20/external/pigweed/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1#!/usr/bin/env python3
2# Copyright 2022 The Pigweed Authors
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may not
5# use this file except in compliance with the License. You may obtain a copy of
6# the License at
7#
8#     https://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations under
14# the License.
15"""Fixes identifiers that would cause compiler errors in generated C++ code."""
16
17from typing import Set
18
# Set of words that can't be used as identifiers in the generated code. Many of
# these are valid identifiers in proto syntax, but they need special handling in
# the generated C++ code.
#
# Lookups against this set are case-sensitive: `fix_cc_identifier` tests the
# identifier exactly as given, while `fix_cc_enum_value_name` tests the
# upper-cased form of the enum-value name.
#
# Note: This is primarily used for "if x in y" operations, hence the use of a
# set rather than a list.
PW_PROTO_CODEGEN_RESERVED_WORDS: Set[str] = {
    # Identifiers that conflict with the codegen internals when used in certain
    # contexts:
    "Fields",
    "Message",
    # C++20 keywords (https://en.cppreference.com/w/cpp/keyword):
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "atomic_cancel",
    "atomic_commit",
    "atomic_noexcept",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char8_t",
    "char16_t",
    "char32_t",
    "class",
    "compl",
    "concept",
    "const",
    "consteval",
    "constexpr",
    "constinit",
    "const_cast",
    "continue",
    "co_await",
    "co_return",
    "co_yield",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "reflexpr",
    "register",
    "reinterpret_cast",
    "requires",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "synchronized",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq",
    # C++20 macros (https://en.cppreference.com/w/cpp/symbol_index/macro),
    # excluding the following:
    # - Function-like macros, which have unambiguous syntax and thus won't
    #   conflict with generated symbols.
    # - Macros that couldn't be made valid by appending underscores, namely
    #   those containing "__" or starting with "_[A-Z]". C++ reserves all such
    #   identifiers for the compiler, and appending underscores wouldn't change
    #   that.
    "ATOMIC_BOOL_LOCK_FREE",
    "ATOMIC_CHAR_LOCK_FREE",
    "ATOMIC_CHAR16_T_LOCK_FREE",
    "ATOMIC_CHAR32_T_LOCK_FREE",
    "ATOMIC_CHAR8_T_LOCK_FREE",
    "ATOMIC_FLAG_INIT",
    "ATOMIC_INT_LOCK_FREE",
    "ATOMIC_LLONG_LOCK_FREE",
    "ATOMIC_LONG_LOCK_FREE",
    "ATOMIC_POINTER_LOCK_FREE",
    "ATOMIC_SHORT_LOCK_FREE",
    "ATOMIC_WCHAR_T_LOCK_FREE",
    "BUFSIZ",
    "CHAR_BIT",
    "CHAR_MAX",
    "CHAR_MIN",
    "CLOCKS_PER_SEC",
    "DBL_DECIMAL_DIG",
    "DBL_DIG",
    "DBL_EPSILON",
    "DBL_HAS_SUBNORM",
    "DBL_MANT_DIG",
    "DBL_MAX",
    "DBL_MAX_10_EXP",
    "DBL_MAX_EXP",
    "DBL_MIN",
    "DBL_MIN_10_EXP",
    "DBL_MIN_EXP",
    "DBL_TRUE_MIN",
    "DECIMAL_DIG",
    "E2BIG",
    "EACCES",
    "EADDRINUSE",
    "EADDRNOTAVAIL",
    "EAFNOSUPPORT",
    "EAGAIN",
    "EALREADY",
    "EBADF",
    "EBADMSG",
    "EBUSY",
    "ECANCELED",
    "ECHILD",
    "ECONNABORTED",
    "ECONNREFUSED",
    "ECONNRESET",
    "EDEADLK",
    "EDESTADDRREQ",
    "EDOM",
    "EEXIST",
    "EFAULT",
    "EFBIG",
    "EHOSTUNREACH",
    "EIDRM",
    "EILSEQ",
    "EINPROGRESS",
    "EINTR",
    "EINVAL",
    "EIO",
    "EISCONN",
    "EISDIR",
    "ELOOP",
    "EMFILE",
    "EMLINK",
    "EMSGSIZE",
    "ENAMETOOLONG",
    "ENETDOWN",
    "ENETRESET",
    "ENETUNREACH",
    "ENFILE",
    "ENOBUFS",
    "ENODATA",
    "ENODEV",
    "ENOENT",
    "ENOEXEC",
    "ENOLCK",
    "ENOLINK",
    "ENOMEM",
    "ENOMSG",
    "ENOPROTOOPT",
    "ENOSPC",
    "ENOSR",
    "ENOSTR",
    "ENOSYS",
    "ENOTCONN",
    "ENOTDIR",
    "ENOTEMPTY",
    "ENOTRECOVERABLE",
    "ENOTSOCK",
    "ENOTSUP",
    "ENOTTY",
    "ENXIO",
    "EOF",
    "EOPNOTSUPP",
    "EOVERFLOW",
    "EOWNERDEAD",
    "EPERM",
    "EPIPE",
    "EPROTO",
    "EPROTONOSUPPORT",
    "EPROTOTYPE",
    "ERANGE",
    "EROFS",
    "errno",
    "ESPIPE",
    "ESRCH",
    "ETIME",
    "ETIMEDOUT",
    "ETXTBSY",
    "EWOULDBLOCK",
    "EXDEV",
    "EXIT_FAILURE",
    "EXIT_SUCCESS",
    "FE_ALL_EXCEPT",
    "FE_DFL_ENV",
    "FE_DIVBYZERO",
    "FE_DOWNWARD",
    "FE_INEXACT",
    "FE_INVALID",
    "FE_OVERFLOW",
    "FE_TONEAREST",
    "FE_TOWARDZERO",
    "FE_UNDERFLOW",
    "FE_UPWARD",
    "FILENAME_MAX",
    "FLT_DECIMAL_DIG",
    "FLT_DIG",
    "FLT_EPSILON",
    "FLT_EVAL_METHOD",
    "FLT_HAS_SUBNORM",
    "FLT_MANT_DIG",
    "FLT_MAX",
    "FLT_MAX_10_EXP",
    "FLT_MAX_EXP",
    "FLT_MIN",
    "FLT_MIN_10_EXP",
    "FLT_MIN_EXP",
    "FLT_RADIX",
    "FLT_ROUNDS",
    "FLT_TRUE_MIN",
    "FOPEN_MAX",
    "FP_FAST_FMA",
    "FP_FAST_FMAF",
    "FP_FAST_FMAL",
    "FP_ILOGB0",
    "FP_ILOGBNAN",
    "FP_SUBNORMAL",
    "FP_ZERO",
    "FP_INFINITE",
    "FP_NAN",
    "FP_NORMAL",
    "HUGE_VAL",
    "HUGE_VALF",
    "HUGE_VALL",
    "INFINITY",
    "INT_FAST16_MAX",
    "INT_FAST16_MIN",
    "INT_FAST32_MAX",
    "INT_FAST32_MIN",
    "INT_FAST64_MAX",
    "INT_FAST64_MIN",
    "INT_FAST8_MAX",
    "INT_FAST8_MIN",
    "INT_LEAST16_MAX",
    "INT_LEAST16_MIN",
    "INT_LEAST32_MAX",
    "INT_LEAST32_MIN",
    "INT_LEAST64_MAX",
    "INT_LEAST64_MIN",
    "INT_LEAST8_MAX",
    "INT_LEAST8_MIN",
    "INT_MAX",
    "INT_MIN",
    "INT16_MAX",
    "INT16_MIN",
    "INT32_MAX",
    "INT32_MIN",
    "INT64_MAX",
    "INT64_MIN",
    "INT8_MAX",
    "INT8_MIN",
    "INTMAX_MAX",
    "INTMAX_MIN",
    "INTPTR_MAX",
    "INTPTR_MIN",
    "L_tmpnam",
    "LC_ALL",
    "LC_COLLATE",
    "LC_CTYPE",
    "LC_MONETARY",
    "LC_NUMERIC",
    "LC_TIME",
    "LDBL_DECIMAL_DIG",
    "LDBL_DIG",
    "LDBL_EPSILON",
    "LDBL_HAS_SUBNORM",
    "LDBL_MANT_DIG",
    "LDBL_MAX",
    "LDBL_MAX_10_EXP",
    "LDBL_MAX_EXP",
    "LDBL_MIN",
    "LDBL_MIN_10_EXP",
    "LDBL_MIN_EXP",
    "LDBL_TRUE_MIN",
    "LLONG_MAX",
    "LLONG_MIN",
    "LONG_MAX",
    "LONG_MIN",
    "MATH_ERREXCEPT",
    "math_errhandling",
    "MATH_ERRNO",
    "MB_CUR_MAX",
    "MB_LEN_MAX",
    "NAN",
    "NULL",
    "ONCE_FLAG_INIT",
    "PRId16",
    "PRId32",
    "PRId64",
    "PRId8",
    "PRIdFAST16",
    "PRIdFAST32",
    "PRIdFAST64",
    "PRIdFAST8",
    "PRIdLEAST16",
    "PRIdLEAST32",
    "PRIdLEAST64",
    "PRIdLEAST8",
    "PRIdMAX",
    "PRIdPTR",
    "PRIi16",
    "PRIi32",
    "PRIi64",
    "PRIi8",
    "PRIiFAST16",
    "PRIiFAST32",
    "PRIiFAST64",
    "PRIiFAST8",
    "PRIiLEAST16",
    "PRIiLEAST32",
    "PRIiLEAST64",
    "PRIiLEAST8",
    "PRIiMAX",
    "PRIiPTR",
    "PRIo16",
    "PRIo32",
    "PRIo64",
    "PRIo8",
    "PRIoFAST16",
    "PRIoFAST32",
    "PRIoFAST64",
    "PRIoFAST8",
    "PRIoLEAST16",
    "PRIoLEAST32",
    "PRIoLEAST64",
    "PRIoLEAST8",
    "PRIoMAX",
    "PRIoPTR",
    "PRIu16",
    "PRIu32",
    "PRIu64",
    "PRIu8",
    "PRIuFAST16",
    "PRIuFAST32",
    "PRIuFAST64",
    "PRIuFAST8",
    "PRIuLEAST16",
    "PRIuLEAST32",
    "PRIuLEAST64",
    "PRIuLEAST8",
    "PRIuMAX",
    "PRIuPTR",
    "PRIx16",
    "PRIX16",
    "PRIx32",
    "PRIX32",
    "PRIx64",
    "PRIX64",
    "PRIx8",
    "PRIX8",
    "PRIxFAST16",
    "PRIXFAST16",
    "PRIxFAST32",
    "PRIXFAST32",
    "PRIxFAST64",
    "PRIXFAST64",
    "PRIxFAST8",
    "PRIXFAST8",
    "PRIxLEAST16",
    "PRIXLEAST16",
    "PRIxLEAST32",
    "PRIXLEAST32",
    "PRIxLEAST64",
    "PRIXLEAST64",
    "PRIxLEAST8",
    "PRIXLEAST8",
    "PRIxMAX",
    "PRIXMAX",
    "PRIxPTR",
    "PRIXPTR",
    "PTRDIFF_MAX",
    "PTRDIFF_MIN",
    "RAND_MAX",
    "SCHAR_MAX",
    "SCHAR_MIN",
    "SCNd16",
    "SCNd32",
    "SCNd64",
    "SCNd8",
    "SCNdFAST16",
    "SCNdFAST32",
    "SCNdFAST64",
    "SCNdFAST8",
    "SCNdLEAST16",
    "SCNdLEAST32",
    "SCNdLEAST64",
    "SCNdLEAST8",
    "SCNdMAX",
    "SCNdPTR",
    "SCNi16",
    "SCNi32",
    "SCNi64",
    "SCNi8",
    "SCNiFAST16",
    "SCNiFAST32",
    "SCNiFAST64",
    "SCNiFAST8",
    "SCNiLEAST16",
    "SCNiLEAST32",
    "SCNiLEAST64",
    "SCNiLEAST8",
    "SCNiMAX",
    "SCNiPTR",
    "SCNo16",
    "SCNo32",
    "SCNo64",
    "SCNo8",
    "SCNoFAST16",
    "SCNoFAST32",
    "SCNoFAST64",
    "SCNoFAST8",
    "SCNoLEAST16",
    "SCNoLEAST32",
    "SCNoLEAST64",
    "SCNoLEAST8",
    "SCNoMAX",
    "SCNoPTR",
    "SCNu16",
    "SCNu32",
    "SCNu64",
    "SCNu8",
    "SCNuFAST16",
    "SCNuFAST32",
    "SCNuFAST64",
    "SCNuFAST8",
    "SCNuLEAST16",
    "SCNuLEAST32",
    "SCNuLEAST64",
    "SCNuLEAST8",
    "SCNuMAX",
    "SCNuPTR",
    "SCNx16",
    "SCNx32",
    "SCNx64",
    "SCNx8",
    "SCNxFAST16",
    "SCNxFAST32",
    "SCNxFAST64",
    "SCNxFAST8",
    "SCNxLEAST16",
    "SCNxLEAST32",
    "SCNxLEAST64",
    "SCNxLEAST8",
    "SCNxMAX",
    "SCNxPTR",
    "SEEK_CUR",
    "SEEK_END",
    "SEEK_SET",
    "SHRT_MAX",
    "SHRT_MIN",
    "SIG_ATOMIC_MAX",
    "SIG_ATOMIC_MIN",
    "SIG_DFL",
    "SIG_ERR",
    "SIG_IGN",
    "SIGABRT",
    "SIGFPE",
    "SIGILL",
    "SIGINT",
    "SIGSEGV",
    "SIGTERM",
    "SIZE_MAX",
    "stderr",
    "stdin",
    "stdout",
    "TIME_UTC",
    "TMP_MAX",
    "UCHAR_MAX",
    "UINT_FAST16_MAX",
    "UINT_FAST32_MAX",
    "UINT_FAST64_MAX",
    "UINT_FAST8_MAX",
    "UINT_LEAST16_MAX",
    "UINT_LEAST32_MAX",
    "UINT_LEAST64_MAX",
    "UINT_LEAST8_MAX",
    "UINT_MAX",
    "UINT16_MAX",
    "UINT32_MAX",
    "UINT64_MAX",
    "UINT8_MAX",
    "UINTMAX_MAX",
    "UINTPTR_MAX",
    "ULLONG_MAX",
    "ULONG_MAX",
    "USHRT_MAX",
    "WCHAR_MAX",
    "WCHAR_MIN",
    "WEOF",
    "WINT_MAX",
    "WINT_MIN",
}
557
558
559def _transform_invalid_identifier(invalid_identifier: str) -> str:
560    """Applies a transformation to an invalid C++ identifier to make it valid.
561
562    Currently, this simply appends an underscore. This addresses the vast
563    majority of realistic cases, but there are some caveats; see
564    `fix_cc_identifier` function documentation for details.
565    """
566    return f"{invalid_identifier}_"
567
568
def fix_cc_identifier(proto_identifier: str) -> str:
    """Maps a proto identifier to a name that is safe in generated C++ code.

    Identifiers that are not listed in `PW_PROTO_CODEGEN_RESERVED_WORDS` pass
    through untouched. Identifiers that are listed there (C++ keywords,
    standard-library preprocessor macros, and names used by the codegen
    internals) are rewritten by `_transform_invalid_identifier`, which at
    present appends an underscore.

    The lookup is an exact, case-sensitive match.

    This covers the vast majority of realistic cases, but no attempt is made
    to repair identifiers that the C++ spec reserves for the compiler:
    - Any identifier containing the substring "__".
    - Any identifier that begins with an underscore followed by a capital
      letter.

    Args:
      proto_identifier: The identifier as written in the proto definition.

    Returns:
      The identifier to use in the generated C++ code.
    """
    # Common case first: nothing to fix.
    if proto_identifier not in PW_PROTO_CODEGEN_RESERVED_WORDS:
        return proto_identifier

    return _transform_invalid_identifier(proto_identifier)
591
592
def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
    """Maps a proto enum-value name to a name that is safe in generated C++.

    The name is upper-cased to obtain the UPPER_SNAKE_CASE spelling, and that
    spelling is looked up in `PW_PROTO_CODEGEN_RESERVED_WORDS`. On a
    collision the fix is applied to the name as originally given, not to the
    upper-cased copy; otherwise the original name is returned unchanged.

    The code generator also emits kHungarianNotationPascalCase aliases for
    enum values, but symbols of that shape never clash with C++20 keywords or
    standard-library macros, so only the UPPER_SNAKE_CASE form is checked.

    See `fix_cc_identifier` for further details.

    Args:
      proto_enum_entry: The enum-value name as written in the proto definition.

    Returns:
      The enum-value name to use in the generated C++ code.
    """
    if proto_enum_entry.upper() in PW_PROTO_CODEGEN_RESERVED_WORDS:
        return _transform_invalid_identifier(proto_enum_entry)

    return proto_enum_entry
614