#!/usr/bin/env python3
# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Fixes identifiers that would cause compiler errors in generated C++ code."""

from typing import Set

# Words that must not appear verbatim as identifiers in the generated code.
# Many of them are perfectly legal identifiers in proto syntax, but the
# generated C++ code has to treat them specially.
#
# Note: The main use of this collection is membership testing ("if x in y"),
# which is why it is a set and not a list.
PW_PROTO_CODEGEN_RESERVED_WORDS: Set[str] = {
    # Identifiers that conflict with the codegen internals when used in certain
    # contexts:
    "Fields",
    "Message",
    # C++20 keywords (https://en.cppreference.com/w/cpp/keyword):
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "atomic_cancel",
    "atomic_commit",
    "atomic_noexcept",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char8_t",
    "char16_t",
    "char32_t",
    "class",
    "compl",
    "concept",
    "const",
    "consteval",
    "constexpr",
    "constinit",
    "const_cast",
    "continue",
    "co_await",
    "co_return",
    "co_yield",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "reflexpr",
    "register",
    "reinterpret_cast",
    "requires",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "synchronized",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq",
    # C++20 macros (https://en.cppreference.com/w/cpp/symbol_index/macro),
    # excluding the following:
    # - Function-like macros, which have unambiguous syntax and thus won't
    #   conflict with generated symbols.
    # - Macros that couldn't be made valid by appending underscores, namely
    #   those containing "__" or starting with "_[A-Z]". C++ reserves all such
    #   identifiers for the compiler, and appending underscores wouldn't change
    #   that.
    "ATOMIC_BOOL_LOCK_FREE",
    "ATOMIC_CHAR_LOCK_FREE",
    "ATOMIC_CHAR16_T_LOCK_FREE",
    "ATOMIC_CHAR32_T_LOCK_FREE",
    "ATOMIC_CHAR8_T_LOCK_FREE",
    "ATOMIC_FLAG_INIT",
    "ATOMIC_INT_LOCK_FREE",
    "ATOMIC_LLONG_LOCK_FREE",
    "ATOMIC_LONG_LOCK_FREE",
    "ATOMIC_POINTER_LOCK_FREE",
    "ATOMIC_SHORT_LOCK_FREE",
    "ATOMIC_WCHAR_T_LOCK_FREE",
    "BUFSIZ",
    "CHAR_BIT",
    "CHAR_MAX",
    "CHAR_MIN",
    "CLOCKS_PER_SEC",
    "DBL_DECIMAL_DIG",
    "DBL_DIG",
    "DBL_EPSILON",
    "DBL_HAS_SUBNORM",
    "DBL_MANT_DIG",
    "DBL_MAX",
    "DBL_MAX_10_EXP",
    "DBL_MAX_EXP",
    "DBL_MIN",
    "DBL_MIN_10_EXP",
    "DBL_MIN_EXP",
    "DBL_TRUE_MIN",
    "DECIMAL_DIG",
    "E2BIG",
    "EACCES",
    "EADDRINUSE",
    "EADDRNOTAVAIL",
    "EAFNOSUPPORT",
    "EAGAIN",
    "EALREADY",
    "EBADF",
    "EBADMSG",
    "EBUSY",
    "ECANCELED",
    "ECHILD",
    "ECONNABORTED",
    "ECONNREFUSED",
    "ECONNRESET",
    "EDEADLK",
    "EDESTADDRREQ",
    "EDOM",
    "EEXIST",
    "EFAULT",
    "EFBIG",
    "EHOSTUNREACH",
    "EIDRM",
    "EILSEQ",
    "EINPROGRESS",
    "EINTR",
    "EINVAL",
    "EIO",
    "EISCONN",
    "EISDIR",
    "ELOOP",
    "EMFILE",
    "EMLINK",
    "EMSGSIZE",
    "ENAMETOOLONG",
    "ENETDOWN",
    "ENETRESET",
    "ENETUNREACH",
    "ENFILE",
    "ENOBUFS",
    "ENODATA",
    "ENODEV",
    "ENOENT",
    "ENOEXEC",
    "ENOLCK",
    "ENOLINK",
    "ENOMEM",
    "ENOMSG",
    "ENOPROTOOPT",
    "ENOSPC",
    "ENOSR",
    "ENOSTR",
    "ENOSYS",
    "ENOTCONN",
    "ENOTDIR",
    "ENOTEMPTY",
    "ENOTRECOVERABLE",
    "ENOTSOCK",
    "ENOTSUP",
    "ENOTTY",
    "ENXIO",
    "EOF",
    "EOPNOTSUPP",
    "EOVERFLOW",
    "EOWNERDEAD",
    "EPERM",
    "EPIPE",
    "EPROTO",
    "EPROTONOSUPPORT",
    "EPROTOTYPE",
    "ERANGE",
    "EROFS",
    "errno",
    "ESPIPE",
    "ESRCH",
    "ETIME",
    "ETIMEDOUT",
    "ETXTBSY",
    "EWOULDBLOCK",
    "EXDEV",
    "EXIT_FAILURE",
    "EXIT_SUCCESS",
    "FE_ALL_EXCEPT",
    "FE_DFL_ENV",
    "FE_DIVBYZERO",
    "FE_DOWNWARD",
    "FE_INEXACT",
    "FE_INVALID",
    "FE_OVERFLOW",
    "FE_TONEAREST",
    "FE_TOWARDZERO",
    "FE_UNDERFLOW",
    "FE_UPWARD",
    "FILENAME_MAX",
    "FLT_DECIMAL_DIG",
    "FLT_DIG",
    "FLT_EPSILON",
    "FLT_EVAL_METHOD",
    "FLT_HAS_SUBNORM",
    "FLT_MANT_DIG",
    "FLT_MAX",
    "FLT_MAX_10_EXP",
    "FLT_MAX_EXP",
    "FLT_MIN",
    "FLT_MIN_10_EXP",
    "FLT_MIN_EXP",
    "FLT_RADIX",
    "FLT_ROUNDS",
    "FLT_TRUE_MIN",
    "FOPEN_MAX",
    "FP_FAST_FMA",
    "FP_FAST_FMAF",
    "FP_FAST_FMAL",
    "FP_ILOGB0",
    "FP_ILOGBNAN",
    "FP_SUBNORMAL",
    "FP_ZERO",
    "FP_INFINITE",
    "FP_NAN",
    "FP_NORMAL",
    "HUGE_VAL",
    "HUGE_VALF",
    "HUGE_VALL",
    "INFINITY",
    "INT_FAST16_MAX",
    "INT_FAST16_MIN",
    "INT_FAST32_MAX",
    "INT_FAST32_MIN",
    "INT_FAST64_MAX",
    "INT_FAST64_MIN",
    "INT_FAST8_MAX",
    "INT_FAST8_MIN",
    "INT_LEAST16_MAX",
    "INT_LEAST16_MIN",
    "INT_LEAST32_MAX",
    "INT_LEAST32_MIN",
    "INT_LEAST64_MAX",
    "INT_LEAST64_MIN",
    "INT_LEAST8_MAX",
    "INT_LEAST8_MIN",
    "INT_MAX",
    "INT_MIN",
    "INT16_MAX",
    "INT16_MIN",
    "INT32_MAX",
    "INT32_MIN",
    "INT64_MAX",
    "INT64_MIN",
    "INT8_MAX",
    "INT8_MIN",
    "INTMAX_MAX",
    "INTMAX_MIN",
    "INTPTR_MAX",
    "INTPTR_MIN",
    "L_tmpnam",
    "LC_ALL",
    "LC_COLLATE",
    "LC_CTYPE",
    "LC_MONETARY",
    "LC_NUMERIC",
    "LC_TIME",
    "LDBL_DECIMAL_DIG",
    "LDBL_DIG",
    "LDBL_EPSILON",
    "LDBL_HAS_SUBNORM",
    "LDBL_MANT_DIG",
    "LDBL_MAX",
    "LDBL_MAX_10_EXP",
    "LDBL_MAX_EXP",
    "LDBL_MIN",
    "LDBL_MIN_10_EXP",
    "LDBL_MIN_EXP",
    "LDBL_TRUE_MIN",
    "LLONG_MAX",
    "LLONG_MIN",
    "LONG_MAX",
    "LONG_MIN",
    "MATH_ERREXCEPT",
    "math_errhandling",
    "MATH_ERRNO",
    "MB_CUR_MAX",
    "MB_LEN_MAX",
    "NAN",
    "NULL",
    "ONCE_FLAG_INIT",
    "PRId16",
    "PRId32",
    "PRId64",
    "PRId8",
    "PRIdFAST16",
    "PRIdFAST32",
    "PRIdFAST64",
    "PRIdFAST8",
    "PRIdLEAST16",
    "PRIdLEAST32",
    "PRIdLEAST64",
    "PRIdLEAST8",
    "PRIdMAX",
    "PRIdPTR",
    "PRIi16",
    "PRIi32",
    "PRIi64",
    "PRIi8",
    "PRIiFAST16",
    "PRIiFAST32",
    "PRIiFAST64",
    "PRIiFAST8",
    "PRIiLEAST16",
    "PRIiLEAST32",
    "PRIiLEAST64",
    "PRIiLEAST8",
    "PRIiMAX",
    "PRIiPTR",
    "PRIo16",
    "PRIo32",
    "PRIo64",
    "PRIo8",
    "PRIoFAST16",
    "PRIoFAST32",
    "PRIoFAST64",
    "PRIoFAST8",
    "PRIoLEAST16",
    "PRIoLEAST32",
    "PRIoLEAST64",
    "PRIoLEAST8",
    "PRIoMAX",
    "PRIoPTR",
    "PRIu16",
    "PRIu32",
    "PRIu64",
    "PRIu8",
    "PRIuFAST16",
    "PRIuFAST32",
    "PRIuFAST64",
    "PRIuFAST8",
    "PRIuLEAST16",
    "PRIuLEAST32",
    "PRIuLEAST64",
    "PRIuLEAST8",
    "PRIuMAX",
    "PRIuPTR",
    "PRIx16",
    "PRIX16",
    "PRIx32",
    "PRIX32",
    "PRIx64",
    "PRIX64",
    "PRIx8",
    "PRIX8",
    "PRIxFAST16",
    "PRIXFAST16",
    "PRIxFAST32",
    "PRIXFAST32",
    "PRIxFAST64",
    "PRIXFAST64",
    "PRIxFAST8",
    "PRIXFAST8",
    "PRIxLEAST16",
    "PRIXLEAST16",
    "PRIxLEAST32",
    "PRIXLEAST32",
    "PRIxLEAST64",
    "PRIXLEAST64",
    "PRIxLEAST8",
    "PRIXLEAST8",
    "PRIxMAX",
    "PRIXMAX",
    "PRIxPTR",
    "PRIXPTR",
    "PTRDIFF_MAX",
    "PTRDIFF_MIN",
    "RAND_MAX",
    "SCHAR_MAX",
    "SCHAR_MIN",
    "SCNd16",
    "SCNd32",
    "SCNd64",
    "SCNd8",
    "SCNdFAST16",
    "SCNdFAST32",
    "SCNdFAST64",
    "SCNdFAST8",
    "SCNdLEAST16",
    "SCNdLEAST32",
    "SCNdLEAST64",
    "SCNdLEAST8",
    "SCNdMAX",
    "SCNdPTR",
    "SCNi16",
    "SCNi32",
    "SCNi64",
    "SCNi8",
    "SCNiFAST16",
    "SCNiFAST32",
    "SCNiFAST64",
    "SCNiFAST8",
    "SCNiLEAST16",
    "SCNiLEAST32",
    "SCNiLEAST64",
    "SCNiLEAST8",
    "SCNiMAX",
    "SCNiPTR",
    "SCNo16",
    "SCNo32",
    "SCNo64",
    "SCNo8",
    "SCNoFAST16",
    "SCNoFAST32",
    "SCNoFAST64",
    "SCNoFAST8",
    "SCNoLEAST16",
    "SCNoLEAST32",
    "SCNoLEAST64",
    "SCNoLEAST8",
    "SCNoMAX",
    "SCNoPTR",
    "SCNu16",
    "SCNu32",
    "SCNu64",
    "SCNu8",
    "SCNuFAST16",
    "SCNuFAST32",
    "SCNuFAST64",
    "SCNuFAST8",
    "SCNuLEAST16",
    "SCNuLEAST32",
    "SCNuLEAST64",
    "SCNuLEAST8",
    "SCNuMAX",
    "SCNuPTR",
    "SCNx16",
    "SCNx32",
    "SCNx64",
    "SCNx8",
    "SCNxFAST16",
    "SCNxFAST32",
    "SCNxFAST64",
    "SCNxFAST8",
    "SCNxLEAST16",
    "SCNxLEAST32",
    "SCNxLEAST64",
    "SCNxLEAST8",
    "SCNxMAX",
    "SCNxPTR",
    "SEEK_CUR",
    "SEEK_END",
    "SEEK_SET",
    "SHRT_MAX",
    "SHRT_MIN",
    "SIG_ATOMIC_MAX",
    "SIG_ATOMIC_MIN",
    "SIG_DFL",
    "SIG_ERR",
    "SIG_IGN",
    "SIGABRT",
    "SIGFPE",
    "SIGILL",
    "SIGINT",
    "SIGSEGV",
    "SIGTERM",
    "SIZE_MAX",
    "stderr",
    "stdin",
    "stdout",
    "TIME_UTC",
    "TMP_MAX",
    "UCHAR_MAX",
    "UINT_FAST16_MAX",
    "UINT_FAST32_MAX",
    "UINT_FAST64_MAX",
    "UINT_FAST8_MAX",
    "UINT_LEAST16_MAX",
    "UINT_LEAST32_MAX",
    "UINT_LEAST64_MAX",
    "UINT_LEAST8_MAX",
    "UINT_MAX",
    "UINT16_MAX",
    "UINT32_MAX",
    "UINT64_MAX",
    "UINT8_MAX",
    "UINTMAX_MAX",
    "UINTPTR_MAX",
    "ULLONG_MAX",
    "ULONG_MAX",
    "USHRT_MAX",
    "WCHAR_MAX",
    "WCHAR_MIN",
    "WEOF",
    "WINT_MAX",
    "WINT_MIN",
}


def _transform_invalid_identifier(invalid_identifier: str) -> str:
    """Rewrites an invalid C++ identifier so that it becomes usable.

    The rewrite is a single trailing underscore. That is enough for nearly
    every realistic input; the exceptions are described in the documentation
    of `fix_cc_identifier`.

    Args:
      invalid_identifier: The name that collides with a reserved word.

    Returns:
      The same name with "_" appended.
    """
    return invalid_identifier + "_"


def fix_cc_identifier(proto_identifier: str) -> str:
    """Maps a proto identifier to a name that is safe in generated C++ code.

    Names that do not appear in `PW_PROTO_CODEGEN_RESERVED_WORDS` are returned
    untouched. Names that do appear there (C++ keywords, object-like
    standard-library macros, and a few codegen-internal names) get a trailing
    underscore so that the generated code compiles.

    No attempt is made to repair identifiers that the C++ spec reserves for
    the compiler's use. For reference, those fall into two categories:
      - Any identifier that contains the substring "__" anywhere in it.
      - Any identifier with an underscore for the first character and a capital
        letter for the second character.

    Args:
      proto_identifier: The identifier as written in the proto source.

    Returns:
      Either `proto_identifier` itself or `proto_identifier` followed by "_".
    """
    if proto_identifier not in PW_PROTO_CODEGEN_RESERVED_WORDS:
        return proto_identifier
    return _transform_invalid_identifier(proto_identifier)


def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
    """Maps a proto enum-value name to one that is safe in generated C++ code.

    The collision check is performed on the upper-cased form of the name,
    since that UPPER_SNAKE_CASE spelling is what may clash with C++ keywords
    and standard-library macros. When a clash is found, the underscore is
    appended to the name as originally spelled, not to the upper-cased copy.

    The code generation also creates kHungarianNotationPascalCase aliases for
    enum values, but symbols of that form never conflict with keywords or
    standard-library macros in C++20, so they are not checked here.

    See `fix_cc_identifier` for further details.

    Args:
      proto_enum_entry: The enum-value name as written in the proto source.

    Returns:
      Either `proto_enum_entry` itself or `proto_enum_entry` followed by "_".
    """
    # Only the lookup uses the upper-cased spelling; the caller's casing is
    # preserved in the returned value.
    if proto_enum_entry.upper() not in PW_PROTO_CODEGEN_RESERVED_WORDS:
        return proto_enum_entry
    return _transform_invalid_identifier(proto_enum_entry)