diff options
Diffstat (limited to 'pw_protobuf/py/pw_protobuf/symbol_name_mapping.py')
-rwxr-xr-x | pw_protobuf/py/pw_protobuf/symbol_name_mapping.py | 613 |
1 files changed, 613 insertions, 0 deletions
diff --git a/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py new file mode 100755 index 000000000..ffe3c9451 --- /dev/null +++ b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py @@ -0,0 +1,613 @@ +#!/usr/bin/env python3 +# Copyright 2022 The Pigweed Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +"""Fixes identifiers that would cause compiler errors in generated C++ code.""" + +from typing import Set + +# Set of words that can't be used as identifiers in the generated code. Many of +# these are valid identifiers in proto syntax, but they need special handling in +# the generated C++ code. +# +# Note: This is primarily used for "if x in y" operations, hence the use of a +# set rather than a list. 
PW_PROTO_CODEGEN_RESERVED_WORDS: Set[str] = {
    # Names that clash with the code generator's own internals in some
    # contexts.
    "Fields",
    "Message",
    # Keywords of C++20, per https://en.cppreference.com/w/cpp/keyword.
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "atomic_cancel",
    "atomic_commit",
    "atomic_noexcept",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char8_t",
    "char16_t",
    "char32_t",
    "class",
    "compl",
    "concept",
    "const",
    "consteval",
    "constexpr",
    "constinit",
    "const_cast",
    "continue",
    "co_await",
    "co_return",
    "co_yield",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "reflexpr",
    "register",
    "reinterpret_cast",
    "requires",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "synchronized",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq",
    # Object-like macros of the C++20 standard library, per
    # https://en.cppreference.com/w/cpp/symbol_index/macro. Two groups are
    # deliberately left out:
    # - Function-like macros: their call syntax is unambiguous, so they cannot
    #   clash with generated symbols.
    # - Macros containing "__" or starting with "_[A-Z]": C++ reserves every
    #   such identifier for the compiler, so an appended underscore would not
    #   make them usable anyway.
    "ATOMIC_BOOL_LOCK_FREE",
    "ATOMIC_CHAR_LOCK_FREE",
    "ATOMIC_CHAR16_T_LOCK_FREE",
    "ATOMIC_CHAR32_T_LOCK_FREE",
    "ATOMIC_CHAR8_T_LOCK_FREE",
    "ATOMIC_FLAG_INIT",
    "ATOMIC_INT_LOCK_FREE",
    "ATOMIC_LLONG_LOCK_FREE",
    "ATOMIC_LONG_LOCK_FREE",
    "ATOMIC_POINTER_LOCK_FREE",
    "ATOMIC_SHORT_LOCK_FREE",
    "ATOMIC_WCHAR_T_LOCK_FREE",
    "BUFSIZ",
    "CHAR_BIT",
    "CHAR_MAX",
    "CHAR_MIN",
    "CLOCKS_PER_SEC",
    "DBL_DECIMAL_DIG",
    "DBL_DIG",
    "DBL_EPSILON",
    "DBL_HAS_SUBNORM",
    "DBL_MANT_DIG",
    "DBL_MAX",
    "DBL_MAX_10_EXP",
    "DBL_MAX_EXP",
    "DBL_MIN",
    "DBL_MIN_10_EXP",
    "DBL_MIN_EXP",
    "DBL_TRUE_MIN",
    "DECIMAL_DIG",
    "E2BIG",
    "EACCES",
    "EADDRINUSE",
    "EADDRNOTAVAIL",
    "EAFNOSUPPORT",
    "EAGAIN",
    "EALREADY",
    "EBADF",
    "EBADMSG",
    "EBUSY",
    "ECANCELED",
    "ECHILD",
    "ECONNABORTED",
    "ECONNREFUSED",
    "ECONNRESET",
    "EDEADLK",
    "EDESTADDRREQ",
    "EDOM",
    "EEXIST",
    "EFAULT",
    "EFBIG",
    "EHOSTUNREACH",
    "EIDRM",
    "EILSEQ",
    "EINPROGRESS",
    "EINTR",
    "EINVAL",
    "EIO",
    "EISCONN",
    "EISDIR",
    "ELOOP",
    "EMFILE",
    "EMLINK",
    "EMSGSIZE",
    "ENAMETOOLONG",
    "ENETDOWN",
    "ENETRESET",
    "ENETUNREACH",
    "ENFILE",
    "ENOBUFS",
    "ENODATA",
    "ENODEV",
    "ENOENT",
    "ENOEXEC",
    "ENOLCK",
    "ENOLINK",
    "ENOMEM",
    "ENOMSG",
    "ENOPROTOOPT",
    "ENOSPC",
    "ENOSR",
    "ENOSTR",
    "ENOSYS",
    "ENOTCONN",
    "ENOTDIR",
    "ENOTEMPTY",
    "ENOTRECOVERABLE",
    "ENOTSOCK",
    "ENOTSUP",
    "ENOTTY",
    "ENXIO",
    "EOF",
    "EOPNOTSUPP",
    "EOVERFLOW",
    "EOWNERDEAD",
    "EPERM",
    "EPIPE",
    "EPROTO",
    "EPROTONOSUPPORT",
    "EPROTOTYPE",
    "ERANGE",
    "EROFS",
    "errno",
    "ESPIPE",
    "ESRCH",
    "ETIME",
    "ETIMEDOUT",
    "ETXTBSY",
    "EWOULDBLOCK",
    "EXDEV",
    "EXIT_FAILURE",
    "EXIT_SUCCESS",
    "FE_ALL_EXCEPT",
    "FE_DFL_ENV",
    "FE_DIVBYZERO",
    "FE_DOWNWARD",
    "FE_INEXACT",
    "FE_INVALID",
    "FE_OVERFLOW",
    "FE_TONEAREST",
    "FE_TOWARDZERO",
    "FE_UNDERFLOW",
    "FE_UPWARD",
    "FILENAME_MAX",
    "FLT_DECIMAL_DIG",
    "FLT_DIG",
    "FLT_EPSILON",
    "FLT_EVAL_METHOD",
    "FLT_HAS_SUBNORM",
    "FLT_MANT_DIG",
    "FLT_MAX",
    "FLT_MAX_10_EXP",
    "FLT_MAX_EXP",
    "FLT_MIN",
    "FLT_MIN_10_EXP",
    "FLT_MIN_EXP",
    "FLT_RADIX",
    "FLT_ROUNDS",
    "FLT_TRUE_MIN",
    "FOPEN_MAX",
    "FP_FAST_FMA",
    "FP_FAST_FMAF",
    "FP_FAST_FMAL",
    "FP_ILOGB0",
    "FP_ILOGBNAN",
    "FP_SUBNORMAL",
    "FP_ZERO",
    "FP_INFINITE",
    "FP_NAN",
    "FP_NORMAL",
    "HUGE_VAL",
    "HUGE_VALF",
    "HUGE_VALL",
    "INFINITY",
    "INT_FAST16_MAX",
    "INT_FAST16_MIN",
    "INT_FAST32_MAX",
    "INT_FAST32_MIN",
    "INT_FAST64_MAX",
    "INT_FAST64_MIN",
    "INT_FAST8_MAX",
    "INT_FAST8_MIN",
    "INT_LEAST16_MAX",
    "INT_LEAST16_MIN",
    "INT_LEAST32_MAX",
    "INT_LEAST32_MIN",
    "INT_LEAST64_MAX",
    "INT_LEAST64_MIN",
    "INT_LEAST8_MAX",
    "INT_LEAST8_MIN",
    "INT_MAX",
    "INT_MIN",
    "INT16_MAX",
    "INT16_MIN",
    "INT32_MAX",
    "INT32_MIN",
    "INT64_MAX",
    "INT64_MIN",
    "INT8_MAX",
    "INT8_MIN",
    "INTMAX_MAX",
    "INTMAX_MIN",
    "INTPTR_MAX",
    "INTPTR_MIN",
    "L_tmpnam",
    "LC_ALL",
    "LC_COLLATE",
    "LC_CTYPE",
    "LC_MONETARY",
    "LC_NUMERIC",
    "LC_TIME",
    "LDBL_DECIMAL_DIG",
    "LDBL_DIG",
    "LDBL_EPSILON",
    "LDBL_HAS_SUBNORM",
    "LDBL_MANT_DIG",
    "LDBL_MAX",
    "LDBL_MAX_10_EXP",
    "LDBL_MAX_EXP",
    "LDBL_MIN",
    "LDBL_MIN_10_EXP",
    "LDBL_MIN_EXP",
    "LDBL_TRUE_MIN",
    "LLONG_MAX",
    "LLONG_MIN",
    "LONG_MAX",
    "LONG_MIN",
    "MATH_ERREXCEPT",
    "math_errhandling",
    "MATH_ERRNO",
    "MB_CUR_MAX",
    "MB_LEN_MAX",
    "NAN",
    "NULL",
    "ONCE_FLAG_INIT",
    "PRId16",
    "PRId32",
    "PRId64",
    "PRId8",
    "PRIdFAST16",
    "PRIdFAST32",
    "PRIdFAST64",
    "PRIdFAST8",
    "PRIdLEAST16",
    "PRIdLEAST32",
    "PRIdLEAST64",
    "PRIdLEAST8",
    "PRIdMAX",
    "PRIdPTR",
    "PRIi16",
    "PRIi32",
    "PRIi64",
    "PRIi8",
    "PRIiFAST16",
    "PRIiFAST32",
    "PRIiFAST64",
    "PRIiFAST8",
    "PRIiLEAST16",
    "PRIiLEAST32",
    "PRIiLEAST64",
    "PRIiLEAST8",
    "PRIiMAX",
    "PRIiPTR",
    "PRIo16",
    "PRIo32",
    "PRIo64",
    "PRIo8",
    "PRIoFAST16",
    "PRIoFAST32",
    "PRIoFAST64",
    "PRIoFAST8",
    "PRIoLEAST16",
    "PRIoLEAST32",
    "PRIoLEAST64",
    "PRIoLEAST8",
    "PRIoMAX",
    "PRIoPTR",
    "PRIu16",
    "PRIu32",
    "PRIu64",
    "PRIu8",
    "PRIuFAST16",
    "PRIuFAST32",
    "PRIuFAST64",
    "PRIuFAST8",
    "PRIuLEAST16",
    "PRIuLEAST32",
    "PRIuLEAST64",
    "PRIuLEAST8",
    "PRIuMAX",
    "PRIuPTR",
    "PRIx16",
    "PRIX16",
    "PRIx32",
    "PRIX32",
    "PRIx64",
    "PRIX64",
    "PRIx8",
    "PRIX8",
    "PRIxFAST16",
    "PRIXFAST16",
    "PRIxFAST32",
    "PRIXFAST32",
    "PRIxFAST64",
    "PRIXFAST64",
    "PRIxFAST8",
    "PRIXFAST8",
    "PRIxLEAST16",
    "PRIXLEAST16",
    "PRIxLEAST32",
    "PRIXLEAST32",
    "PRIxLEAST64",
    "PRIXLEAST64",
    "PRIxLEAST8",
    "PRIXLEAST8",
    "PRIxMAX",
    "PRIXMAX",
    "PRIxPTR",
    "PRIXPTR",
    "PTRDIFF_MAX",
    "PTRDIFF_MIN",
    "RAND_MAX",
    "SCHAR_MAX",
    "SCHAR_MIN",
    "SCNd16",
    "SCNd32",
    "SCNd64",
    "SCNd8",
    "SCNdFAST16",
    "SCNdFAST32",
    "SCNdFAST64",
    "SCNdFAST8",
    "SCNdLEAST16",
    "SCNdLEAST32",
    "SCNdLEAST64",
    "SCNdLEAST8",
    "SCNdMAX",
    "SCNdPTR",
    "SCNi16",
    "SCNi32",
    "SCNi64",
    "SCNi8",
    "SCNiFAST16",
    "SCNiFAST32",
    "SCNiFAST64",
    "SCNiFAST8",
    "SCNiLEAST16",
    "SCNiLEAST32",
    "SCNiLEAST64",
    "SCNiLEAST8",
    "SCNiMAX",
    "SCNiPTR",
    "SCNo16",
    "SCNo32",
    "SCNo64",
    "SCNo8",
    "SCNoFAST16",
    "SCNoFAST32",
    "SCNoFAST64",
    "SCNoFAST8",
    "SCNoLEAST16",
    "SCNoLEAST32",
    "SCNoLEAST64",
    "SCNoLEAST8",
    "SCNoMAX",
    "SCNoPTR",
    "SCNu16",
    "SCNu32",
    "SCNu64",
    "SCNu8",
    "SCNuFAST16",
    "SCNuFAST32",
    "SCNuFAST64",
    "SCNuFAST8",
    "SCNuLEAST16",
    "SCNuLEAST32",
    "SCNuLEAST64",
    "SCNuLEAST8",
    "SCNuMAX",
    "SCNuPTR",
    "SCNx16",
    "SCNx32",
    "SCNx64",
    "SCNx8",
    "SCNxFAST16",
    "SCNxFAST32",
    "SCNxFAST64",
    "SCNxFAST8",
    "SCNxLEAST16",
    "SCNxLEAST32",
    "SCNxLEAST64",
    "SCNxLEAST8",
    "SCNxMAX",
    "SCNxPTR",
    "SEEK_CUR",
    "SEEK_END",
    "SEEK_SET",
    "SHRT_MAX",
    "SHRT_MIN",
    "SIG_ATOMIC_MAX",
    "SIG_ATOMIC_MIN",
    "SIG_DFL",
    "SIG_ERR",
    "SIG_IGN",
    "SIGABRT",
    "SIGFPE",
    "SIGILL",
    "SIGINT",
    "SIGSEGV",
    "SIGTERM",
    "SIZE_MAX",
    "stderr",
    "stdin",
    "stdout",
    "TIME_UTC",
    "TMP_MAX",
    "UCHAR_MAX",
    "UINT_FAST16_MAX",
    "UINT_FAST32_MAX",
    "UINT_FAST64_MAX",
    "UINT_FAST8_MAX",
    "UINT_LEAST16_MAX",
    "UINT_LEAST32_MAX",
    "UINT_LEAST64_MAX",
    "UINT_LEAST8_MAX",
    "UINT_MAX",
    "UINT16_MAX",
    "UINT32_MAX",
    "UINT64_MAX",
    "UINT8_MAX",
    "UINTMAX_MAX",
    "UINTPTR_MAX",
    "ULLONG_MAX",
    "ULONG_MAX",
    "USHRT_MAX",
    "WCHAR_MAX",
    "WCHAR_MIN",
    "WEOF",
    "WINT_MAX",
    "WINT_MIN",
}


def _transform_invalid_identifier(invalid_identifier: str) -> str:
    """Turns a colliding identifier into one that is usable in C++.

    The transformation is a single trailing underscore. That covers nearly
    every realistic collision; the remaining caveats are described in the
    documentation of `fix_cc_identifier`.
    """
    return invalid_identifier + "_"


def fix_cc_identifier(proto_identifier: str) -> str:
    """Maps a proto identifier to a name that is safe in generated C++ code.

    Identifiers that do not appear in `PW_PROTO_CODEGEN_RESERVED_WORDS` are
    returned unchanged. Identifiers that do appear there (C++ keywords,
    standard-library macros, and names used by the codegen internals) get a
    trailing underscore so that the generated code compiles.

    No attempt is made to repair identifiers that the C++ spec reserves for
    the compiler, because appending an underscore cannot fix them. Those are:
      - Identifiers containing the substring "__" anywhere.
      - Identifiers whose first character is an underscore and whose second
        character is a capital letter.
    """
    if proto_identifier not in PW_PROTO_CODEGEN_RESERVED_WORDS:
        return proto_identifier
    return _transform_invalid_identifier(proto_identifier)


def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
    """Maps a proto enum-value name to one that is safe in generated C++ code.

    The collision check is done on the upper-cased spelling of the name, which
    is looked up in `PW_PROTO_CODEGEN_RESERVED_WORDS`. When that spelling
    collides, the name is returned in its original casing with a trailing
    underscore; otherwise it is returned untouched.

    The code generator also emits kHungarianNotationPascalCase aliases for
    enum values. Names of that shape never collide with C++20 keywords or
    standard-library macros, so only the upper-cased spelling is checked.

    See `fix_cc_identifier` for further details.
    """
    collides = proto_enum_entry.upper() in PW_PROTO_CODEGEN_RESERVED_WORDS
    if collides:
        return _transform_invalid_identifier(proto_enum_entry)
    return proto_enum_entry