aboutsummaryrefslogtreecommitdiff
path: root/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py
diff options
context:
space:
mode:
Diffstat (limited to 'pw_protobuf/py/pw_protobuf/symbol_name_mapping.py')
-rwxr-xr-x  pw_protobuf/py/pw_protobuf/symbol_name_mapping.py  613
1 files changed, 613 insertions, 0 deletions
diff --git a/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py
new file mode 100755
index 000000000..ffe3c9451
--- /dev/null
+++ b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py
@@ -0,0 +1,613 @@
+#!/usr/bin/env python3
+# Copyright 2022 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Fixes identifiers that would cause compiler errors in generated C++ code."""
+
+from typing import Set
+
+# Set of words that can't be used as-is as identifiers in the generated C++
+# code. Many of these are valid identifiers in proto syntax, but they need
+# special handling (see `fix_cc_identifier`) in the generated C++ code.
+#
+# Lookups against this set are exact, case-sensitive string matches:
+# `fix_cc_identifier` tests the identifier unchanged, while
+# `fix_cc_enum_value_name` tests the upper-cased form of the enum-value name.
+# Note that a few of the macro names below are lowercase or mixed case (e.g.
+# "errno", "stderr", "L_tmpnam", "PRId16"), so the entries must be kept in
+# their exact C++ spelling.
+#
+# Note: This is primarily used for "if x in y" membership tests, hence the use
+# of a set rather than a list.
+PW_PROTO_CODEGEN_RESERVED_WORDS: Set[str] = {
+ # Identifiers that conflict with names used internally by the generated code
+ # when they appear in certain contexts:
+ "Fields",
+ "Message",
+ # C++20 keywords (https://en.cppreference.com/w/cpp/keyword):
+ "alignas",
+ "alignof",
+ "and",
+ "and_eq",
+ "asm",
+ "atomic_cancel",
+ "atomic_commit",
+ "atomic_noexcept",
+ "auto",
+ "bitand",
+ "bitor",
+ "bool",
+ "break",
+ "case",
+ "catch",
+ "char",
+ "char8_t",
+ "char16_t",
+ "char32_t",
+ "class",
+ "compl",
+ "concept",
+ "const",
+ "consteval",
+ "constexpr",
+ "constinit",
+ "const_cast",
+ "continue",
+ "co_await",
+ "co_return",
+ "co_yield",
+ "decltype",
+ "default",
+ "delete",
+ "do",
+ "double",
+ "dynamic_cast",
+ "else",
+ "enum",
+ "explicit",
+ "export",
+ "extern",
+ "false",
+ "float",
+ "for",
+ "friend",
+ "goto",
+ "if",
+ "inline",
+ "int",
+ "long",
+ "mutable",
+ "namespace",
+ "new",
+ "noexcept",
+ "not",
+ "not_eq",
+ "nullptr",
+ "operator",
+ "or",
+ "or_eq",
+ "private",
+ "protected",
+ "public",
+ "reflexpr",
+ "register",
+ "reinterpret_cast",
+ "requires",
+ "return",
+ "short",
+ "signed",
+ "sizeof",
+ "static",
+ "static_assert",
+ "static_cast",
+ "struct",
+ "switch",
+ "synchronized",
+ "template",
+ "this",
+ "thread_local",
+ "throw",
+ "true",
+ "try",
+ "typedef",
+ "typeid",
+ "typename",
+ "union",
+ "unsigned",
+ "using",
+ "virtual",
+ "void",
+ "volatile",
+ "wchar_t",
+ "while",
+ "xor",
+ "xor_eq",
+ # C++20 standard-library macros
+ # (https://en.cppreference.com/w/cpp/symbol_index/macro), excluding the
+ # following:
+ # - Function-like macros, which have unambiguous syntax and thus won't
+ # conflict with generated symbols.
+ # - Macros that couldn't be made valid by appending an underscore, namely
+ # those containing "__" or starting with "_[A-Z]". C++ reserves all such
+ # identifiers for the compiler, and appending an underscore wouldn't
+ # change that.
+ "ATOMIC_BOOL_LOCK_FREE",
+ "ATOMIC_CHAR_LOCK_FREE",
+ "ATOMIC_CHAR16_T_LOCK_FREE",
+ "ATOMIC_CHAR32_T_LOCK_FREE",
+ "ATOMIC_CHAR8_T_LOCK_FREE",
+ "ATOMIC_FLAG_INIT",
+ "ATOMIC_INT_LOCK_FREE",
+ "ATOMIC_LLONG_LOCK_FREE",
+ "ATOMIC_LONG_LOCK_FREE",
+ "ATOMIC_POINTER_LOCK_FREE",
+ "ATOMIC_SHORT_LOCK_FREE",
+ "ATOMIC_WCHAR_T_LOCK_FREE",
+ "BUFSIZ",
+ "CHAR_BIT",
+ "CHAR_MAX",
+ "CHAR_MIN",
+ "CLOCKS_PER_SEC",
+ "DBL_DECIMAL_DIG",
+ "DBL_DIG",
+ "DBL_EPSILON",
+ "DBL_HAS_SUBNORM",
+ "DBL_MANT_DIG",
+ "DBL_MAX",
+ "DBL_MAX_10_EXP",
+ "DBL_MAX_EXP",
+ "DBL_MIN",
+ "DBL_MIN_10_EXP",
+ "DBL_MIN_EXP",
+ "DBL_TRUE_MIN",
+ "DECIMAL_DIG",
+ "E2BIG",
+ "EACCES",
+ "EADDRINUSE",
+ "EADDRNOTAVAIL",
+ "EAFNOSUPPORT",
+ "EAGAIN",
+ "EALREADY",
+ "EBADF",
+ "EBADMSG",
+ "EBUSY",
+ "ECANCELED",
+ "ECHILD",
+ "ECONNABORTED",
+ "ECONNREFUSED",
+ "ECONNRESET",
+ "EDEADLK",
+ "EDESTADDRREQ",
+ "EDOM",
+ "EEXIST",
+ "EFAULT",
+ "EFBIG",
+ "EHOSTUNREACH",
+ "EIDRM",
+ "EILSEQ",
+ "EINPROGRESS",
+ "EINTR",
+ "EINVAL",
+ "EIO",
+ "EISCONN",
+ "EISDIR",
+ "ELOOP",
+ "EMFILE",
+ "EMLINK",
+ "EMSGSIZE",
+ "ENAMETOOLONG",
+ "ENETDOWN",
+ "ENETRESET",
+ "ENETUNREACH",
+ "ENFILE",
+ "ENOBUFS",
+ "ENODATA",
+ "ENODEV",
+ "ENOENT",
+ "ENOEXEC",
+ "ENOLCK",
+ "ENOLINK",
+ "ENOMEM",
+ "ENOMSG",
+ "ENOPROTOOPT",
+ "ENOSPC",
+ "ENOSR",
+ "ENOSTR",
+ "ENOSYS",
+ "ENOTCONN",
+ "ENOTDIR",
+ "ENOTEMPTY",
+ "ENOTRECOVERABLE",
+ "ENOTSOCK",
+ "ENOTSUP",
+ "ENOTTY",
+ "ENXIO",
+ "EOF",
+ "EOPNOTSUPP",
+ "EOVERFLOW",
+ "EOWNERDEAD",
+ "EPERM",
+ "EPIPE",
+ "EPROTO",
+ "EPROTONOSUPPORT",
+ "EPROTOTYPE",
+ "ERANGE",
+ "EROFS",
+ "errno",
+ "ESPIPE",
+ "ESRCH",
+ "ETIME",
+ "ETIMEDOUT",
+ "ETXTBSY",
+ "EWOULDBLOCK",
+ "EXDEV",
+ "EXIT_FAILURE",
+ "EXIT_SUCCESS",
+ "FE_ALL_EXCEPT",
+ "FE_DFL_ENV",
+ "FE_DIVBYZERO",
+ "FE_DOWNWARD",
+ "FE_INEXACT",
+ "FE_INVALID",
+ "FE_OVERFLOW",
+ "FE_TONEAREST",
+ "FE_TOWARDZERO",
+ "FE_UNDERFLOW",
+ "FE_UPWARD",
+ "FILENAME_MAX",
+ "FLT_DECIMAL_DIG",
+ "FLT_DIG",
+ "FLT_EPSILON",
+ "FLT_EVAL_METHOD",
+ "FLT_HAS_SUBNORM",
+ "FLT_MANT_DIG",
+ "FLT_MAX",
+ "FLT_MAX_10_EXP",
+ "FLT_MAX_EXP",
+ "FLT_MIN",
+ "FLT_MIN_10_EXP",
+ "FLT_MIN_EXP",
+ "FLT_RADIX",
+ "FLT_ROUNDS",
+ "FLT_TRUE_MIN",
+ "FOPEN_MAX",
+ "FP_FAST_FMA",
+ "FP_FAST_FMAF",
+ "FP_FAST_FMAL",
+ "FP_ILOGB0",
+ "FP_ILOGBNAN",
+ "FP_SUBNORMAL",
+ "FP_ZERO",
+ "FP_INFINITE",
+ "FP_NAN",
+ "FP_NORMAL",
+ "HUGE_VAL",
+ "HUGE_VALF",
+ "HUGE_VALL",
+ "INFINITY",
+ "INT_FAST16_MAX",
+ "INT_FAST16_MIN",
+ "INT_FAST32_MAX",
+ "INT_FAST32_MIN",
+ "INT_FAST64_MAX",
+ "INT_FAST64_MIN",
+ "INT_FAST8_MAX",
+ "INT_FAST8_MIN",
+ "INT_LEAST16_MAX",
+ "INT_LEAST16_MIN",
+ "INT_LEAST32_MAX",
+ "INT_LEAST32_MIN",
+ "INT_LEAST64_MAX",
+ "INT_LEAST64_MIN",
+ "INT_LEAST8_MAX",
+ "INT_LEAST8_MIN",
+ "INT_MAX",
+ "INT_MIN",
+ "INT16_MAX",
+ "INT16_MIN",
+ "INT32_MAX",
+ "INT32_MIN",
+ "INT64_MAX",
+ "INT64_MIN",
+ "INT8_MAX",
+ "INT8_MIN",
+ "INTMAX_MAX",
+ "INTMAX_MIN",
+ "INTPTR_MAX",
+ "INTPTR_MIN",
+ "L_tmpnam",
+ "LC_ALL",
+ "LC_COLLATE",
+ "LC_CTYPE",
+ "LC_MONETARY",
+ "LC_NUMERIC",
+ "LC_TIME",
+ "LDBL_DECIMAL_DIG",
+ "LDBL_DIG",
+ "LDBL_EPSILON",
+ "LDBL_HAS_SUBNORM",
+ "LDBL_MANT_DIG",
+ "LDBL_MAX",
+ "LDBL_MAX_10_EXP",
+ "LDBL_MAX_EXP",
+ "LDBL_MIN",
+ "LDBL_MIN_10_EXP",
+ "LDBL_MIN_EXP",
+ "LDBL_TRUE_MIN",
+ "LLONG_MAX",
+ "LLONG_MIN",
+ "LONG_MAX",
+ "LONG_MIN",
+ "MATH_ERREXCEPT",
+ "math_errhandling",
+ "MATH_ERRNO",
+ "MB_CUR_MAX",
+ "MB_LEN_MAX",
+ "NAN",
+ "NULL",
+ "ONCE_FLAG_INIT",
+ "PRId16",
+ "PRId32",
+ "PRId64",
+ "PRId8",
+ "PRIdFAST16",
+ "PRIdFAST32",
+ "PRIdFAST64",
+ "PRIdFAST8",
+ "PRIdLEAST16",
+ "PRIdLEAST32",
+ "PRIdLEAST64",
+ "PRIdLEAST8",
+ "PRIdMAX",
+ "PRIdPTR",
+ "PRIi16",
+ "PRIi32",
+ "PRIi64",
+ "PRIi8",
+ "PRIiFAST16",
+ "PRIiFAST32",
+ "PRIiFAST64",
+ "PRIiFAST8",
+ "PRIiLEAST16",
+ "PRIiLEAST32",
+ "PRIiLEAST64",
+ "PRIiLEAST8",
+ "PRIiMAX",
+ "PRIiPTR",
+ "PRIo16",
+ "PRIo32",
+ "PRIo64",
+ "PRIo8",
+ "PRIoFAST16",
+ "PRIoFAST32",
+ "PRIoFAST64",
+ "PRIoFAST8",
+ "PRIoLEAST16",
+ "PRIoLEAST32",
+ "PRIoLEAST64",
+ "PRIoLEAST8",
+ "PRIoMAX",
+ "PRIoPTR",
+ "PRIu16",
+ "PRIu32",
+ "PRIu64",
+ "PRIu8",
+ "PRIuFAST16",
+ "PRIuFAST32",
+ "PRIuFAST64",
+ "PRIuFAST8",
+ "PRIuLEAST16",
+ "PRIuLEAST32",
+ "PRIuLEAST64",
+ "PRIuLEAST8",
+ "PRIuMAX",
+ "PRIuPTR",
+ "PRIx16",
+ "PRIX16",
+ "PRIx32",
+ "PRIX32",
+ "PRIx64",
+ "PRIX64",
+ "PRIx8",
+ "PRIX8",
+ "PRIxFAST16",
+ "PRIXFAST16",
+ "PRIxFAST32",
+ "PRIXFAST32",
+ "PRIxFAST64",
+ "PRIXFAST64",
+ "PRIxFAST8",
+ "PRIXFAST8",
+ "PRIxLEAST16",
+ "PRIXLEAST16",
+ "PRIxLEAST32",
+ "PRIXLEAST32",
+ "PRIxLEAST64",
+ "PRIXLEAST64",
+ "PRIxLEAST8",
+ "PRIXLEAST8",
+ "PRIxMAX",
+ "PRIXMAX",
+ "PRIxPTR",
+ "PRIXPTR",
+ "PTRDIFF_MAX",
+ "PTRDIFF_MIN",
+ "RAND_MAX",
+ "SCHAR_MAX",
+ "SCHAR_MIN",
+ "SCNd16",
+ "SCNd32",
+ "SCNd64",
+ "SCNd8",
+ "SCNdFAST16",
+ "SCNdFAST32",
+ "SCNdFAST64",
+ "SCNdFAST8",
+ "SCNdLEAST16",
+ "SCNdLEAST32",
+ "SCNdLEAST64",
+ "SCNdLEAST8",
+ "SCNdMAX",
+ "SCNdPTR",
+ "SCNi16",
+ "SCNi32",
+ "SCNi64",
+ "SCNi8",
+ "SCNiFAST16",
+ "SCNiFAST32",
+ "SCNiFAST64",
+ "SCNiFAST8",
+ "SCNiLEAST16",
+ "SCNiLEAST32",
+ "SCNiLEAST64",
+ "SCNiLEAST8",
+ "SCNiMAX",
+ "SCNiPTR",
+ "SCNo16",
+ "SCNo32",
+ "SCNo64",
+ "SCNo8",
+ "SCNoFAST16",
+ "SCNoFAST32",
+ "SCNoFAST64",
+ "SCNoFAST8",
+ "SCNoLEAST16",
+ "SCNoLEAST32",
+ "SCNoLEAST64",
+ "SCNoLEAST8",
+ "SCNoMAX",
+ "SCNoPTR",
+ "SCNu16",
+ "SCNu32",
+ "SCNu64",
+ "SCNu8",
+ "SCNuFAST16",
+ "SCNuFAST32",
+ "SCNuFAST64",
+ "SCNuFAST8",
+ "SCNuLEAST16",
+ "SCNuLEAST32",
+ "SCNuLEAST64",
+ "SCNuLEAST8",
+ "SCNuMAX",
+ "SCNuPTR",
+ "SCNx16",
+ "SCNx32",
+ "SCNx64",
+ "SCNx8",
+ "SCNxFAST16",
+ "SCNxFAST32",
+ "SCNxFAST64",
+ "SCNxFAST8",
+ "SCNxLEAST16",
+ "SCNxLEAST32",
+ "SCNxLEAST64",
+ "SCNxLEAST8",
+ "SCNxMAX",
+ "SCNxPTR",
+ "SEEK_CUR",
+ "SEEK_END",
+ "SEEK_SET",
+ "SHRT_MAX",
+ "SHRT_MIN",
+ "SIG_ATOMIC_MAX",
+ "SIG_ATOMIC_MIN",
+ "SIG_DFL",
+ "SIG_ERR",
+ "SIG_IGN",
+ "SIGABRT",
+ "SIGFPE",
+ "SIGILL",
+ "SIGINT",
+ "SIGSEGV",
+ "SIGTERM",
+ "SIZE_MAX",
+ "stderr",
+ "stdin",
+ "stdout",
+ "TIME_UTC",
+ "TMP_MAX",
+ "UCHAR_MAX",
+ "UINT_FAST16_MAX",
+ "UINT_FAST32_MAX",
+ "UINT_FAST64_MAX",
+ "UINT_FAST8_MAX",
+ "UINT_LEAST16_MAX",
+ "UINT_LEAST32_MAX",
+ "UINT_LEAST64_MAX",
+ "UINT_LEAST8_MAX",
+ "UINT_MAX",
+ "UINT16_MAX",
+ "UINT32_MAX",
+ "UINT64_MAX",
+ "UINT8_MAX",
+ "UINTMAX_MAX",
+ "UINTPTR_MAX",
+ "ULLONG_MAX",
+ "ULONG_MAX",
+ "USHRT_MAX",
+ "WCHAR_MAX",
+ "WCHAR_MIN",
+ "WEOF",
+ "WINT_MAX",
+ "WINT_MIN",
+}
+
+
+def _transform_invalid_identifier(invalid_identifier: str) -> str:
+    """Rewrites an identifier that is invalid in C++ into a usable one.
+
+    The rewrite is deliberately minimal: a single trailing underscore is
+    appended and nothing else about the name is changed. This covers the vast
+    majority of realistic cases; the caveats are described in the
+    `fix_cc_identifier` function documentation.
+
+    Args:
+        invalid_identifier: The name that cannot be emitted as-is.
+
+    Returns:
+        The given name followed by one underscore.
+    """
+    suffix = "_"
+    return f"{invalid_identifier}{suffix}"
+
+
+def fix_cc_identifier(proto_identifier: str) -> str:
+    """Maps a proto identifier to a name that is safe in generated C++ code.
+
+    The identifier is looked up, exactly as given (case-sensitive), in
+    `PW_PROTO_CODEGEN_RESERVED_WORDS`, which lists C++ keywords,
+    standard-library preprocessor macros and a few names that clash with the
+    codegen internals. Names that are not listed are returned unchanged; names
+    that are listed are passed through `_transform_invalid_identifier`, which
+    currently appends an underscore.
+
+    This handles the vast majority of realistic cases, but no attempt is made
+    to repair identifiers that the C++ spec reserves for the compiler's use:
+      - Any identifier that contains the substring "__" anywhere in it.
+      - Any identifier with an underscore for the first character and a
+        capital letter for the second character.
+
+    Args:
+        proto_identifier: The identifier as written in the proto source.
+
+    Returns:
+        Either the unchanged identifier or its adjusted form.
+    """
+    # Common case first: nothing to fix.
+    if proto_identifier not in PW_PROTO_CODEGEN_RESERVED_WORDS:
+        return proto_identifier
+    return _transform_invalid_identifier(proto_identifier)
+
+
+def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
+    """Maps a proto enum-value name to one that is safe in generated C++.
+
+    The name is upper-cased to obtain the UPPER_SNAKE_CASE spelling, and that
+    spelling is checked against `PW_PROTO_CODEGEN_RESERVED_WORDS` (C++ keywords
+    and standard-library macros, among others). When it collides, the
+    *original* spelling of the name, not the upper-cased one, is returned with
+    the `_transform_invalid_identifier` adjustment applied; otherwise the name
+    is returned untouched.
+
+    Note that, although the code generation also creates enum-value aliases in
+    kHungarianNotationPascalCase, symbols of that form never conflict with
+    keywords or standard-library macros in C++20. Therefore, only the
+    UPPER_SNAKE_CASE versions need to be checked for conflicts.
+
+    See `fix_cc_identifier` for further details.
+
+    Args:
+        proto_enum_entry: The enum-value name as written in the proto source.
+
+    Returns:
+        Either the unchanged name or its adjusted form.
+    """
+    collides = proto_enum_entry.upper() in PW_PROTO_CODEGEN_RESERVED_WORDS
+    if not collides:
+        return proto_enum_entry
+    # The adjustment is applied to the caller's spelling so its casing is kept.
+    return _transform_invalid_identifier(proto_enum_entry)