1 files changed, 613 insertions, 0 deletions
diff --git a/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py
new file mode 100755
index 000000000..ffe3c9451
--- /dev/null
+++ b/pw_protobuf/py/pw_protobuf/symbol_name_mapping.py
@@ -0,0 +1,613 @@
+#!/usr/bin/env python3
+# Copyright 2022 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Fixes identifiers that would cause compiler errors in generated C++ code."""
+
+from typing import Set
+
+# Words that must not appear verbatim as identifiers in the generated C++ code.
+# Many of them are valid proto identifiers; fix_cc_identifier() and
+# fix_cc_enum_value_name() below rename any symbol that collides with an entry.
+#
+# Note: This is primarily used for membership tests ("if x in y"), hence the
+# use of a set rather than a list.
+PW_PROTO_CODEGEN_RESERVED_WORDS: Set[str] = {
+    # Identifiers that conflict with the codegen internals when used in certain
+    # contexts:
+    "Fields",
+    "Message",
+    # C++20 and TS keywords (https://en.cppreference.com/w/cpp/keyword):
+    "alignas",
+    "alignof",
+    "and",
+    "and_eq",
+    "asm",
+    "atomic_cancel",
+    "atomic_commit",
+    "atomic_noexcept",
+    "auto",
+    "bitand",
+    "bitor",
+    "bool",
+    "break",
+    "case",
+    "catch",
+    "char",
+    "char8_t",
+    "char16_t",
+    "char32_t",
+    "class",
+    "compl",
+    "concept",
+    "const",
+    "consteval",
+    "constexpr",
+    "constinit",
+    "const_cast",
+    "continue",
+    "co_await",
+    "co_return",
+    "co_yield",
+    "decltype",
+    "default",
+    "delete",
+    "do",
+    "double",
+    "dynamic_cast",
+    "else",
+    "enum",
+    "explicit",
+    "export",
+    "extern",
+    "false",
+    "float",
+    "for",
+    "friend",
+    "goto",
+    "if",
+    "inline",
+    "int",
+    "long",
+    "mutable",
+    "namespace",
+    "new",
+    "noexcept",
+    "not",
+    "not_eq",
+    "nullptr",
+    "operator",
+    "or",
+    "or_eq",
+    "private",
+    "protected",
+    "public",
+    "reflexpr",
+    "register",
+    "reinterpret_cast",
+    "requires",
+    "return",
+    "short",
+    "signed",
+    "sizeof",
+    "static",
+    "static_assert",
+    "static_cast",
+    "struct",
+    "switch",
+    "synchronized",
+    "template",
+    "this",
+    "thread_local",
+    "throw",
+    "true",
+    "try",
+    "typedef",
+    "typeid",
+    "typename",
+    "union",
+    "unsigned",
+    "using",
+    "virtual",
+    "void",
+    "volatile",
+    "wchar_t",
+    "while",
+    "xor",
+    "xor_eq",
+    # C++20 macros (https://en.cppreference.com/w/cpp/symbol_index/macro),
+    # excluding the following:
+    # - Function-like macros, which have unambiguous syntax and thus won't
+    #   conflict with generated symbols.
+    # - Macros that couldn't be made valid by appending underscores, namely
+    #   those containing "__" or starting with "_[A-Z]". C++ reserves all such
+    #   identifiers for the compiler, and appending underscores wouldn't change
+    #   that.
+    "ATOMIC_BOOL_LOCK_FREE",
+    "ATOMIC_CHAR_LOCK_FREE",
+    "ATOMIC_CHAR16_T_LOCK_FREE",
+    "ATOMIC_CHAR32_T_LOCK_FREE",
+    "ATOMIC_CHAR8_T_LOCK_FREE",
+    "ATOMIC_FLAG_INIT",
+    "ATOMIC_INT_LOCK_FREE",
+    "ATOMIC_LLONG_LOCK_FREE",
+    "ATOMIC_LONG_LOCK_FREE",
+    "ATOMIC_POINTER_LOCK_FREE",
+    "ATOMIC_SHORT_LOCK_FREE",
+    "ATOMIC_WCHAR_T_LOCK_FREE",
+    "BUFSIZ",
+    "CHAR_BIT",
+    "CHAR_MAX",
+    "CHAR_MIN",
+    "CLOCKS_PER_SEC",
+    "DBL_DECIMAL_DIG",
+    "DBL_DIG",
+    "DBL_EPSILON",
+    "DBL_HAS_SUBNORM",
+    "DBL_MANT_DIG",
+    "DBL_MAX",
+    "DBL_MAX_10_EXP",
+    "DBL_MAX_EXP",
+    "DBL_MIN",
+    "DBL_MIN_10_EXP",
+    "DBL_MIN_EXP",
+    "DBL_TRUE_MIN",
+    "DECIMAL_DIG",
+    "E2BIG",
+    "EACCES",
+    "EADDRINUSE",
+    "EADDRNOTAVAIL",
+    "EAFNOSUPPORT",
+    "EAGAIN",
+    "EALREADY",
+    "EBADF",
+    "EBADMSG",
+    "EBUSY",
+    "ECANCELED",
+    "ECHILD",
+    "ECONNABORTED",
+    "ECONNREFUSED",
+    "ECONNRESET",
+    "EDEADLK",
+    "EDESTADDRREQ",
+    "EDOM",
+    "EEXIST",
+    "EFAULT",
+    "EFBIG",
+    "EHOSTUNREACH",
+    "EIDRM",
+    "EILSEQ",
+    "EINPROGRESS",
+    "EINTR",
+    "EINVAL",
+    "EIO",
+    "EISCONN",
+    "EISDIR",
+    "ELOOP",
+    "EMFILE",
+    "EMLINK",
+    "EMSGSIZE",
+    "ENAMETOOLONG",
+    "ENETDOWN",
+    "ENETRESET",
+    "ENETUNREACH",
+    "ENFILE",
+    "ENOBUFS",
+    "ENODATA",
+    "ENODEV",
+    "ENOENT",
+    "ENOEXEC",
+    "ENOLCK",
+    "ENOLINK",
+    "ENOMEM",
+    "ENOMSG",
+    "ENOPROTOOPT",
+    "ENOSPC",
+    "ENOSR",
+    "ENOSTR",
+    "ENOSYS",
+    "ENOTCONN",
+    "ENOTDIR",
+    "ENOTEMPTY",
+    "ENOTRECOVERABLE",
+    "ENOTSOCK",
+    "ENOTSUP",
+    "ENOTTY",
+    "ENXIO",
+    "EOF",
+    "EOPNOTSUPP",
+    "EOVERFLOW",
+    "EOWNERDEAD",
+    "EPERM",
+    "EPIPE",
+    "EPROTO",
+    "EPROTONOSUPPORT",
+    "EPROTOTYPE",
+    "ERANGE",
+    "EROFS",
+    "errno",
+    "ESPIPE",
+    "ESRCH",
+    "ETIME",
+    "ETIMEDOUT",
+    "ETXTBSY",
+    "EWOULDBLOCK",
+    "EXDEV",
+    "EXIT_FAILURE",
+    "EXIT_SUCCESS",
+    "FE_ALL_EXCEPT",
+    "FE_DFL_ENV",
+    "FE_DIVBYZERO",
+    "FE_DOWNWARD",
+    "FE_INEXACT",
+    "FE_INVALID",
+    "FE_OVERFLOW",
+    "FE_TONEAREST",
+    "FE_TOWARDZERO",
+    "FE_UNDERFLOW",
+    "FE_UPWARD",
+    "FILENAME_MAX",
+    "FLT_DECIMAL_DIG",
+    "FLT_DIG",
+    "FLT_EPSILON",
+    "FLT_EVAL_METHOD",
+    "FLT_HAS_SUBNORM",
+    "FLT_MANT_DIG",
+    "FLT_MAX",
+    "FLT_MAX_10_EXP",
+    "FLT_MAX_EXP",
+    "FLT_MIN",
+    "FLT_MIN_10_EXP",
+    "FLT_MIN_EXP",
+    "FLT_RADIX",
+    "FLT_ROUNDS",
+    "FLT_TRUE_MIN",
+    "FOPEN_MAX",
+    "FP_FAST_FMA",
+    "FP_FAST_FMAF",
+    "FP_FAST_FMAL",
+    "FP_ILOGB0",
+    "FP_ILOGBNAN",
+    "FP_SUBNORMAL",
+    "FP_ZERO",
+    "FP_INFINITE",
+    "FP_NAN",
+    "FP_NORMAL",
+    "HUGE_VAL",
+    "HUGE_VALF",
+    "HUGE_VALL",
+    "INFINITY",
+    "INT_FAST16_MAX",
+    "INT_FAST16_MIN",
+    "INT_FAST32_MAX",
+    "INT_FAST32_MIN",
+    "INT_FAST64_MAX",
+    "INT_FAST64_MIN",
+    "INT_FAST8_MAX",
+    "INT_FAST8_MIN",
+    "INT_LEAST16_MAX",
+    "INT_LEAST16_MIN",
+    "INT_LEAST32_MAX",
+    "INT_LEAST32_MIN",
+    "INT_LEAST64_MAX",
+    "INT_LEAST64_MIN",
+    "INT_LEAST8_MAX",
+    "INT_LEAST8_MIN",
+    "INT_MAX",
+    "INT_MIN",
+    "INT16_MAX",
+    "INT16_MIN",
+    "INT32_MAX",
+    "INT32_MIN",
+    "INT64_MAX",
+    "INT64_MIN",
+    "INT8_MAX",
+    "INT8_MIN",
+    "INTMAX_MAX",
+    "INTMAX_MIN",
+    "INTPTR_MAX",
+    "INTPTR_MIN",
+    "L_tmpnam",
+    "LC_ALL",
+    "LC_COLLATE",
+    "LC_CTYPE",
+    "LC_MONETARY",
+    "LC_NUMERIC",
+    "LC_TIME",
+    "LDBL_DECIMAL_DIG",
+    "LDBL_DIG",
+    "LDBL_EPSILON",
+    "LDBL_HAS_SUBNORM",
+    "LDBL_MANT_DIG",
+    "LDBL_MAX",
+    "LDBL_MAX_10_EXP",
+    "LDBL_MAX_EXP",
+    "LDBL_MIN",
+    "LDBL_MIN_10_EXP",
+    "LDBL_MIN_EXP",
+    "LDBL_TRUE_MIN",
+    "LLONG_MAX",
+    "LLONG_MIN",
+    "LONG_MAX",
+    "LONG_MIN",
+    "MATH_ERREXCEPT",
+    "math_errhandling",
+    "MATH_ERRNO",
+    "MB_CUR_MAX",
+    "MB_LEN_MAX",
+    "NAN",
+    "NULL",
+    "ONCE_FLAG_INIT",
+    "PRId16",
+    "PRId32",
+    "PRId64",
+    "PRId8",
+    "PRIdFAST16",
+    "PRIdFAST32",
+    "PRIdFAST64",
+    "PRIdFAST8",
+    "PRIdLEAST16",
+    "PRIdLEAST32",
+    "PRIdLEAST64",
+    "PRIdLEAST8",
+    "PRIdMAX",
+    "PRIdPTR",
+    "PRIi16",
+    "PRIi32",
+    "PRIi64",
+    "PRIi8",
+    "PRIiFAST16",
+    "PRIiFAST32",
+    "PRIiFAST64",
+    "PRIiFAST8",
+    "PRIiLEAST16",
+    "PRIiLEAST32",
+    "PRIiLEAST64",
+    "PRIiLEAST8",
+    "PRIiMAX",
+    "PRIiPTR",
+    "PRIo16",
+    "PRIo32",
+    "PRIo64",
+    "PRIo8",
+    "PRIoFAST16",
+    "PRIoFAST32",
+    "PRIoFAST64",
+    "PRIoFAST8",
+    "PRIoLEAST16",
+    "PRIoLEAST32",
+    "PRIoLEAST64",
+    "PRIoLEAST8",
+    "PRIoMAX",
+    "PRIoPTR",
+    "PRIu16",
+    "PRIu32",
+    "PRIu64",
+    "PRIu8",
+    "PRIuFAST16",
+    "PRIuFAST32",
+    "PRIuFAST64",
+    "PRIuFAST8",
+    "PRIuLEAST16",
+    "PRIuLEAST32",
+    "PRIuLEAST64",
+    "PRIuLEAST8",
+    "PRIuMAX",
+    "PRIuPTR",
+    "PRIx16",
+    "PRIX16",
+    "PRIx32",
+    "PRIX32",
+    "PRIx64",
+    "PRIX64",
+    "PRIx8",
+    "PRIX8",
+    "PRIxFAST16",
+    "PRIXFAST16",
+    "PRIxFAST32",
+    "PRIXFAST32",
+    "PRIxFAST64",
+    "PRIXFAST64",
+    "PRIxFAST8",
+    "PRIXFAST8",
+    "PRIxLEAST16",
+    "PRIXLEAST16",
+    "PRIxLEAST32",
+    "PRIXLEAST32",
+    "PRIxLEAST64",
+    "PRIXLEAST64",
+    "PRIxLEAST8",
+    "PRIXLEAST8",
+    "PRIxMAX",
+    "PRIXMAX",
+    "PRIxPTR",
+    "PRIXPTR",
+    "PTRDIFF_MAX",
+    "PTRDIFF_MIN",
+    "RAND_MAX",
+    "SCHAR_MAX",
+    "SCHAR_MIN",
+    "SCNd16",
+    "SCNd32",
+    "SCNd64",
+    "SCNd8",
+    "SCNdFAST16",
+    "SCNdFAST32",
+    "SCNdFAST64",
+    "SCNdFAST8",
+    "SCNdLEAST16",
+    "SCNdLEAST32",
+    "SCNdLEAST64",
+    "SCNdLEAST8",
+    "SCNdMAX",
+    "SCNdPTR",
+    "SCNi16",
+    "SCNi32",
+    "SCNi64",
+    "SCNi8",
+    "SCNiFAST16",
+    "SCNiFAST32",
+    "SCNiFAST64",
+    "SCNiFAST8",
+    "SCNiLEAST16",
+    "SCNiLEAST32",
+    "SCNiLEAST64",
+    "SCNiLEAST8",
+    "SCNiMAX",
+    "SCNiPTR",
+    "SCNo16",
+    "SCNo32",
+    "SCNo64",
+    "SCNo8",
+    "SCNoFAST16",
+    "SCNoFAST32",
+    "SCNoFAST64",
+    "SCNoFAST8",
+    "SCNoLEAST16",
+    "SCNoLEAST32",
+    "SCNoLEAST64",
+    "SCNoLEAST8",
+    "SCNoMAX",
+    "SCNoPTR",
+    "SCNu16",
+    "SCNu32",
+    "SCNu64",
+    "SCNu8",
+    "SCNuFAST16",
+    "SCNuFAST32",
+    "SCNuFAST64",
+    "SCNuFAST8",
+    "SCNuLEAST16",
+    "SCNuLEAST32",
+    "SCNuLEAST64",
+    "SCNuLEAST8",
+    "SCNuMAX",
+    "SCNuPTR",
+    "SCNx16",
+    "SCNx32",
+    "SCNx64",
+    "SCNx8",
+    "SCNxFAST16",
+    "SCNxFAST32",
+    "SCNxFAST64",
+    "SCNxFAST8",
+    "SCNxLEAST16",
+    "SCNxLEAST32",
+    "SCNxLEAST64",
+    "SCNxLEAST8",
+    "SCNxMAX",
+    "SCNxPTR",
+    "SEEK_CUR",
+    "SEEK_END",
+    "SEEK_SET",
+    "SHRT_MAX",
+    "SHRT_MIN",
+    "SIG_ATOMIC_MAX",
+    "SIG_ATOMIC_MIN",
+    "SIG_DFL",
+    "SIG_ERR",
+    "SIG_IGN",
+    "SIGABRT",
+    "SIGFPE",
+    "SIGILL",
+    "SIGINT",
+    "SIGSEGV",
+    "SIGTERM",
+    "SIZE_MAX",
+    "stderr",
+    "stdin",
+    "stdout",
+    "TIME_UTC",
+    "TMP_MAX",
+    "UCHAR_MAX",
+    "UINT_FAST16_MAX",
+    "UINT_FAST32_MAX",
+    "UINT_FAST64_MAX",
+    "UINT_FAST8_MAX",
+    "UINT_LEAST16_MAX",
+    "UINT_LEAST32_MAX",
+    "UINT_LEAST64_MAX",
+    "UINT_LEAST8_MAX",
+    "UINT_MAX",
+    "UINT16_MAX",
+    "UINT32_MAX",
+    "UINT64_MAX",
+    "UINT8_MAX",
+    "UINTMAX_MAX",
+    "UINTPTR_MAX",
+    "ULLONG_MAX",
+    "ULONG_MAX",
+    "USHRT_MAX",
+    "WCHAR_MAX",
+    "WCHAR_MIN",
+    "WEOF",
+    "WINT_MAX",
+    "WINT_MIN",
+}
+
+
+def _transform_invalid_identifier(invalid_identifier: str) -> str:
+    """Returns a renamed form of an identifier that cannot be used as-is.
+
+    The rename is a single trailing underscore. Names that C++ reserves for
+    the compiler are not made valid by this; `fix_cc_identifier` documents
+    that limitation.
+    """
+    return "{}_".format(invalid_identifier)
+
+
+def fix_cc_identifier(proto_identifier: str) -> str:
+    """Maps a proto identifier to a name that is safe in generated C++ code.
+
+    Identifiers that do not collide with anything are returned unchanged. An
+    identifier that matches a C++ keyword, a standard-library preprocessor
+    macro, or a name used by the codegen internals is returned in a slightly
+    altered form so that the generated code still compiles.
+
+    The alteration is currently just a trailing underscore. That covers the
+    vast majority of realistic cases, but no attempt is made to repair
+    identifiers that the C++ spec reserves for the compiler's use.
+
+    For reference, C++ reserves two categories of identifiers for the compiler:
+    - Any identifier that contains the substring "__" anywhere in it.
+    - Any identifier with an underscore for the first character and a capital
+      letter for the second character.
+    """
+    # Guard clause: names outside the reserved set need no adjustment.
+    if proto_identifier not in PW_PROTO_CODEGEN_RESERVED_WORDS:
+        return proto_identifier
+
+    return _transform_invalid_identifier(proto_identifier)
+
+
+def fix_cc_enum_value_name(proto_enum_entry: str) -> str:
+    """Maps a proto enum-value name to a name that is safe in generated C++.
+
+    The name is upper-cased with `str.upper()` to approximate the
+    UPPER_SNAKE_CASE symbol that is generated for it, and that upper-cased
+    form is looked up in the reserved-word set. On a hit, the original
+    (not the upper-cased) name is returned with the collision fix applied;
+    otherwise the original name is returned untouched.
+
+    The code generator also emits kHungarianNotationPascalCase aliases for
+    enum values, but symbols of that shape never clash with C++20 keywords or
+    standard-library macros, so only the UPPER_SNAKE_CASE form is checked.
+
+    See `fix_cc_identifier` for further details.
+    """
+    # The lookup uses the upper-cased name, but the fix is applied to the
+    # spelling the caller passed in.
+    if proto_enum_entry.upper() in PW_PROTO_CODEGEN_RESERVED_WORDS:
+        return _transform_invalid_identifier(proto_enum_entry)
+
+    return proto_enum_entry