To support KCFI type-ids, which require converting unique function
prototypes into unique 32-bit values, add a subset of the Itanium C++
mangling ABI for C typeinfo of function prototypes. This gets us to the
first step: a string representation of the function prototype.

Trying to extract only the C portions of the gcc/cp/mangle.cc code
seemed infeasible after a few attempts. So this is the minimal subset
of the mangling ABI needed to generate unique KCFI type ids.

I could not find a way to build a sensible selftest infrastructure for
this code. I wanted to do something like this:

  #ifdef CHECKING_P
  const char code[] = "
        typedef struct { int x, y; } xy_t;
        extern int func(xy_t *p);
  ";

  ASSERT_MANGLE (code, "_ZTSPFiP4xy_tE");
  ...
  #endif

But I could not find any way to build a localized parser that could
parse the "code" string from which I could extract the "func" fndecl.
It would have been so much nicer to build the selftest directly into
mangle.cc here, but I couldn't figure it out. Instead, later patches
create a "kcfi" dump file, and the large kcfi testsuite validates
expected mangle strings as part of the type-id validation.

Signed-off-by: Kees Cook <k...@kernel.org>
---
 gcc/Makefile.in |   1 +
 gcc/mangle.h    |  29 +++
 gcc/selftest.h  |   1 +
 gcc/mangle.cc   | 548 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 579 insertions(+)
 create mode 100644 gcc/mangle.h
 create mode 100644 gcc/mangle.cc

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d7d5cbe72770..86f62611c1d4 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1619,6 +1619,7 @@ OBJS = \
        lto-section-out.o \
        lto-opts.o \
        lto-compress.o \
+       mangle.o \
        mcf.o \
        mode-switching.o \
        modulo-sched.o \
diff --git a/gcc/mangle.h b/gcc/mangle.h
new file mode 100644
index 000000000000..94521e1e7e5c
--- /dev/null
+++ b/gcc/mangle.h
@@ -0,0 +1,29 @@
+/* Itanium C++ ABI type mangling for GCC.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_MANGLE_H
+#define GCC_MANGLE_H
+
+#include "tree.h"
+
+/* Function type mangling following Itanium C++ ABI conventions.
+   Accepts a FUNCTION_DECL or a FUNCTION_TYPE.  Returns a static buffer
+   containing the mangled type string; the next call overwrites it.  */
+extern const char *mangle_function_type (tree fntype_or_fndecl);
+
+#endif /* GCC_MANGLE_H */
diff --git a/gcc/mangle.cc b/gcc/mangle.cc
new file mode 100644
index 000000000000..830985251c81
--- /dev/null
+++ b/gcc/mangle.cc
@@ -0,0 +1,548 @@
+/* Itanium C++ ABI type mangling for GCC.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "diagnostic-core.h"
+#include "stringpool.h"
+#include "stor-layout.h"
+#include "mangle.h"
+#include "selftest.h"
+
+/* Forward declaration.  mangle_type_to_buffer calls itself for the types
+   nested inside pointer, array and function types.  */
+static void mangle_type_to_buffer (tree type, char **p, char *end);
+
+/* Mangle a builtin type following Itanium C++ ABI for C types.
+
+   Appends the encoding of TYPE at *P and advances *P past it.  END marks
+   the end of the output buffer; *P is never advanced beyond it.  void,
+   boolean and the standard integer and floating-point type nodes get a
+   single-letter code.  Any other INTEGER_TYPE or REAL_TYPE falls back to
+   'i' or 'f' followed by its TYPE_PRECISION.  Any other tree code is
+   reported with error () and aborts.  */
+static void
+mangle_builtin_type_to_buffer (tree type, char **p, char *end)
+{
+  gcc_assert (type != NULL_TREE);
+  gcc_assert (p != NULL && *p != NULL && end != NULL);
+  gcc_assert (*p < end);
+
+  if (*p >= end)
+    return;
+
+  /* Single-letter code for TYPE, or 0 if the fallback encoding applies.  */
+  char code = 0;
+  /* Leading letter of the precision-based fallback encoding.  */
+  char fallback = 0;
+
+  switch (TREE_CODE (type))
+    {
+    case VOID_TYPE:
+      code = 'v';
+      break;
+
+    case BOOLEAN_TYPE:
+      code = 'b';
+      break;
+
+    case INTEGER_TYPE:
+      /* Handle standard integer types using Itanium ABI codes.  */
+      if (type == char_type_node)
+        code = 'c';
+      else if (type == signed_char_type_node)
+        code = 'a';
+      else if (type == unsigned_char_type_node)
+        code = 'h';
+      else if (type == short_integer_type_node)
+        code = 's';
+      else if (type == short_unsigned_type_node)
+        code = 't';
+      else if (type == integer_type_node)
+        code = 'i';
+      else if (type == unsigned_type_node)
+        code = 'j';
+      else if (type == long_integer_type_node)
+        code = 'l';
+      else if (type == long_unsigned_type_node)
+        code = 'm';
+      else if (type == long_long_integer_type_node)
+        code = 'x';
+      else if (type == long_long_unsigned_type_node)
+        code = 'y';
+      else
+        /* Fallback for other integer types - use precision-based
+           encoding.  */
+        fallback = 'i';
+      break;
+
+    case REAL_TYPE:
+      if (type == float_type_node)
+        code = 'f';
+      else if (type == double_type_node)
+        code = 'd';
+      else if (type == long_double_type_node)
+        code = 'e';
+      else
+        /* Fallback for other real types.  */
+        fallback = 'f';
+      break;
+
+    default:
+      /* Unknown builtin type - this should never happen in a well-formed
+         C program.  */
+      error ("mangle: Unknown builtin type with %<TREE_CODE%> %d",
+             TREE_CODE (type));
+      error ("mangle: %<TYPE_MODE%> = %d, %<TYPE_PRECISION%> = %d",
+             TYPE_MODE (type), TYPE_PRECISION (type));
+      error ("mangle: Please report this as a bug with the above diagnostic "
+             "information");
+      gcc_unreachable ();
+    }
+
+  if (code)
+    {
+      /* The entry check guarantees room for one character.  */
+      **p = code;
+      (*p)++;
+      return;
+    }
+
+  /* snprintf returns the length it wanted to write, which can exceed the
+     room it was given.  Treat that as overflow instead of advancing *P
+     beyond END.  */
+  ptrdiff_t room = end - *p;
+  int len = snprintf (*p, room, "%c%d", fallback, TYPE_PRECISION (type));
+  gcc_assert (len > 0 && len < room);
+  if (len <= 0 || len >= room)
+    {
+      /* Only reachable when assertions are compiled out.  */
+      *p = end;
+      return;
+    }
+
+  *p += len;
+}
+
+/* Look through typedefs.  Returns the type that TYPE ultimately names,
+   except that a typedef of an unnamed struct/union/enum is returned
+   unchanged, as is any type that is not a typedef.  */
+static tree
+canonicalize_typedef_type (tree type)
+{
+  tree name = TYPE_NAME (type);
+
+  /* Only a TYPE_DECL can be a typedef.  */
+  if (!name || TREE_CODE (name) != TYPE_DECL)
+    return type;
+
+  /* DECL_ORIGINAL_TYPE is set for a typedef, not for the original struct
+     declaration.  */
+  tree underlying = DECL_ORIGINAL_TYPE (name);
+  if (!underlying)
+    return type;
+
+  bool look_through;
+  switch (TREE_CODE (underlying))
+    {
+    case RECORD_TYPE:
+    case UNION_TYPE:
+    case ENUMERAL_TYPE:
+      {
+        /* Use the struct/union/enum itself only when it carries a name.  */
+        tree tag = TYPE_NAME (underlying);
+        look_through = (tag
+                        && ((TREE_CODE (tag) == TYPE_DECL && DECL_NAME (tag))
+                            || TREE_CODE (tag) == IDENTIFIER_NODE));
+        break;
+      }
+
+    /* Typedefs of basic types (e.g., u8 -> unsigned char) always resolve
+       to the original type.  */
+    case INTEGER_TYPE:
+    case REAL_TYPE:
+    case POINTER_TYPE:
+    case ARRAY_TYPE:
+    case FUNCTION_TYPE:
+    case METHOD_TYPE:
+    case BOOLEAN_TYPE:
+    case COMPLEX_TYPE:
+    case VECTOR_TYPE:
+      look_through = true;
+      break;
+
+    default:
+      look_through = false;
+      break;
+    }
+
+  if (!look_through)
+    return type;
+
+  /* The original type may itself be a typedef, so keep unwrapping.  */
+  return canonicalize_typedef_type (underlying);
+}
+
+/* Append the single character C at *P and advance *P.  Nothing is written
+   once *P has reached END.  */
+static void
+mangle_append_char (char **p, char *end, char c)
+{
+  gcc_assert (*p < end);
+  if (*p >= end)
+    return;
+
+  **p = c;
+  (*p)++;
+}
+
+/* Advance *P by LEN, the value returned by an snprintf call that was given
+   END - *P bytes of room at *P.  snprintf returns the length it wanted to
+   write, so a LEN that does not fit means the output was truncated; that
+   is treated as overflow and *P is never moved beyond END.  */
+static void
+mangle_advance (char **p, char *end, int len)
+{
+  gcc_assert (len >= 0 && len < end - *p);
+  if (len < 0 || len >= end - *p)
+    {
+      /* Only reachable when assertions are compiled out.  */
+      *p = end;
+      return;
+    }
+
+  *p += len;
+}
+
+/* Synthesize a name in BUF (of SIZE bytes) for the struct, union or enum
+   TYPE, which has no TYPE_NAME.  A union is named after its first named
+   field when it has one.  A struct or enum is named after its TYPE_SIZE
+   and, for structs, a hash of the DECL_FIELD_OFFSET and TYPE_MODE of each
+   field.  snprintf truncates names that do not fit in BUF.  */
+static void
+mangle_anon_type_name (tree type, char *buf, size_t size)
+{
+  if (TREE_CODE (type) == UNION_TYPE)
+    {
+      /* For anonymous unions, try to find first named field (Itanium ABI
+         approach).  */
+      tree field = TYPE_FIELDS (type);
+      while (field && !DECL_NAME (field))
+        field = DECL_CHAIN (field);
+
+      if (field)
+        snprintf (buf, size, "anon_union_by_%s",
+                  IDENTIFIER_POINTER (DECL_NAME (field)));
+      else
+        /* No named fields - use Itanium-style Ut encoding.  */
+        snprintf (buf, size, "Ut_unnamed_union");
+      return;
+    }
+
+  /* For anonymous structs/enums, use Itanium-style Ut encoding with layout
+     info for discrimination.  The caller only passes struct, union and
+     enum types, so anything that is not a struct here is an enum.  */
+  const char *type_prefix
+    = TREE_CODE (type) == RECORD_TYPE ? "struct" : "enum";
+
+  /* Include size and field layout for better discrimination.  */
+  HOST_WIDE_INT type_size = 0;
+  if (TYPE_SIZE (type) && tree_fits_shwi_p (TYPE_SIZE (type)))
+    type_size = tree_to_shwi (TYPE_SIZE (type));
+
+  /* Generate a hash based on field layout to distinguish same-sized
+     anonymous types.  */
+  unsigned layout_hash = 0;
+  if (TREE_CODE (type) == RECORD_TYPE)
+    for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+      {
+        if (TREE_CODE (field) != FIELD_DECL)
+          continue;
+
+        /* Hash field offset.  Offsets that are not host-representable
+           constants are skipped; tree_to_shwi requires that they fit.  */
+        tree offset = DECL_FIELD_OFFSET (field);
+        if (offset && tree_fits_shwi_p (offset))
+          layout_hash = (layout_hash * 31
+                         + (unsigned) tree_to_shwi (offset));
+
+        /* Hash field type.  */
+        tree field_type = TREE_TYPE (field);
+        if (field_type && TYPE_MODE (field_type) != VOIDmode)
+          layout_hash = (layout_hash * 37
+                         + (unsigned) TYPE_MODE (field_type));
+      }
+
+  if (layout_hash != 0)
+    snprintf (buf, size, "Ut_%s_%ld_%x", type_prefix, (long) type_size,
+              layout_hash);
+  else
+    snprintf (buf, size, "Ut_%s_%ld", type_prefix, (long) type_size);
+}
+
+/* Recursively mangle a type following Itanium C++ ABI conventions.
+
+   TYPE is first passed through canonicalize_typedef_type, then its
+   encoding is appended at *P, advancing *P.  END marks the end of the
+   output buffer; *P is never advanced beyond it.  Pointers are 'P' +
+   qualifiers + pointee, arrays 'A' + length + '_' + element, functions
+   'F' + return type + parameters + 'E', and struct/union/enum types a
+   length-prefixed name.  Everything else is handed to
+   mangle_builtin_type_to_buffer.  */
+static void
+mangle_type_to_buffer (tree type, char **p, char *end)
+{
+  gcc_assert (type != NULL_TREE);
+  gcc_assert (p != NULL && *p != NULL && end != NULL);
+  gcc_assert (*p < end);
+
+  if (*p >= end)
+    return;
+
+  /* Canonicalize typedef types to their underlying named struct types.  */
+  type = canonicalize_typedef_type (type);
+
+  switch (TREE_CODE (type))
+    {
+    case POINTER_TYPE:
+      {
+        /* Pointer type: 'P' + qualifiers + pointed-to type.  */
+        mangle_append_char (p, end, 'P');
+
+        /* Qualifiers of the pointed-to type, in Itanium C++ ABI order:
+           restrict, volatile, const.  */
+        tree pointed_to_type = TREE_TYPE (type);
+        int quals = TYPE_QUALS (pointed_to_type);
+        if (quals & TYPE_QUAL_RESTRICT)
+          mangle_append_char (p, end, 'r');
+        if (quals & TYPE_QUAL_VOLATILE)
+          mangle_append_char (p, end, 'V');
+        if (quals & TYPE_QUAL_CONST)
+          mangle_append_char (p, end, 'K');
+        /* Note: _Atomic is not typically used in kernel code.  */
+
+        /* For KCFI's hybrid type system: preserve typedef names for
+           compound types, but use canonical forms for primitive types.  */
+        tree target_type;
+        if (TREE_CODE (pointed_to_type) == RECORD_TYPE
+            || TREE_CODE (pointed_to_type) == UNION_TYPE
+            || TREE_CODE (pointed_to_type) == ENUMERAL_TYPE)
+          /* Compound type: preserve typedef information by using the
+             original type.  */
+          target_type = pointed_to_type;
+        else
+          /* Primitive type: use canonical form to ensure structural
+             typing.  */
+          target_type = TYPE_MAIN_VARIANT (pointed_to_type);
+
+        mangle_type_to_buffer (target_type, p, end);
+        break;
+      }
+
+    case ARRAY_TYPE:
+      {
+        /* Array type: 'A' + size + '_' + element type (simplified).  */
+        mangle_append_char (p, end, 'A');
+
+        /* Only an upper bound that is a host-representable constant gives
+           a length; tree_to_shwi requires that.  Any other array is
+           mangled like an array with no bound, as "A_".  */
+        tree domain = TYPE_DOMAIN (type);
+        tree max = domain ? TYPE_MAX_VALUE (domain) : NULL_TREE;
+        if (max && tree_fits_shwi_p (max))
+          {
+            HOST_WIDE_INT size = tree_to_shwi (max) + 1;
+            int len = snprintf (*p, end - *p, "%ld_", (long) size);
+            mangle_advance (p, end, len);
+          }
+        else
+          mangle_append_char (p, end, '_');
+
+        mangle_type_to_buffer (TREE_TYPE (type), p, end);
+        break;
+      }
+
+    case FUNCTION_TYPE:
+      {
+        /* Function type: 'F' + return type + parameter types + 'E' */
+        mangle_append_char (p, end, 'F');
+        mangle_type_to_buffer (TREE_TYPE (type), p, end);
+
+        /* A NULL parameter list is func() with no parameter list; nothing
+           is mangled between the return type and 'E' in that case.
+
+           NOTE(review): a list that ends without the void sentinel
+           (presumably a prototype ending in "...") mangles the same as
+           the void-terminated list; the Itanium ABI marks an ellipsis
+           with 'z'.  Confirm the collision is intended.  */
+        bool found_real_params = false;
+        for (tree param = TYPE_ARG_TYPES (type); param && *p < end;
+             param = TREE_CHAIN (param))
+          {
+            tree param_type = TREE_VALUE (param);
+            if (param_type == void_type_node)
+              {
+                /* A leading void is func(void), an explicit empty
+                   parameter list: mangle it to distinguish it from
+                   func().  A void after real parameters is the list
+                   sentinel.  Either way the list ends here.  */
+                if (!found_real_params)
+                  mangle_type_to_buffer (void_type_node, p, end);
+                break;
+              }
+
+            found_real_params = true;
+
+            /* For value parameters, ignore const/volatile qualifiers as
+               they don't affect the calling convention.  const int and
+               int are passed identically by value.  */
+            tree canonical_param_type = param_type;
+            if (TREE_CODE (param_type) != POINTER_TYPE
+                && TREE_CODE (param_type) != REFERENCE_TYPE
+                && TREE_CODE (param_type) != ARRAY_TYPE)
+              canonical_param_type = TYPE_MAIN_VARIANT (param_type);
+
+            mangle_type_to_buffer (canonical_param_type, p, end);
+          }
+
+        mangle_append_char (p, end, 'E');
+        break;
+      }
+
+    case RECORD_TYPE:
+    case UNION_TYPE:
+    case ENUMERAL_TYPE:
+      {
+        /* Struct/union/enum: use simplified representation for C types.  */
+        const char *name = NULL;
+        /* Backing store for a synthesized name.  NAME may point into it,
+           so it lives for the whole case.  */
+        char anon_name[128];
+        tree type_name = TYPE_NAME (type);
+
+        if (type_name && TREE_CODE (type_name) == TYPE_DECL)
+          {
+            /* TYPE_DECL case: both named structs and typedef structs.  */
+            tree decl_name = DECL_NAME (type_name);
+            if (decl_name && TREE_CODE (decl_name) == IDENTIFIER_NODE)
+              name = IDENTIFIER_POINTER (decl_name);
+          }
+        else if (type_name && TREE_CODE (type_name) == IDENTIFIER_NODE)
+          {
+            /* Direct identifier case.  */
+            name = IDENTIFIER_POINTER (type_name);
+          }
+        else if (!type_name)
+          {
+            /* Anonymous type: synthesize a name.  */
+            mangle_anon_type_name (type, anon_name, sizeof (anon_name));
+            name = anon_name;
+          }
+
+        if (!name)
+          {
+            /* TYPE_NAME is set but is neither an IDENTIFIER_NODE nor a
+               TYPE_DECL whose DECL_NAME is one.  This indicates a missing
+               case in the name extraction above.  */
+            error ("mangle: No struct/union/enum name found for type code %d "
+                   "(%qs)",
+                   TREE_CODE (type), get_tree_code_name (TREE_CODE (type)));
+            error ("mangle: %<TYPE_NAME%> exists but extraction failed");
+            error ("mangle: %<TYPE_NAME%> tree code = %d",
+                   TREE_CODE (type_name));
+            if (TREE_CODE (type_name) == TYPE_DECL)
+              error ("mangle: %<TYPE_DECL%> %<DECL_NAME%> = %p",
+                     (void *) DECL_NAME (type_name));
+            else
+              error ("mangle: Unknown %<TYPE_NAME%> tree code = %d",
+                     TREE_CODE (type_name));
+            error ("mangle: Please report this as a bug with the above "
+                   "diagnostic information");
+            gcc_unreachable ();
+          }
+
+        /* Length-prefixed name, as in an Itanium <source-name>.  */
+        int len = snprintf (*p, end - *p, "%zu%s", strlen (name), name);
+        mangle_advance (p, end, len);
+        break;
+      }
+
+    default:
+      /* Handle builtin types.  */
+      mangle_builtin_type_to_buffer (type, p, end);
+      break;
+    }
+}
+
+/* Compute canonical type name using Itanium C++ ABI mangling.
+   Accepts either FUNCTION_DECL (preferred for typedef preservation) or
+   FUNCTION_TYPE; any other tree code aborts.
+
+   Returns a pointer to a static buffer holding "_ZTSP" followed by the
+   mangled function type.  The buffer is overwritten by the next call, so
+   callers must copy the string if they need to keep it.  */
+const char *
+mangle_function_type (tree fntype_or_fndecl)
+{
+  gcc_assert (fntype_or_fndecl);
+
+  tree fntype = NULL_TREE;
+
+  /* Determine input type and extract function type.  */
+  if (TREE_CODE (fntype_or_fndecl) == FUNCTION_TYPE)
+    fntype = fntype_or_fndecl;
+  else if (TREE_CODE (fntype_or_fndecl) == FUNCTION_DECL)
+    {
+      tree fndecl = fntype_or_fndecl;
+      tree base_fntype = TREE_TYPE (fndecl);
+
+      /* No DECL_ARGUMENTS - use the standard function type.  */
+      fntype = base_fntype;
+
+      if (DECL_ARGUMENTS (fndecl))
+        {
+          /* Build a synthetic function type from the types of
+             DECL_ARGUMENTS to preserve typedef information.  The list
+             has at least one entry here and gets no void sentinel;
+             mangle_type_to_buffer stops at the end of the list.  */
+          tree param_list = NULL_TREE;
+          tree *param_tail = &param_list;
+
+          for (tree parm = DECL_ARGUMENTS (fndecl); parm;
+               parm = DECL_CHAIN (parm))
+            {
+              *param_tail = tree_cons (NULL_TREE, TREE_TYPE (parm),
+                                       NULL_TREE);
+              param_tail = &TREE_CHAIN (*param_tail);
+            }
+
+          fntype = build_function_type (TREE_TYPE (base_fntype), param_list);
+        }
+    }
+  else
+    {
+      /* Must only be called with FUNCTION_DECL or FUNCTION_TYPE.  */
+      gcc_unreachable ();
+    }
+
+  static char name_buf[512];
+  char *p = name_buf;
+  /* Keep the last byte for the terminating NUL.  */
+  char *end = name_buf + sizeof (name_buf) - 1;
+
+  /* Typeinfo for a function prototype.  */
+  p += snprintf (name_buf, sizeof (name_buf), "_ZTSP");
+
+  /* Use mangle_type_to_buffer for all cases.  */
+  mangle_type_to_buffer (fntype, &p, end);
+
+  /* Ensure we didn't overflow the buffer.  */
+  gcc_assert (p <= end);
+  *p = '\0';
+  return name_buf;
+}
-- 
2.34.1

Reply via email to