From dac55e9dbc765ccfcc5a1f49baa4662dae3c3923 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radek=20Barto=C5=88?= <radek.barton@microsoft.com>
Date: Mon, 4 Nov 2024 18:13:30 +0100
Subject: [PATCH] Fix function call handling according to Microsoft Arm64
 variadic function call ABI

---
 gcc/config.gcc                         |   5 +-
 gcc/config/aarch64/aarch64-builtins.cc |  30 +++
 gcc/config/aarch64/aarch64-protos.h    |   2 +
 gcc/config/aarch64/aarch64.cc          | 315 +++++++++++++++++++++++--
 gcc/config/aarch64/aarch64.h           |  10 +
 gcc/config/aarch64/cross-stdarg.h      |  42 ++++
 gcc/config/aarch64/cygming.h           |  13 +-
 gcc/config/mingw/winnt.cc              |  22 ++
 gcc/config/mingw/winnt.h               |   1 +
 9 files changed, 422 insertions(+), 18 deletions(-)
 create mode 100644 gcc/config/aarch64/cross-stdarg.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 0d8dbc4fb19..5357690840b 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -347,7 +347,10 @@ m32c*-*-*)
         ;;
 aarch64*-*-*)
 	cpu_type=aarch64
-	extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h arm_neon_sve_bridge.h arm_private_fp8.h arm_private_neon_types.h"
+	extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h
+		       arm_sme.h arm_neon_sve_bridge.h arm_private_fp8.h
+		       arm_private_neon_types.h
+		       cross-stdarg.h"
 	c_target_objs="aarch64-c.o"
 	cxx_target_objs="aarch64-c.o"
 	d_target_objs="aarch64-d.o"
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 408099a50e8..878d4e76f55 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2519,6 +2519,36 @@ aarch64_general_init_builtins (void)
     handle_arm_acle_h ();
 }
 
+/* Initialize the builtin functions for the Microsoft Arm64 variadic ABI.  */
+
+void
+aarch64_ms_variadic_abi_init_builtins (void)
+{
+  tree ms_va_ref;
+  tree fnvoid_va_end_ms;
+  tree fnvoid_va_start_ms;
+  tree fnvoid_va_copy_ms;
+  tree fnattr_ms = NULL_TREE;
+
+  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
+  ms_va_ref = build_reference_type (ms_va_list_type_node);
+
+  fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref,
+					       NULL_TREE);
+  fnvoid_va_start_ms
+    = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
+  fnvoid_va_copy_ms
+    = build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
+				NULL_TREE);
+
+  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
+			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
+  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
+			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
+  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
+			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
+}
+
 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
 tree
 aarch64_general_builtin_decl (unsigned code, bool)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 38c307cdc3a..1c021615754 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1121,6 +1121,8 @@ void aarch64_override_options_internal (struct gcc_options *);
 
 const char *aarch64_general_mangle_builtin_type (const_tree);
 void aarch64_general_init_builtins (void);
+void aarch64_ms_variadic_abi_init_builtins (void);
+
 tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *);
 gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
 					     gimple_stmt_iterator *);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5502d0b4807..03dd8d87843 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -541,6 +541,9 @@ const sysreg_t aarch64_sysregs[] =
 using sysreg_map_t = hash_map<nofree_string_hash, const sysreg_t *>;
 static sysreg_map_t *sysreg_map = nullptr;
 
+/* Microsoft Arm64 variadic function call ABI specific va_list type node.  */
+tree ms_va_list_type_node;
+
 /* Map system register names to their hardware metadata: encoding,
    feature flags and architectural feature requirements, all of which
    are encoded in a sysreg_t struct.  */
@@ -750,6 +753,7 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree,
       *no_add_attrs = true;
       return NULL_TREE;
 
+    case ARM_PCS_MS_VARIADIC:
     case ARM_PCS_TLSDESC:
     case ARM_PCS_UNKNOWN:
       break;
@@ -1313,6 +1317,21 @@ aarch64_sve_abi (void)
   return sve_abi;
 }
 
+/* Return the descriptor of the Microsoft Arm64 variadic function call ABI.  */
+
+static const predefined_function_abi &
+aarch64_ms_variadic_abi (void)
+{
+  predefined_function_abi &ms_variadic_abi = function_abis[ARM_PCS_MS_VARIADIC];
+  if (!ms_variadic_abi.initialized_p ())
+    {
+      HARD_REG_SET full_reg_clobbers
+	= default_function_abi.full_reg_clobbers ();
+      ms_variadic_abi.initialize (ARM_PCS_MS_VARIADIC, full_reg_clobbers);
+    }
+  return ms_variadic_abi;
+}
+
 /* If X is an UNSPEC_SALT_ADDR expression, return the address that it
    wraps, otherwise return X itself.  */
 
@@ -2300,11 +2319,37 @@ aarch64_takes_arguments_in_sve_regs_p (const_tree fntype)
   return false;
 }
 
+/* Return true if FNTYPE is the type of a variadic function.  */
+
+static bool
+is_variadic_function_type (const_tree fntype) {
+  if (TYPE_NO_NAMED_ARGS_STDARG_P (fntype))
+    return true;
+
+  int arg_count = 0;
+  for (tree arg = TYPE_ARG_TYPES (fntype); arg; arg = TREE_CHAIN (arg))
+    {
+      if (TREE_VALUE (arg) == void_type_node)
+	return false;
+      arg_count++;
+    }
+
+  return arg_count > 0;
+}
+
 /* Implement TARGET_FNTYPE_ABI.  */
 
 static const predefined_function_abi &
 aarch64_fntype_abi (const_tree fntype)
 {
+#if defined(TARGET_AARCH64_MS_ABI)
+  if (is_variadic_function_type (fntype))
+    return aarch64_ms_variadic_abi ();
+#endif
+
+  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
+    return aarch64_ms_variadic_abi ();
+
   if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)))
     return aarch64_simd_abi ();
 
@@ -2519,6 +2564,10 @@ aarch64_reg_save_mode (unsigned int regno)
 	/* Only the low 64 bits are saved by the base PCS.  */
 	return DFmode;
 
+      case ARM_PCS_MS_VARIADIC:
+	/* Microsoft only uses GP registers for variadic arguments.  */
+	return DImode;
+
       case ARM_PCS_SIMD:
 	/* The vector PCS saves the low 128 bits (which is the full
 	   register on non-SVE targets).  */
@@ -7220,6 +7269,86 @@ bitint_or_aggr_of_bitint_p (tree type)
   return false;
 }
 
+static int
+aarch64_arg_size (const function_arg_info &arg)
+{
+  HOST_WIDE_INT size;
+
+  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
+  if (arg.type)
+    size = int_size_in_bytes (arg.type);
+  else
+    /* No frontends can create types with variable-sized modes, so we
+       shouldn't be asked to pass or return them.  */
+    size = GET_MODE_SIZE (arg.mode).to_constant ();
+
+  return ROUND_UP (size, UNITS_PER_WORD);
+}
+
+/* Layout a function argument according to the AAPCS64 rules.  The rule
+   numbers refer to the rule numbers in the AAPCS64.  The Microsoft Arm64
+   variadic function call ABI uses only rules C.12-C.15.
+   See: https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#addendum-variadic-functions */
+
+static void
+aarch64_ms_variadic_abi_layout_arg (cumulative_args_t pcum_v,
+				    const function_arg_info &arg)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+  tree type = arg.type;
+  machine_mode mode = arg.mode;
+  int ncrn, nregs;
+  HOST_WIDE_INT size;
+
+  size = aarch64_arg_size (arg);
+  ncrn = pcum->aapcs_ncrn;
+  nregs = size / UNITS_PER_WORD;
+
+  if (ncrn < NUM_ARG_REGS)
+    {
+      /* The argument bytes are copied to the core registers.  */
+      if (nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
+	{
+	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
+	}
+      else
+	{
+	  rtx par;
+	  int i;
+
+	  /* Handle the case when argument is split between the last registers
+	     and the stack.  */
+	  if (ncrn + nregs > NUM_ARG_REGS) {
+	    pcum->aapcs_stack_words = ncrn + nregs - NUM_ARG_REGS;
+	    nregs -= pcum->aapcs_stack_words;
+	  }
+
+	  /* Generate instructions to load the argument into registers.  */
+	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
+	  for (i = 0; i < nregs; i++)
+	    {
+	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
+	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
+				       GEN_INT (i * UNITS_PER_WORD));
+	      XVECEXP (par, 0, i) = tmp;
+	    }
+	  pcum->aapcs_reg = par;
+	}
+
+      pcum->aapcs_nextncrn = ncrn + nregs;
+    }
+  else
+    {
+       /* The remaining arguments are passed on stack; record the needed
+	  number of words for this argument and align the total size if
+	  necessary.  */
+       pcum->aapcs_nextncrn = NUM_ARG_REGS;
+       pcum->aapcs_stack_words = nregs;
+    }
+
+  pcum->aapcs_arg_processed = true;
+}
+
 /* Layout a function argument according to the AAPCS64 rules.  The rule
    numbers refer to the rule numbers in the AAPCS64.  ORIG_MODE is the
    mode that was originally given to us by the target hook, whereas the
@@ -7243,6 +7372,11 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
   if (pcum->aapcs_arg_processed)
     return;
 
+  if (pcum->pcs_variant == ARM_PCS_MS_VARIADIC) {
+    aarch64_ms_variadic_abi_layout_arg (pcum_v, arg);
+    return;
+  }
+
   bool warn_pcs_change
     = (warn_psabi
        && !pcum->silent_p
@@ -7359,15 +7493,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
 		&& (aarch64_some_values_include_pst_objects_p (type)
 		    || (vec_flags & VEC_PARTIAL)));
 
-  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
-  if (type)
-    size = int_size_in_bytes (type);
-  else
-    /* No frontends can create types with variable-sized modes, so we
-       shouldn't be asked to pass or return them.  */
-    size = GET_MODE_SIZE (mode).to_constant ();
-  size = ROUND_UP (size, UNITS_PER_WORD);
-
+  size = aarch64_arg_size (arg);
   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
 						 mode,
@@ -7600,6 +7726,25 @@ aarch64_finish_sme_mode_switch_args (CUMULATIVE_ARGS *pcum)
   return gen_rtx_PARALLEL (VOIDmode, argvec);
 }
 
+/* Implement TARGET_ARG_PARTIAL_BYTES.  */
+
+static int
+aarch64_arg_partial_bytes (cumulative_args_t pcum_v,
+			   const function_arg_info &arg)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+
+  if (pcum->pcs_variant != ARM_PCS_MS_VARIADIC)
+    return 0;
+
+  /* Handle the case when argument is split between the last registers and
+     the stack.  */
+  if ((pcum->aapcs_reg != NULL_RTX) && (pcum->aapcs_stack_words != 0))
+    return pcum->aapcs_stack_words * UNITS_PER_WORD;
+
+  return 0;
+}
+
 /* Implement TARGET_FUNCTION_ARG.  */
 
 static rtx
@@ -7608,7 +7753,8 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64
 	      || pcum->pcs_variant == ARM_PCS_SIMD
-	      || pcum->pcs_variant == ARM_PCS_SVE);
+	      || pcum->pcs_variant == ARM_PCS_SVE
+	      || pcum->pcs_variant == ARM_PCS_MS_VARIADIC);
 
   if (arg.end_marker_p ())
     {
@@ -7700,11 +7846,13 @@ aarch64_function_arg_advance (cumulative_args_t pcum_v,
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
   if (pcum->pcs_variant == ARM_PCS_AAPCS64
       || pcum->pcs_variant == ARM_PCS_SIMD
-      || pcum->pcs_variant == ARM_PCS_SVE)
+      || pcum->pcs_variant == ARM_PCS_SVE
+      || pcum->pcs_variant == ARM_PCS_MS_VARIADIC)
     {
       aarch64_layout_arg (pcum_v, arg);
-      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
-		  != (pcum->aapcs_stack_words != 0));
+      gcc_assert ((pcum->pcs_variant == ARM_PCS_MS_VARIADIC)
+		   || (pcum->aapcs_reg != NULL_RTX)
+		       != (pcum->aapcs_stack_words != 0));
       if (pcum->aapcs_reg
 	  && aarch64_call_switches_pstate_sm (pcum->isa_mode))
 	aarch64_record_sme_mode_switch_args (pcum);
@@ -21671,6 +21819,21 @@ aarch64_build_builtin_va_list (void)
   return va_list_type;
 }
 
+/* Implement TARGET_BUILD_BUILTIN_VA_LIST for the Microsoft Arm64
+   variadic function call ABI.  */
+
+static tree
+aarch64_ms_variadic_abi_build_builtin_va_list (void)
+{
+  /* For MS ABI, va_list is a plain pointer to the argument save area.  */
+  tree char_ptr_type = build_pointer_type (char_type_node);
+  tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
+			 TYPE_ATTRIBUTES (char_ptr_type));
+  ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
+
+  return ms_va_list_type_node;
+}
+
 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
 static void
 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
@@ -21754,6 +21917,75 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
 }
 
+/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
+
+static void
+aarch64_ms_variadic_abi_expand_builtin_va_start (tree valist, rtx nextarg)
+{
+  rtx va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+  /* TODO: Should we initialize and use cfun->va_list_gpr_size instead of
+           defining single purpose
+           cfun->machine->frame.unaligned_saved_varargs_size field?
+           Currently, the cfun->va_list_gpr_size contains only value 255?  */
+  int offset = cfun->machine->frame.unaligned_saved_varargs_size;
+  nextarg = plus_constant (GET_MODE (nextarg), nextarg, -offset);
+
+  convert_move (va_r, nextarg, 0);
+}
+
+/* Iterate through the target-specific builtin types for va_list.
+   IDX denotes the iterator, *PTREE is set to the result type of
+   the va_list builtin, and *PNAME to its internal type.
+   Returns zero if there is no element for this index, otherwise
+   IDX should be increased upon the next call.
+   Note, do not iterate a base builtin's name like __builtin_va_list.
+   Used from c_common_nodes_and_builtins.  */
+
+static int
+aarch64_ms_variadic_abi_enum_va_list (int idx, const char **pname, tree *ptree)
+{
+  switch (idx)
+    {
+    default:
+      break;
+
+    case 0:
+      *ptree = ms_va_list_type_node;
+      *pname = "__builtin_ms_va_list";
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Implement TARGET_FN_ABI_VA_LIST.  Return the va_list type node
+   matching the calling ABI of FNDECL.  */
+
+static tree
+aarch64_ms_variadic_abi_fn_abi_va_list (tree fndecl)
+{
+  gcc_assert (fndecl != NULL_TREE);
+
+  arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id ();
+  if (pcs == ARM_PCS_MS_VARIADIC)
+    return ms_va_list_type_node;
+
+  return std_fn_abi_va_list (fndecl);
+}
+
+/* Implement TARGET_CANONICAL_VA_LIST_TYPE.  Return the canonical
+   va_list type for TYPE, or NULL_TREE if TYPE is not a va_list.  */
+
+static tree
+aarch64_ms_variadic_abi_canonical_va_list_type (tree type)
+{
+  if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
+    return ms_va_list_type_node;
+
+  return NULL_TREE;
+}
+
 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
 
 static tree
@@ -22077,7 +22309,10 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v,
     vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
 		    cfun->va_list_fpr_size / UNITS_PER_VREG);
 
+  /* The Microsoft variadic function call ABI never uses vector registers.  */
+#if !defined (TARGET_AARCH64_MS_ABI)
   if (!TARGET_FLOAT)
+#endif
     {
       gcc_assert (local_cum.aapcs_nvrn == 0);
       vr_saved = 0;
@@ -22128,8 +22363,9 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v,
 
   /* We don't save the size into *PRETEND_SIZE because we want to avoid
      any complication of having crtl->args.pretend_args_size changed.  */
+  cfun->machine->frame.unaligned_saved_varargs_size = gr_saved * UNITS_PER_WORD;
   cfun->machine->frame.saved_varargs_size
-    = (ROUND_UP (gr_saved * UNITS_PER_WORD,
+    = (ROUND_UP (cfun->machine->frame.unaligned_saved_varargs_size,
 		 STACK_BOUNDARY / BITS_PER_UNIT)
        + vr_saved * UNITS_PER_VREG);
 }
@@ -22922,9 +23158,13 @@ static const char *
 aarch64_mangle_type (const_tree type)
 {
   /* The AArch64 ABI documents say that "__va_list" has to be
-     mangled as if it is in the "std" namespace.  */
+     mangled as if it is in the "std" namespace.
+     The Windows Arm64 ABI uses just an address of the first variadic
+     argument.  */
+#if !defined (TARGET_AARCH64_MS_ABI)
   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
     return "St9__va_list";
+#endif
 
   /* Half-precision floating point types.  */
   if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
@@ -25550,6 +25790,25 @@ aarch64_post_cfi_startproc (FILE *f, tree ignored ATTRIBUTE_UNUSED)
 	asm_fprintf (f, "\t.cfi_b_key_frame\n");
 }
 
+/* Implement TARGET_STRICT_ARGUMENT_NAMING.
+
+   Return true if the location where a function argument is passed
+   depends on whether or not it is a named argument.
+
+   For Microsoft ABI of variadic function calls, treat the named arguments as
+   unnamed as they are handled the same way as variadic arguments.  */
+
+static bool
+aarch64_ms_variadic_abi_strict_argument_naming (cumulative_args_t pcum_v)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+
+  if (pcum->pcs_variant == ARM_PCS_MS_VARIADIC)
+    return false;
+
+  return hook_bool_CUMULATIVE_ARGS_true (pcum_v);
+}
+
 /* Implements TARGET_ASM_FILE_START.  Output the assembly header.  */
 
 static void
@@ -32095,8 +32354,13 @@ aarch64_run_selftests (void)
 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY aarch64_print_patchable_function_entry
 
+#if defined (TARGET_AARCH64_MS_ABI)
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_ms_variadic_abi_build_builtin_va_list
+#else
 #undef TARGET_BUILD_BUILTIN_VA_LIST
 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
+#endif
 
 #undef TARGET_CALLEE_COPIES
 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_arg_info_false
@@ -32166,12 +32430,31 @@ aarch64_run_selftests (void)
 #undef  TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
 
+#if defined (TARGET_AARCH64_MS_ABI)
+#undef TARGET_ENUM_VA_LIST_P
+#define TARGET_ENUM_VA_LIST_P aarch64_ms_variadic_abi_enum_va_list
+
+#undef TARGET_FN_ABI_VA_LIST
+#define TARGET_FN_ABI_VA_LIST aarch64_ms_variadic_abi_fn_abi_va_list
+
+#undef TARGET_CANONICAL_VA_LIST_TYPE
+#define TARGET_CANONICAL_VA_LIST_TYPE aarch64_ms_variadic_abi_canonical_va_list_type
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START aarch64_ms_variadic_abi_expand_builtin_va_start
+#else
 #undef TARGET_EXPAND_BUILTIN_VA_START
 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
+#endif
 
 #undef TARGET_FOLD_BUILTIN
 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
 
+#if defined (TARGET_AARCH64_MS_ABI)
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES aarch64_arg_partial_bytes
+#endif
+
 #undef TARGET_FUNCTION_ARG
 #define TARGET_FUNCTION_ARG aarch64_function_arg
 
@@ -32207,8 +32490,10 @@ aarch64_run_selftests (void)
 #undef TARGET_GIMPLE_FOLD_BUILTIN
 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
 
+#if !defined (TARGET_AARCH64_MS_ABI)
 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
+#endif
 
 #undef  TARGET_INIT_BUILTINS
 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 096c853af7f..c1b858563fb 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1003,6 +1003,9 @@ struct GTY (()) aarch64_frame
      STACK_BOUNDARY.  */
   HOST_WIDE_INT saved_varargs_size;
 
+  /* The same as above except it is the original unaligned stack size.  */
+  HOST_WIDE_INT unaligned_saved_varargs_size;
+
   /* The number of bytes between the bottom of the static frame (the bottom
      of the outgoing arguments) and the bottom of the register save area.
      This value is always a multiple of STACK_BOUNDARY.  */
@@ -1166,6 +1169,10 @@ enum arm_pcs
   ARM_PCS_SVE,			/* For functions that pass or return
 				   values in SVE registers.  */
   ARM_PCS_TLSDESC,		/* For targets of tlsdesc calls.  */
+  ARM_PCS_MS_VARIADIC,		/* Microsoft handles variadic functions
+				   differently.  All composites are treated
+				   alike.  SIMD and floating-point registers
+				   aren't used.  */
   ARM_PCS_UNKNOWN
 };
 
@@ -1549,6 +1556,9 @@ extern GTY(()) tree aarch64_fp16_ptr_type_node;
    bfloat16_type_node.  Defined in aarch64-builtins.cc.  */
 extern GTY(()) tree aarch64_bf16_ptr_type_node;
 
+/* Microsoft Arm64 variadic function call ABI specific va_list type node.  */
+extern GTY(()) tree ms_va_list_type_node;
+
 /* The generic unwind code in libgcc does not initialize the frame pointer.
    So in order to unwind a function using a frame pointer, the very first
    function that is unwound must save the frame pointer.  That way the frame
diff --git a/gcc/config/aarch64/cross-stdarg.h b/gcc/config/aarch64/cross-stdarg.h
new file mode 100644
index 00000000000..573e856998f
--- /dev/null
+++ b/gcc/config/aarch64/cross-stdarg.h
@@ -0,0 +1,42 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __CROSS_STDARG_H_INCLUDED
+#define __CROSS_STDARG_H_INCLUDED
+
+#define __ms_va_copy(__d,__s) __builtin_ms_va_copy(__d,__s)
+#define __ms_va_start(__v,__l) __builtin_ms_va_start(__v,__l)
+#define __ms_va_arg(__v,__l)	__builtin_va_arg(__v,__l)
+#define __ms_va_end(__v) __builtin_ms_va_end(__v)
+
+#ifndef __GNUC_MS_VA_LIST
+#define __GNUC_MS_VA_LIST
+typedef __builtin_ms_va_list __gnuc_ms_va_list;
+#endif
+
+#ifndef _MS_VA_LIST_DEFINED
+#define _MS_VA_LIST_DEFINED
+typedef __gnuc_ms_va_list ms_va_list;
+#endif
+
+#endif /* __CROSS_STDARG_H_INCLUDED */
diff --git a/gcc/config/aarch64/cygming.h b/gcc/config/aarch64/cygming.h
index 7e2203c3e92..aa580f4be27 100644
--- a/gcc/config/aarch64/cygming.h
+++ b/gcc/config/aarch64/cygming.h
@@ -204,8 +204,11 @@ still needed for compilation.  */
   } while (0)
 
 #define SUBTARGET_ATTRIBUTE_TABLE \
-  { "selectany", 0, 0, true, false, false, false, \
-    mingw_handle_selectany_attribute, NULL }
+  { "selectany", 0, 0, true, false, false, false,			\
+    mingw_handle_selectany_attribute, NULL },				\
+  { "ms_abi", 0, 0, false, true, true, true,				\
+    aarch64_handle_ms_abi_attribute, NULL },				\
+  { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL }
 
 #undef SUB_TARGET_RECORD_STUB
 #define SUB_TARGET_RECORD_STUB(NAME, DECL) mingw_pe_record_stub((NAME), \
@@ -252,3 +255,9 @@ still needed for compilation.  */
 #define TARGET_ASM_LTO_END mingw_pe_asm_lto_end
 
 #endif
+
+#undef  SUBTARGET_INIT_BUILTINS
+#define SUBTARGET_INIT_BUILTINS						\
+  do {									\
+    aarch64_ms_variadic_abi_init_builtins ();				\
+  } while (0)
diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc
index f22496615ed..fc85425d1a2 100644
--- a/gcc/config/mingw/winnt.cc
+++ b/gcc/config/mingw/winnt.cc
@@ -94,6 +94,28 @@ mingw_handle_selectany_attribute (tree *node, tree name, tree, int,
   return NULL_TREE;
 }
 
+/* Handle a "ms_abi" attribute; arguments as in struct
+   attribute_spec.handler.  */
+
+tree
+aarch64_handle_ms_abi_attribute (tree *node, tree name, tree, int,
+				 bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) != FUNCTION_TYPE
+      && TREE_CODE (*node) != METHOD_TYPE
+      && TREE_CODE (*node) != FIELD_DECL
+      && TREE_CODE (*node) != TYPE_DECL)
+    {
+      warning (OPT_Wattributes, "%qE attribute only applies to functions",
+	       name);
+      *no_add_attrs = true;
+
+      return NULL_TREE;
+    }
+
+  return NULL_TREE;
+}
+
 
 /* Return the type that we should use to determine if DECL is
    imported or exported.  */
diff --git a/gcc/config/mingw/winnt.h b/gcc/config/mingw/winnt.h
index 23f4dc94ec5..5ef11c14ec0 100644
--- a/gcc/config/mingw/winnt.h
+++ b/gcc/config/mingw/winnt.h
@@ -21,6 +21,7 @@ http://www.gnu.org/licenses/.  */
 #ifndef USED_FOR_TARGET
 
 extern tree mingw_handle_selectany_attribute (tree *, tree, tree, int, bool *);
+extern tree aarch64_handle_ms_abi_attribute (tree *, tree, tree, int, bool *);
 
 extern void mingw_pe_asm_named_section (const char *, unsigned int, tree);
 extern void mingw_pe_asm_lto_start (void);
-- 
2.50.1.vfs.0.0

