https://github.com/efriedma-quic created https://github.com/llvm/llvm-project/pull/152411
The clang side of the calling convention code for arm64 vs. arm64ec is close enough that this isn't really noticeable in most cases, but the rule for choosing whether to pass a struct directly or indirectly is significantly different. (Adapted from my old patch https://reviews.llvm.org/D125419 .) A small example of how the two rules disagree is sketched after the patch below.

Fixes #89615.

>From 9b8fdaf87ceb1478e5c4a0324de99b5ce375b175 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efrie...@quicinc.com>
Date: Wed, 6 Aug 2025 16:31:03 -0700
Subject: [PATCH] [Arm64EC][clang] Implement varargs support in clang.

The clang side of the calling convention code for arm64 vs. arm64ec is
close enough that this isn't really noticeable in most cases, but the
rule for choosing whether to pass a struct directly or indirectly is
significantly different.

(Adapted from my old patch https://reviews.llvm.org/D125419 .)

Fixes #89615.
---
 clang/lib/CodeGen/ABIInfo.cpp         |  4 ++
 clang/lib/CodeGen/ABIInfo.h           |  4 ++
 clang/lib/CodeGen/Targets/AArch64.cpp | 44 ++++++++++++++------
 clang/lib/CodeGen/Targets/X86.cpp     |  6 +++
 clang/test/CodeGen/arm64ec-varargs.c  | 59 +++++++++++++++++++++++++++
 5 files changed, 105 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CodeGen/arm64ec-varargs.c

diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index 3ef430e19ebd3..822c3803a33e7 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -244,6 +244,10 @@ ABIInfo::getOptimalVectorMemoryType(llvm::FixedVectorType *T,
   return T;
 }
 
+ABIArgInfo ABIInfo::classifyArgForArm64ECVarArg(QualType Ty) const {
+  llvm_unreachable("Only implemented for x86");
+}
+
 // Pin the vtable to this file.
 SwiftABIInfo::~SwiftABIInfo() = default;
 
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 9c7029c99bd44..6f07a82a39fd7 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -132,6 +132,10 @@ class ABIInfo {
   virtual llvm::FixedVectorType *
   getOptimalVectorMemoryType(llvm::FixedVectorType *T,
                              const LangOptions &Opt) const;
+
+  /// Used by Arm64EC calling convention code to call into x86 calling
+  /// convention code for varargs functions.
+  virtual ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const;
 };
 
 /// Target specific hooks for defining how a type should be passed or returned
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index b82c46966cf0b..fe562f37e30bc 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -24,9 +24,16 @@ namespace {
 
 class AArch64ABIInfo : public ABIInfo {
   AArch64ABIKind Kind;
 
+  std::unique_ptr<TargetCodeGenInfo> WinX86_64CodegenInfo;
+
 public:
-  AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
-      : ABIInfo(CGT), Kind(Kind) {}
+  AArch64ABIInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
+      : ABIInfo(CGM.getTypes()), Kind(Kind) {
+    if (getTarget().getTriple().isWindowsArm64EC()) {
+      WinX86_64CodegenInfo =
+          createWinX86_64TargetCodeGenInfo(CGM, X86AVXABILevel::None);
+    }
+  }
 
   bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
 
@@ -119,9 +126,9 @@ class AArch64SwiftABIInfo : public SwiftABIInfo {
 
 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
-      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
-    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
+  AArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
+      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGM, Kind)) {
+    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGM.getTypes());
   }
 
   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
@@ -200,8 +207,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
 
 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
 public:
-  WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
-      : AArch64TargetCodeGenInfo(CGT, K) {}
+  WindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind K)
+      : AArch64TargetCodeGenInfo(CGM, K) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
@@ -368,6 +375,12 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
                                                 unsigned &NPRN) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
+  if (IsVariadicFn && getTarget().getTriple().isWindowsArm64EC()) {
+    // Arm64EC varargs functions use the x86_64 classification rules,
+    // not the AArch64 ABI rules.
+    return WinX86_64CodegenInfo->getABIInfo().classifyArgForArm64ECVarArg(Ty);
+  }
+
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty))
     return coerceIllegalVector(Ty, NSRN, NPRN);
@@ -1151,9 +1164,16 @@ RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                    QualType Ty, AggValueSlot Slot) const {
   bool IsIndirect = false;
 
-  // Composites larger than 16 bytes are passed by reference.
-  if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
-    IsIndirect = true;
+  if (getTarget().getTriple().isWindowsArm64EC()) {
+    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
+    // not 1, 2, 4, or 8 bytes, must be passed by reference."
+    uint64_t Width = getContext().getTypeSize(Ty);
+    IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
+  } else {
+    // Composites larger than 16 bytes are passed by reference.
+    if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
+      IsIndirect = true;
+  }
 
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                           CGF.getContext().getTypeInfoInChars(Ty),
@@ -1345,11 +1365,11 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                         AArch64ABIKind Kind) {
-  return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
+  return std::make_unique<AArch64TargetCodeGenInfo>(CGM, Kind);
 }
 
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                                AArch64ABIKind K) {
-  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
+  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM, K);
 }
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index abb91486e7ee6..f473e9d7665ac 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -1409,6 +1409,12 @@ class WinX86_64ABIInfo : public ABIInfo {
     return isX86VectorCallAggregateSmallEnough(NumMembers);
   }
 
+  ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const override {
+    unsigned FreeSSERegs = 0;
+    return classify(Ty, FreeSSERegs, /*IsReturnType=*/false,
+                    /*IsVectorCall=*/false, /*IsRegCall=*/false);
+  }
+
 private:
   ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                       bool IsVectorCall, bool IsRegCall) const;
diff --git a/clang/test/CodeGen/arm64ec-varargs.c b/clang/test/CodeGen/arm64ec-varargs.c
new file mode 100644
index 0000000000000..6bb5cff5fb30e
--- /dev/null
+++ b/clang/test/CodeGen/arm64ec-varargs.c
@@ -0,0 +1,59 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --global-value-regex "f"
+// RUN: %clang_cc1 -opaque-pointers -triple arm64ec-windows-msvc -emit-llvm -o - %s | FileCheck %s
+
+typedef struct { float x[2]; } A;
+typedef struct { float x[4]; } B;
+
+void f(A a, ...) {
+  __builtin_va_list b;
+  __builtin_va_start(b, a);
+  float x = __builtin_va_arg(b, A).x[0];
+  float y = __builtin_va_arg(b, B).x[0];
+}
+
+void g(A a, B b) { f(a, b); }
+
+// CHECK-LABEL: @f(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
+// CHECK-NEXT:    [[B:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[X:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_A]], align 4
+// CHECK-NEXT:    [[Y:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[REF_TMP2:%.*]] = alloca [[STRUCT_B:%.*]], align 4
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store i64 [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[B]])
+// CHECK-NEXT:    [[ARGP_CUR:%.*]] = load ptr, ptr [[B]], align 8
+// CHECK-NEXT:    [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
+// CHECK-NEXT:    store ptr [[ARGP_NEXT]], ptr [[B]], align 8
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 8 [[ARGP_CUR]], i64 8, i1 false)
+// CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x float], ptr [[X1]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    store float [[TMP0]], ptr [[X]], align 4
+// CHECK-NEXT:    [[ARGP_CUR3:%.*]] = load ptr, ptr [[B]], align 8
+// CHECK-NEXT:    [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i64 8
+// CHECK-NEXT:    store ptr [[ARGP_NEXT4]], ptr [[B]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR3]], align 8
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP2]], ptr align 4 [[TMP1]], i64 16, i1 false)
+// CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[REF_TMP2]], i32 0, i32 0
+// CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x float], ptr [[X5]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
+// CHECK-NEXT:    store float [[TMP2]], ptr [[Y]], align 4
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: @g(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
+// CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 4
+// CHECK-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_B]], align 4
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store [2 x float] [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT:    store [4 x float] [[B_COERCE:%.*]], ptr [[COERCE_DIVE1]], align 4
+// CHECK-NEXT:    [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[COERCE_DIVE2]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[BYVAL_TEMP]], ptr align 4 [[B]], i64 16, i1 false)
+// CHECK-NEXT:    call void (i64, ...) @f(i64 [[TMP0]], ptr dead_on_return noundef [[BYVAL_TEMP]])
+// CHECK-NEXT:    ret void
+//
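
To make the difference in the struct-passing rule concrete, here is a small C sketch (illustration only, not part of the patch; the struct names are invented for the example). The sizes are chosen so the two rule sets disagree:

  typedef struct { float x[3]; } S12;  // 12 bytes
  typedef struct { float x[4]; } S16;  // 16 bytes
  typedef struct { float x[8]; } S32;  // 32 bytes

  void variadic(int n, ...);

  void caller(S12 a, S16 b, S32 c) {
    // AArch64 AAPCS rule: composites of 16 bytes or less are passed
    // directly, so only c (32 bytes) would be passed by reference.
    // MS x64 rule (which Arm64EC varargs functions follow after this
    // patch): any argument whose size is not 1, 2, 4, or 8 bytes is
    // passed by reference, so a, b, and c are all passed by reference.
    variadic(3, a, b, c);
  }

The same rule shows up on the callee side in EmitMSVAArg, which is what the test above checks: A (8 bytes, a valid x64 argument size) is copied directly out of its va_list slot, while B (16 bytes) is read through a pointer stored in the slot.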