Re: [PATCH] [CodeGen] Teach X86_64ABIInfo about AVX512.

Ahmed Bougacha Fri, 22 May 2015 11:03:52 -0700

- Add OpenMP default alignment for AVX512
- Extract HasAVX/HasAVX512 into a X86_64BaseABIInfo


In r237989 I refactored the "HasAVX" flag to just use the TargetInfo::getABI() 
string, but I'm tempted to revert, and pass down an additional HasAVX512 flag.  
I originally wanted to do the same thing for other targets, but didn't realize 
that some of the ABIs depend on things not in TargetInfo, e.g. CodeGenOptions, 
which aren't supposed to be available?

Anyway, opinions welcome, one way or the other.


http://reviews.llvm.org/D9894

Files:
  lib/Basic/Targets.cpp
  lib/CodeGen/TargetInfo.cpp
  test/CodeGen/x86_64-arguments.c
  test/OpenMP/simd_metadata.c

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/

Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2237,7 +2237,9 @@
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override;
   StringRef getABI() const override {
-    if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
+    if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX512F)
+      return "avx512";
+    else if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
       return "avx";
     else if (getTriple().getArch() == llvm::Triple::x86 &&
              MMX3DNowLevel == NoMMX3DNow)
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -1386,8 +1386,27 @@
 
 
 namespace {
+
+/// The Win and SysV X86_64 shared ABI information.
+class X86_64BaseABIInfo : public ABIInfo {
+  bool HasAVX;
+  bool HasAVX512;
+
+protected:
+  X86_64BaseABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT),
+  HasAVX(), HasAVX512() {
+    StringRef ABI = getTarget().getABI();
+    HasAVX512 = ABI == "avx512";
+    HasAVX = HasAVX512 || ABI == "avx";
+  }
+
+public:
+  bool hasAVX() const { return HasAVX; }
+  bool hasAVX512() const { return HasAVX512; }
+};
+
 /// X86_64ABIInfo - The X86_64 ABI information.
-class X86_64ABIInfo : public ABIInfo {
+class X86_64ABIInfo : public X86_64BaseABIInfo {
   enum Class {
     Integer = 0,
     SSE,
@@ -1497,7 +1516,7 @@
 
 public:
   X86_64ABIInfo(CodeGen::CodeGenTypes &CGT) :
-      ABIInfo(CGT),
+      X86_64BaseABIInfo(CGT),
       Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
   }
 
@@ -1522,20 +1541,16 @@
   bool has64BitPointers() const {
     return Has64BitPointers;
   }
-
-  bool hasAVX() const {
-    return getTarget().getABI() == "avx";
-  }
 };
 
 /// WinX86_64ABIInfo - The Windows X86_64 ABI information.
-class WinX86_64ABIInfo : public ABIInfo {
+class WinX86_64ABIInfo : public X86_64BaseABIInfo {
 
   ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs,
                       bool IsReturnType) const;
 
 public:
-  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
+  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) : X86_64BaseABIInfo(CGT) {}
 
   void computeInfo(CGFunctionInfo &FI) const override;
 
@@ -1625,7 +1640,8 @@
   }
 
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return getABIInfo().hasAVX() ? 32 : 16;
+    auto &ABII = getABIInfo();
+    return ABII.hasAVX512() ? 64 : ABII.hasAVX() ? 32 : 16;
   }
 };
 
@@ -1697,12 +1713,15 @@
 }
 
 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
-  bool hasAVX() const { return getABIInfo().getTarget().getABI() == "avx"; }
-
 public:
   WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
     : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
 
+  const WinX86_64ABIInfo &getABIInfo() const {
+    return static_cast<const WinX86_64ABIInfo &>(
+        TargetCodeGenInfo::getABIInfo());
+  }
+
   void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
 
@@ -1733,7 +1752,8 @@
   }
 
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return hasAVX() ? 32 : 16;
+    auto &ABII = getABIInfo();
+    return ABII.hasAVX512() ? 64 : ABII.hasAVX() ? 32 : 16;
   }
 };
 
@@ -1922,7 +1942,8 @@
       // split.
       if (OffsetBase && OffsetBase != 64)
         Hi = Lo;
-    } else if (Size == 128 || (hasAVX() && isNamedArg && Size == 256)) {
+    } else if (Size == 128 || (isNamedArg && ((hasAVX() && Size == 256) ||
+                                              (hasAVX512() && Size == 512)))) {
       // Arguments of 256-bits are split into four eightbyte chunks. The
       // least significant one belongs to class SSE and all the others to class
       // SSEUP. The original Lo and Hi design considers that types can't be
@@ -1934,6 +1955,9 @@
       // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
       // registers if they are "named", i.e. not part of the "..." of a
       // variadic function.
+      //
+      // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
+      // split into eight eightbyte chunks, one SSE and seven SSEUP.
       Lo = SSE;
       Hi = SSEUp;
     }
@@ -2144,7 +2168,7 @@
 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
   if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
     uint64_t Size = getContext().getTypeSize(VecTy);
-    unsigned LargestVector = hasAVX() ? 256 : 128;
+    unsigned LargestVector = hasAVX512() ? 512 : hasAVX() ? 256 : 128;
     if (Size <= 64 || Size > LargestVector)
       return true;
   }
Index: test/CodeGen/x86_64-arguments.c
===================================================================
--- test/CodeGen/x86_64-arguments.c
+++ test/CodeGen/x86_64-arguments.c
@@ -1,7 +1,9 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
-// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
-// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
 #include <stdarg.h>
 
 // CHECK-LABEL: define signext i8 @f0()
@@ -458,3 +460,77 @@
 }
 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
+
+typedef float __m512 __attribute__ ((__vector_size__ (64)));
+typedef struct {
+  __m512 m;
+} s512;
+
+s512 x55;
+__m512 x56;
+
+// Even on AVX512, aggregates of size larger than four eightbytes have class
+// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
+//
+// CHECK: declare void @f55(%struct.s512* byval align 64)
+void f55(s512 x);
+
+// However, __m512 has type SSE/SSEUP on AVX512.
+//
+// AVX512: declare void @f56(<16 x float>)
+// NO-AVX512: declare void @f56(<16 x float>* byval align 64)
+void f56(__m512 x);
+void f57() { f55(x55); f56(x56); }
+
+// Like for __m128 on AVX, check that the struct below is passed
+// in the same way regardless of AVX512 being used.
+//
+// CHECK: declare void @f58(%struct.t256* byval align 32)
+typedef struct t256 {
+  __m256 m;
+  __m256 n;
+} two256;
+
+extern void f58(two256 s);
+void f59(two256 s) {
+  f58(s);
+}
+
+// CHECK: declare void @f60(%struct.sat256* byval align 32)
+typedef struct at256 {
+  __m256 array[2];
+} Atwo256;
+typedef struct sat256 {
+  Atwo256 x;
+} SAtwo256;
+
+extern void f60(SAtwo256 s);
+void f61(SAtwo256 s) {
+  f60(s);
+}
+
+// AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+void f62_helper(int, ...);
+__m512 x62;
+void f62() {
+  f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
+
+// Like for __m256 on AVX, we always pass __m512 in memory, and don't
+// need to use the register save area.
+//
+// AVX512-LABEL: define void @f63
+// AVX512-NOT: br i1
+// AVX512: ret void
+void f63(__m512 *m, __builtin_va_list argList) {
+  *m = __builtin_va_arg(argList, __m512);
+}
+
+// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
+void f64_helper(__m512, ...);
+__m512 x64;
+void f64() {
+  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
Index: test/OpenMP/simd_metadata.c
===================================================================
--- test/OpenMP/simd_metadata.c
+++ test/OpenMP/simd_metadata.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
 // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
+// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
 // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
 // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX
 
@@ -16,6 +17,7 @@
 
 // X86-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
 // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
+// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
 // PPC-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
 // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
 
@@ -25,6 +27,7 @@
 
 // X86-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
 // X86-AVX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
+// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
 // PPC-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
 // PPC-QPX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Re: [PATCH] [CodeGen] Teach X86_64ABIInfo about AVX512.

Reply via email to