[clang] 987aa34 - Corrected clang amdgpu-features.cl test for 6d614a82a4230ea69e322f56dc18dcbd815ed37b (AMDGPU MFMA built-ins)

2020-01-28 Thread Konstantin Pyzhov via cfe-commits

Author: Konstantin Pyzhov
Date: 2020-01-28T05:41:42-05:00
New Revision: 987aa3435f4517d663f776e261683b1620eb8101

URL: 
https://github.com/llvm/llvm-project/commit/987aa3435f4517d663f776e261683b1620eb8101
DIFF: 
https://github.com/llvm/llvm-project/commit/987aa3435f4517d663f776e261683b1620eb8101.diff

LOG: Corrected clang amdgpu-features.cl test for 
6d614a82a4230ea69e322f56dc18dcbd815ed37b (AMDGPU MFMA built-ins)

Differential Revision: https://reviews.llvm.org/D72723

Added: 


Modified: 
clang/test/CodeGenOpenCL/amdgpu-features.cl

Removed: 




diff  --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl 
b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index f3ed26494831..d8eb2d26b0e3 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -16,7 +16,7 @@
 
 // GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
 // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,-fp32-denormals"
 // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
 // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
 // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] ac9b2a6 - Add missing clang tests for 6d614a82a4230ea69e322f56dc18dcbd815ed37b (AMDGPU MFMA built-ins)

2020-01-28 Thread Konstantin Pyzhov via cfe-commits

Author: Konstantin Pyzhov
Date: 2020-01-28T04:41:21-05:00
New Revision: ac9b2a6297420a461f7b9db9e2dbd67f5f07f301

URL: 
https://github.com/llvm/llvm-project/commit/ac9b2a6297420a461f7b9db9e2dbd67f5f07f301
DIFF: 
https://github.com/llvm/llvm-project/commit/ac9b2a6297420a461f7b9db9e2dbd67f5f07f301.diff

LOG: Add missing clang tests for 6d614a82a4230ea69e322f56dc18dcbd815ed37b 
(AMDGPU MFMA built-ins)

Differential Revision: https://reviews.llvm.org/D72723

Added: 


Modified: 
clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl

Removed: 




diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
index e69de29bb2d1..c7db942b871a 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -0,0 +1,161 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef float  v4f   __attribute__((ext_vector_type(4)));
+typedef float  v16f  __attribute__((ext_vector_type(16)));
+typedef float  v32f  __attribute__((ext_vector_type(32)));
+typedef half   v4h   __attribute__((ext_vector_type(4)));
+typedef half   v16h  __attribute__((ext_vector_type(16)));
+typedef half   v32h  __attribute__((ext_vector_type(32)));
+typedef int    v4i   __attribute__((ext_vector_type(4)));
+typedef int    v16i  __attribute__((ext_vector_type(16)));
+typedef int    v32i  __attribute__((ext_vector_type(32)));
+typedef short  v2s   __attribute__((ext_vector_type(2)));
+typedef short  v4s   __attribute__((ext_vector_type(4)));
+typedef short  v16s  __attribute__((ext_vector_type(16)));
+typedef short  v32s  __attribute__((ext_vector_type(32)));
+typedef double v4d   __attribute__((ext_vector_type(4)));
+
+
+// CHECK-LABEL: @test_mfma_f32_32x32x1f32
+// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float 
%b, <32 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x1f32(global v32f* out, float a, float b, v32f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x1f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x1f32
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %a, float 
%b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x1f32(global v16f* out, float a, float b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x1f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_4x4x1f32
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %a, float %b, 
<4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_4x4x1f32(global v4f* out, float a, float b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_4x4x1f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_32x32x2f32
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %a, float 
%b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x2f32(global v16f* out, float a, float b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x2f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x4f32
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %a, float 
%b, <4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x4f32(global v4f* out, float a, float b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f32(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_32x32x4f16
+// CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %a, <4 
x half> %b, <32 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x4f16(global v32f* out, v4h a, v4h b, v32f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x4f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x4f16
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %a, <4 
x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_16x16x4f16(global v16f* out, v4h a, v4h b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x4f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_4x4x4f16
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %a, <4 x 
half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_4x4x4f16(global v4f* out, v4h a, v4h b, v4f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_4x4x4f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_32x32x8f16
+// CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %a, <4 
x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
+void test_mfma_f32_32x32x8f16(global v16f* out, v4h a, v4h b, v16f c)
+{
+  *out = __builtin_amdgcn_mfma_f32_32x32x8f16(a, b, c, 0, 0, 0);
+}
+
+// CHECK-LABEL: @test_mfma_f32_16x16x16f16
+// CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %a, <4 
x half> %b, <4 x float> %c, i32 0, i32 0, 

[clang] 6d614a8 - Summary:

2020-01-28 Thread Konstantin Pyzhov via cfe-commits

Author: Konstantin Pyzhov
Date: 2020-01-28T03:51:27-05:00
New Revision: 6d614a82a4230ea69e322f56dc18dcbd815ed37b

URL: 
https://github.com/llvm/llvm-project/commit/6d614a82a4230ea69e322f56dc18dcbd815ed37b
DIFF: 
https://github.com/llvm/llvm-project/commit/6d614a82a4230ea69e322f56dc18dcbd815ed37b.diff

LOG: Summary:
This CL adds clang declarations of built-in functions for AMDGPU MFMA 
intrinsics and instructions.
OpenCL tests for new built-ins are included.

Differential Revision: https://reviews.llvm.org/D72723

Added: 
clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl

Modified: 
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/lib/Basic/Targets/AMDGPU.cpp
llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Removed: 




diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 9b3a0f96798f..a9143ad8292c 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -212,5 +212,30 @@ BUILTIN(__builtin_r600_read_tidig_z, "Ui", "nc")
 BUILTIN(__builtin_r600_recipsqrt_ieee, "dd", "nc")
 BUILTIN(__builtin_r600_recipsqrt_ieeef, "ff", "nc")
 
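+//===----------------------------------------------------------------------===//
+// MFMA builtins.
+//===----------------------------------------------------------------------===//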
+
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x1f32, "V32fffV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x1f32, "V16fffV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x1f32, "V4fffV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2f32, "V16fffV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f32, "V4fffV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4f16, "V32fV4hV4hV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x4f16, "V16fV4hV4hV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x4f16, "V4fV4hV4hV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x8f16, "V16fV4hV4hV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x16f16, "V4fV4hV4hV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x4i8, "V32iiiV32iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x4i8, "V16iiiV16iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_4x4x4i8, "V4iiiV4iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_32x32x8i8, "V16iiiV16iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_i32_16x16x16i8, "V4iiiV4iIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x2bf16, "V32fV2sV2sV32fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x2bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_4x4x2bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_32x32x4bf16, "V16fV2sV2sV16fIiIiIi", "nc", "mai-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x8bf16, "V4fV2sV2sV4fIiIiIi", "nc", "mai-insts")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN

diff  --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 249a123ea605..0aaf6813442a 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -163,6 +163,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
   Features["dot4-insts"] = true;
   Features["dot5-insts"] = true;
   Features["dot6-insts"] = true;
+  Features["mai-insts"] = true;
   LLVM_FALLTHROUGH;
 case GK_GFX906:
   Features["dl-insts"] = true;

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx908-param.cl
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 71cea8c1f3d5..68e8a830ecac 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1725,105 +1725,125 @@ def int_amdgcn_buffer_atomic_fadd: 
AMDGPUBufferAtomicNoRtn;
 def int_amdgcn_global_atomic_fadd: AMDGPUGlobalAtomicNoRtn;
 
 // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
-def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
-
-def 

r364183 - [CUDA][HIP] Don't set comdat attribute for CUDA device stub functions.\nDifferential Revision: https://reviews.llvm.org/D63277

2019-06-24 Thread Konstantin Pyzhov via cfe-commits
Author: kpyzhov
Date: Mon Jun 24 07:40:20 2019
New Revision: 364183

URL: http://llvm.org/viewvc/llvm-project?rev=364183&view=rev
Log:
[CUDA][HIP] Don't set comdat attribute for CUDA device stub 
functions.\nDifferential Revision: https://reviews.llvm.org/D63277

Modified:
cfe/trunk/lib/CodeGen/CodeGenModule.cpp

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=364183&r1=364182&r2=364183&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jun 24 07:40:20 2019
@@ -3712,6 +3712,11 @@ static bool shouldBeInCOMDAT(CodeGenModu
   if (!CGM.supportsCOMDAT())
     return false;
 
+  // Do not set COMDAT attribute for CUDA/HIP stub functions to prevent
+  // them being "merged" by the COMDAT Folding linker optimization.
+  if (D.hasAttr<CUDAGlobalAttr>())
+    return false;
+
   if (D.hasAttr<SelectAnyAttr>())
     return true;
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits