[PATCH] D40872: [NVPTX, CUDA] Added llvm.nvvm.fns intrinsic and matching __nvvm_fns builtin in clang.

2017-12-06 Thread Artem Belevich via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL319909: [NVPTX,CUDA] Added llvm.nvvm.fns intrinsic and 
matching __nvvm_fns builtin in… (authored by tra).

Changed prior to commit:
  https://reviews.llvm.org/D40872?vs=125649=125757#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D40872

Files:
  cfe/trunk/include/clang/Basic/BuiltinsNVPTX.def
  cfe/trunk/lib/Headers/__clang_cuda_intrinsics.h
  llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
  llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
  llvm/trunk/test/CodeGen/NVPTX/fns.ll

Index: llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
===
--- llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
@@ -682,6 +682,11 @@
   def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
   Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
 
+// FNS
+
+  def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+[IntrNoMem]>;
 
 // Atomics not available as llvm intrinsics.
   def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
Index: llvm/trunk/test/CodeGen/NVPTX/fns.ll
===
--- llvm/trunk/test/CodeGen/NVPTX/fns.ll
+++ llvm/trunk/test/CodeGen/NVPTX/fns.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s
+
+declare i32 @llvm.nvvm.fns(i32, i32, i32)
+
+; CHECK-LABEL: .func{{.*}}fns
+define i32 @fns(i32 %mask, i32 %base, i32 %offset) {
+  ; CHECK: ld.param.u32 	[[MASK:%r[0-9]+]], [fns_param_0];
+  ; CHECK: ld.param.u32 	[[BASE:%r[0-9]+]], [fns_param_1];
+  ; CHECK: ld.param.u32 	[[OFFSET:%r[0-9]+]], [fns_param_2];
+
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], [[BASE]], [[OFFSET]];
+  %r0 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], [[BASE]], 0;
+  %r1 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 0);
+  %r01 = add i32 %r0, %r1;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], 1, [[OFFSET]];
+  %r2 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], 1, 0;
+  %r3 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 0);
+  %r23 = add i32 %r2, %r3;
+  %r0123 = add i32 %r01, %r23;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, [[BASE]], [[OFFSET]];
+  %r4 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, [[BASE]], 0;
+  %r5 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 0);
+  %r45 = add i32 %r4, %r5;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, 1, [[OFFSET]];
+  %r6 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, 1, 0;
+  %r7 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 0);
+  %r67 = add i32 %r6, %r7;
+  %r4567 = add i32 %r45, %r67;
+  %r = add i32 %r0123, %r4567;
+  ret i32 %r;
+}
+
Index: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
===
--- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -979,6 +979,33 @@
 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
   Float64Regs, int_nvvm_bitcast_d2ll>;
 
+//
+// FNS
+//
+
+class INT_FNS_MBO
+  : NVPTXInst<(outs Int32Regs:$dst), ins,
+   "fns.b32 \t$dst, $mask, $base, $offset;",
+   [(set Int32Regs:$dst, Operands )]>,
+Requires<[hasPTX60, hasSM30]>;
+
+def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
+ (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
+def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,i32imm:$offset),
+ (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,   imm:$offset)>;
+def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,i32imm:$base, Int32Regs:$offset),
+ (int_nvvm_fns Int32Regs:$mask,   imm:$base, Int32Regs:$offset)>;
+def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,i32imm:$base,i32imm:$offset),
+ (int_nvvm_fns Int32Regs:$mask,   imm:$base,   imm:$offset)>;
+def INT_FNS_irr : INT_FNS_MBO<(insi32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
+ (int_nvvm_fns   imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
+def INT_FNS_iri : INT_FNS_MBO<(insi32imm:$mask, Int32Regs:$base,i32imm:$offset),
+ (int_nvvm_fns   imm:$mask, Int32Regs:$base,   imm:$offset)>;
+def INT_FNS_iir : INT_FNS_MBO<(insi32imm:$mask,i32imm:$base, Int32Regs:$offset),
+ (int_nvvm_fns   imm:$mask,   imm:$base, Int32Regs:$offset)>;
+def INT_FNS_iii : INT_FNS_MBO<(insi32imm:$mask,i32imm:$base,i32imm:$offset),
+ (int_nvvm_fns   

[PATCH] D40872: [NVPTX, CUDA] Added llvm.nvvm.fns intrinsic and matching __nvvm_fns builtin in clang.

2017-12-05 Thread Artem Belevich via Phabricator via cfe-commits
tra created this revision.
Herald added subscribers: hiraditya, sanjoy, jholewinski.

https://reviews.llvm.org/D40872

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/Headers/__clang_cuda_intrinsics.h
  llvm/include/llvm/IR/IntrinsicsNVVM.td
  llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
  llvm/test/CodeGen/NVPTX/fns.ll

Index: llvm/test/CodeGen/NVPTX/fns.ll
===
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/fns.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s
+
+declare i32 @llvm.nvvm.fns(i32, i32, i32)
+
+; CHECK-LABEL: .func{{.*}}fns
+define i32 @fns(i32 %mask, i32 %base, i32 %offset) {
+  ; CHECK: ld.param.u32 	[[MASK:%r[0-9]+]], [fns_param_0];
+  ; CHECK: ld.param.u32 	[[BASE:%r[0-9]+]], [fns_param_1];
+  ; CHECK: ld.param.u32 	[[OFFSET:%r[0-9]+]], [fns_param_2];
+
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], [[BASE]], [[OFFSET]];
+  %r0 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], [[BASE]], 0;
+  %r1 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 0);
+  %r01 = add i32 %r0, %r1;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], 1, [[OFFSET]];
+  %r2 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], 1, 0;
+  %r3 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 0);
+  %r23 = add i32 %r2, %r3;
+  %r0123 = add i32 %r01, %r23;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, [[BASE]], [[OFFSET]];
+  %r4 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, [[BASE]], 0;
+  %r5 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 0);
+  %r45 = add i32 %r4, %r5;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, 1, [[OFFSET]];
+  %r6 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, 1, 0;
+  %r7 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 0);
+  %r67 = add i32 %r6, %r7;
+  %r4567 = add i32 %r45, %r67;
+  %r = add i32 %r0123, %r4567;
+  ret i32 %r;
+}
+
Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
===
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -979,6 +979,33 @@
 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
   Float64Regs, int_nvvm_bitcast_d2ll>;
 
+//
+// FNS
+//
+
+class INT_FNS_MBO
+  : NVPTXInst<(outs Int32Regs:$dst), ins,
+   "fns.b32 \t$dst, $mask, $base, $offset;",
+   [(set Int32Regs:$dst, Operands )]>,
+Requires<[hasPTX60, hasSM30]>;
+
+def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
+ (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
+def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,i32imm:$offset),
+ (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,   imm:$offset)>;
+def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,i32imm:$base, Int32Regs:$offset),
+ (int_nvvm_fns Int32Regs:$mask,   imm:$base, Int32Regs:$offset)>;
+def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,i32imm:$base,i32imm:$offset),
+ (int_nvvm_fns Int32Regs:$mask,   imm:$base,   imm:$offset)>;
+def INT_FNS_irr : INT_FNS_MBO<(insi32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
+ (int_nvvm_fns   imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
+def INT_FNS_iri : INT_FNS_MBO<(insi32imm:$mask, Int32Regs:$base,i32imm:$offset),
+ (int_nvvm_fns   imm:$mask, Int32Regs:$base,   imm:$offset)>;
+def INT_FNS_iir : INT_FNS_MBO<(insi32imm:$mask,i32imm:$base, Int32Regs:$offset),
+ (int_nvvm_fns   imm:$mask,   imm:$base, Int32Regs:$offset)>;
+def INT_FNS_iii : INT_FNS_MBO<(insi32imm:$mask,i32imm:$base,i32imm:$offset),
+ (int_nvvm_fns   imm:$mask,   imm:$base,   imm:$offset)>;
+
 //---
 // Atomic Functions
 //---
Index: llvm/include/llvm/IR/IntrinsicsNVVM.td
===
--- llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -682,6 +682,11 @@
   def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
   Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
 
+// FNS
+
+  def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+[IntrNoMem]>;
 
 // Atomics not available as llvm intrinsics.
   def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
Index: clang/lib/Headers/__clang_cuda_intrinsics.h
===
--- clang/lib/Headers/__clang_cuda_intrinsics.h
+++