This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0a0bae1e9f94: [CUDA] plumb through new sm_90-specific builtins. (authored by tra).

Changed prior to commit:
  https://reviews.llvm.org/D151168?vs=524516&id=525737#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151168/new/

https://reviews.llvm.org/D151168

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGenCUDA/builtins-sm90.cu

Index: clang/test/CodeGenCUDA/builtins-sm90.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/builtins-sm90.cu
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx78" "-target-cpu" "sm_90" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+// CHECK: define{{.*}} void @_Z6kernelPlPvj(
+__attribute__((global)) void kernel(long *out, void *ptr, unsigned u) { // each builtin result below is stored to the next out[] slot
+  int i = 0; // running index into out[]
+  // CHECK: call i1 @llvm.nvvm.isspacep.shared.cluster
+  out[i++] = __nvvm_isspacep_shared_cluster(ptr);
+
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
+  out[i++] = __nvvm_read_ptx_sreg_clusterid_x();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
+  out[i++] = __nvvm_read_ptx_sreg_clusterid_y();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
+  out[i++] = __nvvm_read_ptx_sreg_clusterid_z();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.w()
+  out[i++] = __nvvm_read_ptx_sreg_clusterid_w();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
+  out[i++] = __nvvm_read_ptx_sreg_nclusterid_x();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
+  out[i++] = __nvvm_read_ptx_sreg_nclusterid_y();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
+  out[i++] = __nvvm_read_ptx_sreg_nclusterid_z();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w()
+  out[i++] = __nvvm_read_ptx_sreg_nclusterid_w();
+
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_x();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_y();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_z();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.w()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_w();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_x();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_y();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_z();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.w()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_w();
+
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_ctarank();
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
+  out[i++] = __nvvm_read_ptx_sreg_cluster_nctarank();
+  // CHECK: call i1 @llvm.nvvm.is_explicit_cluster()
+  out[i++] = __nvvm_is_explicit_cluster();
+
+  auto * sptr = (__attribute__((address_space(3))) void *)ptr; // address-space-3 view of ptr, passed to the *_shared_cluster builtins below
+  // CHECK: call ptr @llvm.nvvm.mapa(ptr %{{.*}}, i32 %{{.*}})
+  out[i++] = (long) __nvvm_mapa(ptr, u); // pointer result is cast to long to fit out[]
+  // CHECK: call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
+  out[i++] = (long) __nvvm_mapa_shared_cluster(sptr, u);
+  // CHECK: call i32 @llvm.nvvm.getctarank(ptr {{.*}})
+  out[i++] = __nvvm_getctarank(ptr);
+  // CHECK: call i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) {{.*}})
+  out[i++] = __nvvm_getctarank_shared_cluster(sptr);
+
+  // CHECK: ret void
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -18885,6 +18885,83 @@
     return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
                        Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
                        16);
+  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
+  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
+  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
+  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
+  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
+  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
+  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
+  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
+  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
+  case NVPTX::BI__nvvm_is_explicit_cluster:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
+  case NVPTX::BI__nvvm_isspacep_shared_cluster:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
+        EmitScalarExpr(E->getArg(0)));
+  case NVPTX::BI__nvvm_mapa:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_mapa),
+        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
+  case NVPTX::BI__nvvm_mapa_shared_cluster:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
+        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
+  case NVPTX::BI__nvvm_getctarank:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
+        EmitScalarExpr(E->getArg(0)));
+  case NVPTX::BI__nvvm_getctarank_shared_cluster:
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
+        EmitScalarExpr(E->getArg(0)));
   default:
     return nullptr;
   }
Index: clang/include/clang/Basic/BuiltinsNVPTX.def
===================================================================
--- clang/include/clang/Basic/BuiltinsNVPTX.def
+++ clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -99,6 +99,31 @@
 BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
 BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
 
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78))
+
+TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78))
+
 BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
 BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
 BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
@@ -865,6 +890,7 @@
 BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
 BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
 BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
+TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78))
 
 // Builtins to support WMMA instructions on sm_70
 TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
@@ -988,6 +1014,11 @@
 TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70))
 TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
 
+TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78))
+TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #pragma pop_macro("AND")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D151168: [CUDA] plumb through new sm_90-specific builtins. Artem Belevich via Phabricator via cfe-commits
    • [PATCH] D151168: [CUDA] plumb through new sm_90-specific builtins. Artem Belevich via Phabricator via cfe-commits

Reply via email to