https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/185171

>From e070a7ffdffcdfeb58cfb74243c91a2e00db0847 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Fri, 6 Mar 2026 17:55:54 +0100
Subject: [PATCH 1/6] libclc: Avoid duplicated get_local_size/get_global_size
 functions

Move opencl handling on top of clc into opencl generic, delete
amdgpu implementations in opencl.
---
 libclc/opencl/lib/amdgcn-amdhsa/SOURCES       |  2 --
 .../amdgcn-amdhsa/workitem/get_global_size.cl | 20 --------------
 .../amdgcn-amdhsa/workitem/get_local_size.cl  | 26 -------------------
 libclc/opencl/lib/amdgcn/SOURCES              |  2 --
 .../lib/amdgcn/workitem/get_global_size.cl    | 13 ----------
 .../lib/amdgcn/workitem/get_local_size.cl     | 22 ----------------
 libclc/opencl/lib/generic/SOURCES             |  1 +
 .../workitem/get_local_size.cl                |  0
 libclc/opencl/lib/ptx-nvidiacl/SOURCES        |  1 -
 9 files changed, 1 insertion(+), 86 deletions(-)
 delete mode 100644 libclc/opencl/lib/amdgcn-amdhsa/SOURCES
 delete mode 100644 libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
 delete mode 100644 libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
 delete mode 100644 libclc/opencl/lib/amdgcn/workitem/get_global_size.cl
 delete mode 100644 libclc/opencl/lib/amdgcn/workitem/get_local_size.cl
 rename libclc/opencl/lib/{ptx-nvidiacl => generic}/workitem/get_local_size.cl 
(100%)

diff --git a/libclc/opencl/lib/amdgcn-amdhsa/SOURCES 
b/libclc/opencl/lib/amdgcn-amdhsa/SOURCES
deleted file mode 100644
index ee3a48ce2c474..0000000000000
--- a/libclc/opencl/lib/amdgcn-amdhsa/SOURCES
+++ /dev/null
@@ -1,2 +0,0 @@
-workitem/get_global_size.cl
-workitem/get_local_size.cl
diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl 
b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
deleted file mode 100644
index f21a060849dbe..0000000000000
--- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl
+++ /dev/null
@@ -1,20 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <amdhsa_abi.h>
-#include <clc/opencl/opencl-base.h>
-
-_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
-  if (dim > 2)
-    return 1;
-  __constant amdhsa_implicit_kernarg_v5 *args =
-      (__constant amdhsa_implicit_kernarg_v5 *)
-          __builtin_amdgcn_implicitarg_ptr();
-  return args->block_count[dim] * (uint)args->group_size[dim] +
-         (uint)args->remainder[dim];
-}
diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl 
b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
deleted file mode 100644
index ed1e17776361e..0000000000000
--- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <amdhsa_abi.h>
-#include <clc/opencl/opencl-base.h>
-
-_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
-  if (dim > 2)
-    return 1;
-
-  __constant amdhsa_implicit_kernarg_v5 *args =
-      (__constant amdhsa_implicit_kernarg_v5 *)
-          __builtin_amdgcn_implicitarg_ptr();
-
-  uint group_ids[3] = {__builtin_amdgcn_workgroup_id_x(),
-                       __builtin_amdgcn_workgroup_id_y(),
-                       __builtin_amdgcn_workgroup_id_z()};
-
-  return group_ids[dim] < args->block_count[dim] ? 
(size_t)args->group_size[dim]
-                                                 : 
(size_t)args->remainder[dim];
-}
diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES
index ac72d8a00c9d0..e52f54789bfab 100644
--- a/libclc/opencl/lib/amdgcn/SOURCES
+++ b/libclc/opencl/lib/amdgcn/SOURCES
@@ -3,7 +3,5 @@ subgroup/subgroup.cl
 synchronization/sub_group_barrier.cl
 workitem/get_global_offset.cl
 workitem/get_group_id.cl
-workitem/get_global_size.cl
 workitem/get_local_id.cl
-workitem/get_local_size.cl
 workitem/get_work_dim.cl
diff --git a/libclc/opencl/lib/amdgcn/workitem/get_global_size.cl 
b/libclc/opencl/lib/amdgcn/workitem/get_global_size.cl
deleted file mode 100644
index eca7199a766fc..0000000000000
--- a/libclc/opencl/lib/amdgcn/workitem/get_global_size.cl
+++ /dev/null
@@ -1,13 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/workitem/clc_get_global_size.h>
-
-_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
-  return __clc_get_global_size(dim);
-}
diff --git a/libclc/opencl/lib/amdgcn/workitem/get_local_size.cl 
b/libclc/opencl/lib/amdgcn/workitem/get_local_size.cl
deleted file mode 100644
index 34e4f2f1b4c19..0000000000000
--- a/libclc/opencl/lib/amdgcn/workitem/get_local_size.cl
+++ /dev/null
@@ -1,22 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/opencl/opencl-base.h>
-
-_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
-  switch (dim) {
-  case 0:
-    return __builtin_amdgcn_workgroup_size_x();
-  case 1:
-    return __builtin_amdgcn_workgroup_size_y();
-  case 2:
-    return __builtin_amdgcn_workgroup_size_z();
-  default:
-    return 1;
-  }
-}
diff --git a/libclc/opencl/lib/generic/SOURCES 
b/libclc/opencl/lib/generic/SOURCES
index cdc1d8321dfeb..18dd6fd3a10c2 100644
--- a/libclc/opencl/lib/generic/SOURCES
+++ b/libclc/opencl/lib/generic/SOURCES
@@ -207,4 +207,5 @@ workitem/get_global_id.cl
 workitem/get_global_linear_id.cl
 workitem/get_global_size.cl
 workitem/get_local_linear_id.cl
+workitem/get_local_size.cl
 workitem/get_num_groups.cl
diff --git a/libclc/opencl/lib/ptx-nvidiacl/workitem/get_local_size.cl 
b/libclc/opencl/lib/generic/workitem/get_local_size.cl
similarity index 100%
rename from libclc/opencl/lib/ptx-nvidiacl/workitem/get_local_size.cl
rename to libclc/opencl/lib/generic/workitem/get_local_size.cl
diff --git a/libclc/opencl/lib/ptx-nvidiacl/SOURCES 
b/libclc/opencl/lib/ptx-nvidiacl/SOURCES
index b8e8f64b5802a..3ece564c9760e 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/SOURCES
+++ b/libclc/opencl/lib/ptx-nvidiacl/SOURCES
@@ -3,7 +3,6 @@ workitem/get_global_id.cl
 workitem/get_group_id.cl
 workitem/get_local_id.cl
 workitem/get_local_linear_id.cl
-workitem/get_local_size.cl
 workitem/get_max_sub_group_size.cl
 workitem/get_num_sub_groups.cl
 workitem/get_sub_group_id.cl

>From dd9f0bf312876bd9c3b766e75c1b044fd55e0efa Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Sat, 7 Mar 2026 10:53:01 +0100
Subject: [PATCH 2/6] Use v5 implementation

---
 .../lib/amdgcn/workitem/clc_get_global_size.cl | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_global_size.cl 
b/libclc/clc/lib/amdgcn/workitem/clc_get_global_size.cl
index b1d8f27dc68c8..1886ab6d1a1a0 100644
--- a/libclc/clc/lib/amdgcn/workitem/clc_get_global_size.cl
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_global_size.cl
@@ -6,17 +6,15 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include <clc/workitem/clc_get_global_size.h>
+#include "clc/workitem/clc_get_global_size.h"
+#include <amdhsa_abi.h>
 
 _CLC_DEF _CLC_OVERLOAD size_t __clc_get_global_size(uint dim) {
-  switch (dim) {
-  case 0:
-    return __builtin_amdgcn_grid_size_x();
-  case 1:
-    return __builtin_amdgcn_grid_size_y();
-  case 2:
-    return __builtin_amdgcn_grid_size_z();
-  default:
+  if (dim > 2)
     return 1;
-  }
+  __constant amdhsa_implicit_kernarg_v5 *args =
+      (__constant amdhsa_implicit_kernarg_v5 *)
+          __builtin_amdgcn_implicitarg_ptr();
+  return args->block_count[dim] * (uint)args->group_size[dim] +
+         (uint)args->remainder[dim];
 }

>From 62fe685987a904b94f282d55cc501ec7fe2f2d8e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Sat, 7 Mar 2026 10:56:51 +0100
Subject: [PATCH 3/6] Use __clc_get_global_size

---
 libclc/opencl/lib/generic/workitem/get_global_size.cl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libclc/opencl/lib/generic/workitem/get_global_size.cl 
b/libclc/opencl/lib/generic/workitem/get_global_size.cl
index 34d00f8fa809b..2fe343ca48c41 100644
--- a/libclc/opencl/lib/generic/workitem/get_global_size.cl
+++ b/libclc/opencl/lib/generic/workitem/get_global_size.cl
@@ -6,8 +6,8 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include <clc/opencl/opencl-base.h>
+#include "clc/workitem/clc_get_global_size.h"
 
 _CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
-  return get_num_groups(dim) * get_local_size(dim);
+  return __clc_get_global_size(dim);
 }

>From 7a34a3a943c5064165b96004b5d410936da74f15 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Sat, 7 Mar 2026 11:01:58 +0100
Subject: [PATCH 4/6] Fix ptx build

---
 libclc/clc/lib/ptx-nvidiacl/SOURCES               |  1 +
 .../ptx-nvidiacl/workitem/clc_get_global_size.cl  | 15 +++++++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_global_size.cl

diff --git a/libclc/clc/lib/ptx-nvidiacl/SOURCES 
b/libclc/clc/lib/ptx-nvidiacl/SOURCES
index cafd90943f22e..9ed25c71a3f35 100644
--- a/libclc/clc/lib/ptx-nvidiacl/SOURCES
+++ b/libclc/clc/lib/ptx-nvidiacl/SOURCES
@@ -5,6 +5,7 @@ math/clc_sqrt.cl
 mem_fence/clc_mem_fence.cl
 synchronization/clc_work_group_barrier.cl
 workitem/clc_get_global_id.cl
+workitem/clc_get_global_size.cl
 workitem/clc_get_group_id.cl
 workitem/clc_get_local_id.cl
 workitem/clc_get_local_size.cl
diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_global_size.cl 
b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_global_size.cl
new file mode 100644
index 0000000000000..262fd3b1b43a3
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_global_size.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/workitem/clc_get_global_size.h"
+#include "clc/workitem/clc_get_local_size.h"
+#include "clc/workitem/clc_get_num_groups.h"
+
+_CLC_DEF _CLC_OVERLOAD size_t __clc_get_global_size(uint dim) {
+  return __clc_get_num_groups(dim) * __clc_get_local_size(dim);
+}

>From faee8e53f24de576b881dcced8f4e4a07726f4ba Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Sat, 7 Mar 2026 11:26:59 +0100
Subject: [PATCH 5/6] Move get_local_size implementation

---
 .../lib/amdgcn/workitem/clc_get_local_size.cl | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_local_size.cl 
b/libclc/clc/lib/amdgcn/workitem/clc_get_local_size.cl
index 1e749404168d8..57d801a2d02d4 100644
--- a/libclc/clc/lib/amdgcn/workitem/clc_get_local_size.cl
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_local_size.cl
@@ -6,17 +6,21 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include <clc/workitem/clc_get_local_size.h>
+#include "clc/workitem/clc_get_local_size.h"
+#include <amdhsa_abi.h>
 
 _CLC_OVERLOAD _CLC_DEF size_t __clc_get_local_size(uint dim) {
-  switch (dim) {
-  case 0:
-    return __builtin_amdgcn_workgroup_size_x();
-  case 1:
-    return __builtin_amdgcn_workgroup_size_y();
-  case 2:
-    return __builtin_amdgcn_workgroup_size_z();
-  default:
+  if (dim > 2)
     return 1;
-  }
+
+  __constant amdhsa_implicit_kernarg_v5 *args =
+      (__constant amdhsa_implicit_kernarg_v5 *)
+          __builtin_amdgcn_implicitarg_ptr();
+
+  uint group_ids[3] = {__builtin_amdgcn_workgroup_id_x(),
+                       __builtin_amdgcn_workgroup_id_y(),
+                       __builtin_amdgcn_workgroup_id_z()};
+
+  return group_ids[dim] < args->block_count[dim] ? 
(size_t)args->group_size[dim]
+                                                 : 
(size_t)args->remainder[dim];
 }

>From a2c7b88d3adee8c1403329c13ab3e9718d168285 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Sat, 7 Mar 2026 10:39:39 +0100
Subject: [PATCH 6/6] libclc: Fix amdgpu get_enqueued_local_size

This should not be the same as get_local_size
---
 .../clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl 
b/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl
index c7226241694b3..102294e6dbb25 100644
--- a/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl
@@ -7,8 +7,11 @@
 
//===----------------------------------------------------------------------===//
 
 #include "clc/workitem/clc_get_enqueued_local_size.h"
-#include "clc/workitem/clc_get_local_size.h"
+#include <amdhsa_abi.h>
 
 _CLC_OVERLOAD _CLC_DEF size_t __clc_get_enqueued_local_size(uint dim) {
-  return __clc_get_local_size(dim);
+  __constant amdhsa_implicit_kernarg_v5 *args =
+      (__constant amdhsa_implicit_kernarg_v5 *)
+          __builtin_amdgcn_implicitarg_ptr();
+  return dim < 3 ? args->group_size[dim] : 1;
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to