https://github.com/igorban-intel created 
https://github.com/llvm/llvm-project/pull/199258

Add cl_intel_subgroup_buffer_prefetch and cl_intel_subgroup_local_block_io
declarations to OpenCLBuiltins.td and cover them with header-free SPIR tests.

This keeps the generated OpenCL builtins in sync with opencl-c.h for the
Intel subgroup buffer prefetch and local block I/O extensions.

Per the cl_intel_subgroup_local_block_io specification, the _ui local
aliases (intel_sub_group_block_read_ui* and intel_sub_group_block_write_ui*
taking a __local pointer) are declared under FuncExtIntelSubgroupLocalBlockIO
alone; they do not additionally require cl_intel_subgroups_char,
cl_intel_subgroups_short, or cl_intel_subgroups_long.  A dedicated test
(intel-subgroup-local-block-io-ui-without-char-short-long.cl) verifies that
they resolve when cl_intel_subgroup_local_block_io is enabled and those
three extensions are disabled.

Specification:
https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html
https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_local_block_io.html

Co-authored-by: Copilot

>From d5f6c96c56abb98a0fa6d9a34d1c2547f4e095cd Mon Sep 17 00:00:00 2001
From: "Gorban, Igor" <[email protected]>
Date: Fri, 22 May 2026 20:10:18 +0200
Subject: [PATCH] [OpenCL] Add Intel subgroup buffer prefetch and local block
 I/O builtins

Add cl_intel_subgroup_buffer_prefetch and cl_intel_subgroup_local_block_io
declarations to OpenCLBuiltins.td and cover them with header-free SPIR tests.

This keeps the generated OpenCL builtins in sync with opencl-c.h for the
Intel subgroup buffer prefetch and local block I/O extensions.

Per the cl_intel_subgroup_local_block_io specification, the _ui local
aliases (intel_sub_group_block_read_ui* and intel_sub_group_block_write_ui*
taking a __local pointer) are declared under FuncExtIntelSubgroupLocalBlockIO
alone; they do not additionally require cl_intel_subgroups_char,
cl_intel_subgroups_short, or cl_intel_subgroups_long.  A dedicated test
(intel-subgroup-local-block-io-ui-without-char-short-long.cl) verifies that
they resolve when cl_intel_subgroup_local_block_io is enabled and those
three extensions are disabled.

Specification:
https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html
https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_local_block_io.html

Co-authored-by: Copilot
---
 clang/lib/Sema/OpenCLBuiltins.td              | 101 +++++++++++
 ...intel-subgroup-buffer-prefetch-builtins.cl |  47 +++++
 .../intel-subgroup-local-block-io-builtins.cl | 165 ++++++++++++++++++
 ...cal-block-io-ui-without-char-short-long.cl |  40 +++++
 4 files changed, 353 insertions(+)
 create mode 100644 
clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl
 create mode 100644 
clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl
 create mode 100644 
clang/test/SemaOpenCL/intel-subgroup-local-block-io-ui-without-char-short-long.cl

diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
index 356e54c09d02f..ec8be2722a833 100644
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -1900,10 +1900,19 @@ def FuncExtIntelSubgroups      : 
FunctionExtension<"cl_intel_subgroups">;
 def FuncExtIntelSubgroupsShort : FunctionExtension<"cl_intel_subgroups_short">;
 def FuncExtIntelSubgroupsChar  : FunctionExtension<"cl_intel_subgroups_char">;
 def FuncExtIntelSubgroupsLong  : FunctionExtension<"cl_intel_subgroups_long">;
+def FuncExtIntelSubgroupBufferPrefetch : 
FunctionExtension<"cl_intel_subgroup_buffer_prefetch">;
+def FuncExtIntelSubgroupLocalBlockIO : 
FunctionExtension<"cl_intel_subgroup_local_block_io">;
 def FuncExtIntelSubgroupsRWImages : FunctionExtension<"cl_intel_subgroups 
__opencl_c_read_write_images">;
 def FuncExtIntelSubgroupsShortRWImages : 
FunctionExtension<"cl_intel_subgroups_short __opencl_c_read_write_images">;
 def FuncExtIntelSubgroupsCharRWImages : 
FunctionExtension<"cl_intel_subgroups_char __opencl_c_read_write_images">;
 def FuncExtIntelSubgroupsLongRWImages : 
FunctionExtension<"cl_intel_subgroups_long __opencl_c_read_write_images">;
+def FuncExtIntelSubgroupsPrefetch : FunctionExtension<"cl_intel_subgroups 
cl_intel_subgroup_buffer_prefetch">;
+def FuncExtIntelSubgroupsShortPrefetch : 
FunctionExtension<"cl_intel_subgroups_short cl_intel_subgroup_buffer_prefetch">;
+def FuncExtIntelSubgroupsCharPrefetch : 
FunctionExtension<"cl_intel_subgroups_char cl_intel_subgroup_buffer_prefetch">;
+def FuncExtIntelSubgroupsLongPrefetch : 
FunctionExtension<"cl_intel_subgroups_long cl_intel_subgroup_buffer_prefetch">;
+def FuncExtIntelSubgroupsShortLocalBlockIO : 
FunctionExtension<"cl_intel_subgroups_short cl_intel_subgroup_local_block_io">;
+def FuncExtIntelSubgroupsCharLocalBlockIO : 
FunctionExtension<"cl_intel_subgroups_char cl_intel_subgroup_local_block_io">;
+def FuncExtIntelSubgroupsLongLocalBlockIO : 
FunctionExtension<"cl_intel_subgroups_long cl_intel_subgroup_local_block_io">;
 
 // cl_intel_subgroups - shuffle functions
 // intel_sub_group_shuffle(T, uint) for float/int/uint vectors, half/double
@@ -2167,6 +2176,98 @@ let Extension = FuncExtIntelSubgroupsLongRWImages in {
   def : Builtin<"intel_sub_group_block_write_ul16", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<ULong, 16>], Attr.Convergent>;
 }
 
+let Extension = FuncExtIntelSubgroupsPrefetch in {
+  def : Builtin<"intel_sub_group_block_prefetch_ui", [Void, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_ui2", [Void, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_ui4", [Void, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_ui8", [Void, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsShortPrefetch in {
+  def : Builtin<"intel_sub_group_block_prefetch_us", [Void, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_us2", [Void, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_us4", [Void, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_us8", [Void, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_us16", [Void, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsCharPrefetch in {
+  def : Builtin<"intel_sub_group_block_prefetch_uc", [Void, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_uc2", [Void, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_uc4", [Void, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_uc8", [Void, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_uc16", [Void, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsLongPrefetch in {
+  def : Builtin<"intel_sub_group_block_prefetch_ul", [Void, 
PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_ul2", [Void, 
PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_ul4", [Void, 
PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_prefetch_ul8", [Void, 
PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupLocalBlockIO in {
+  def : Builtin<"intel_sub_group_block_read", [UInt, 
PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read2", [VectorType<UInt, 2>, 
PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read4", [VectorType<UInt, 4>, 
PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read8", [VectorType<UInt, 8>, 
PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write", [Void, PointerType<UInt, 
LocalAS>, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write2", [Void, PointerType<UInt, 
LocalAS>, VectorType<UInt, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write4", [Void, PointerType<UInt, 
LocalAS>, VectorType<UInt, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write8", [Void, PointerType<UInt, 
LocalAS>, VectorType<UInt, 8>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_read_ui", [UInt, 
PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui2", [VectorType<UInt, 2>, 
PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui4", [VectorType<UInt, 4>, 
PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui8", [VectorType<UInt, 8>, 
PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_ui", [Void, PointerType<UInt, 
LocalAS>, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui2", [Void, PointerType<UInt, 
LocalAS>, VectorType<UInt, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui4", [Void, PointerType<UInt, 
LocalAS>, VectorType<UInt, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui8", [Void, PointerType<UInt, 
LocalAS>, VectorType<UInt, 8>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsCharLocalBlockIO in {
+  def : Builtin<"intel_sub_group_block_read_uc", [UChar, 
PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc2", [VectorType<UChar, 2>, 
PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc4", [VectorType<UChar, 4>, 
PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc8", [VectorType<UChar, 8>, 
PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc16", [VectorType<UChar, 16>, 
PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_uc", [Void, PointerType<UChar, 
LocalAS>, UChar], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc2", [Void, PointerType<UChar, 
LocalAS>, VectorType<UChar, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc4", [Void, PointerType<UChar, 
LocalAS>, VectorType<UChar, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc8", [Void, PointerType<UChar, 
LocalAS>, VectorType<UChar, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc16", [Void, PointerType<UChar, 
LocalAS>, VectorType<UChar, 16>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsShortLocalBlockIO in {
+  def : Builtin<"intel_sub_group_block_read_us", [UShort, 
PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us2", [VectorType<UShort, 2>, 
PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us4", [VectorType<UShort, 4>, 
PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us8", [VectorType<UShort, 8>, 
PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us16", [VectorType<UShort, 16>, 
PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_us", [Void, PointerType<UShort, 
LocalAS>, UShort], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us2", [Void, PointerType<UShort, 
LocalAS>, VectorType<UShort, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us4", [Void, PointerType<UShort, 
LocalAS>, VectorType<UShort, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us8", [Void, PointerType<UShort, 
LocalAS>, VectorType<UShort, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us16", [Void, PointerType<UShort, 
LocalAS>, VectorType<UShort, 16>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsLongLocalBlockIO in {
+  def : Builtin<"intel_sub_group_block_read_ul", [ULong, 
PointerType<ConstType<ULong>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul2", [VectorType<ULong, 2>, 
PointerType<ConstType<ULong>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul4", [VectorType<ULong, 4>, 
PointerType<ConstType<ULong>, LocalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul8", [VectorType<ULong, 8>, 
PointerType<ConstType<ULong>, LocalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_ul", [Void, PointerType<ULong, 
LocalAS>, ULong], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul2", [Void, PointerType<ULong, 
LocalAS>, VectorType<ULong, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul4", [Void, PointerType<ULong, 
LocalAS>, VectorType<ULong, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul8", [Void, PointerType<ULong, 
LocalAS>, VectorType<ULong, 8>], Attr.Convergent>;
+}
+
 //--------------------------------------------------------------------
 // Arm extensions.
 let Extension = ArmIntegerDotProductInt8 in {
diff --git a/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl 
b/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl
new file mode 100644
index 0000000000000..5a1244fc76511
--- /dev/null
+++ b/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 
-fdeclare-opencl-builtins -verify -fsyntax-only %s
+
+// Keep this test header-free so it exercises OpenCLBuiltins.td instead of
+// declarations from opencl-c.h.
+
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+typedef unsigned long ulong;
+
+void test_block_prefetch_ui(const __global uint *in) {
+  intel_sub_group_block_prefetch_ui(in);
+  intel_sub_group_block_prefetch_ui2(in);
+  intel_sub_group_block_prefetch_ui4(in);
+  intel_sub_group_block_prefetch_ui8(in);
+}
+
+void test_block_prefetch_us(const __global ushort *in) {
+  intel_sub_group_block_prefetch_us(in);
+  intel_sub_group_block_prefetch_us2(in);
+  intel_sub_group_block_prefetch_us4(in);
+  intel_sub_group_block_prefetch_us8(in);
+  intel_sub_group_block_prefetch_us16(in);
+}
+
+void test_block_prefetch_uc(const __global uchar *in) {
+  intel_sub_group_block_prefetch_uc(in);
+  intel_sub_group_block_prefetch_uc2(in);
+  intel_sub_group_block_prefetch_uc4(in);
+  intel_sub_group_block_prefetch_uc8(in);
+  intel_sub_group_block_prefetch_uc16(in);
+}
+
+void test_block_prefetch_ul(const __global ulong *in) {
+  intel_sub_group_block_prefetch_ul(in);
+  intel_sub_group_block_prefetch_ul2(in);
+  intel_sub_group_block_prefetch_ul4(in);
+  intel_sub_group_block_prefetch_ul8(in);
+}
+
+void test_block_prefetch_ui16_rejected(const __global uint *in) {
+  intel_sub_group_block_prefetch_ui16(in); // expected-error{{use of 
undeclared identifier 'intel_sub_group_block_prefetch_ui16'}}
+}
+
+void test_block_prefetch_ul16_rejected(const __global ulong *in) {
+  intel_sub_group_block_prefetch_ul16(in); // expected-error{{use of 
undeclared identifier 'intel_sub_group_block_prefetch_ul16'}}
+}
diff --git a/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl 
b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl
new file mode 100644
index 0000000000000..ddd35810f85f0
--- /dev/null
+++ b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl
@@ -0,0 +1,165 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 
-fdeclare-opencl-builtins -verify -fsyntax-only %s
+
+// Keep this test header-free so it exercises OpenCLBuiltins.td instead of
+// declarations from opencl-c.h.
+
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+typedef unsigned long ulong;
+typedef uint uint2 __attribute__((ext_vector_type(2)));
+typedef uint uint4 __attribute__((ext_vector_type(4)));
+typedef uint uint8 __attribute__((ext_vector_type(8)));
+typedef ushort ushort2 __attribute__((ext_vector_type(2)));
+typedef ushort ushort4 __attribute__((ext_vector_type(4)));
+typedef ushort ushort8 __attribute__((ext_vector_type(8)));
+typedef ushort ushort16 __attribute__((ext_vector_type(16)));
+typedef uchar uchar2 __attribute__((ext_vector_type(2)));
+typedef uchar uchar4 __attribute__((ext_vector_type(4)));
+typedef uchar uchar8 __attribute__((ext_vector_type(8)));
+typedef uchar uchar16 __attribute__((ext_vector_type(16)));
+typedef ulong ulong2 __attribute__((ext_vector_type(2)));
+typedef ulong ulong4 __attribute__((ext_vector_type(4)));
+typedef ulong ulong8 __attribute__((ext_vector_type(8)));
+typedef ulong ulong16 __attribute__((ext_vector_type(16)));
+
+uint test_block_read_local(const __local uint *in) {
+  return intel_sub_group_block_read(in);
+}
+
+uint2 test_block_read2_local(const __local uint *in) {
+  return intel_sub_group_block_read2(in);
+}
+
+uint4 test_block_read4_local(const __local uint *in) {
+  return intel_sub_group_block_read4(in);
+}
+
+uint8 test_block_read8_local(const __local uint *in) {
+  return intel_sub_group_block_read8(in);
+}
+
+void test_block_write_local(__local uint *out, uint value, uint2 value2,
+                            uint4 value4, uint8 value8) {
+  intel_sub_group_block_write(out, value);
+  intel_sub_group_block_write2(out, value2);
+  intel_sub_group_block_write4(out, value4);
+  intel_sub_group_block_write8(out, value8);
+}
+
+uint test_block_read_ui_local(const __local uint *in) {
+  return intel_sub_group_block_read_ui(in);
+}
+
+uint2 test_block_read_ui2_local(const __local uint *in) {
+  return intel_sub_group_block_read_ui2(in);
+}
+
+uint4 test_block_read_ui4_local(const __local uint *in) {
+  return intel_sub_group_block_read_ui4(in);
+}
+
+uint8 test_block_read_ui8_local(const __local uint *in) {
+  return intel_sub_group_block_read_ui8(in);
+}
+
+void test_block_write_ui_local(__local uint *out, uint value, uint2 value2,
+                               uint4 value4, uint8 value8) {
+  intel_sub_group_block_write_ui(out, value);
+  intel_sub_group_block_write_ui2(out, value2);
+  intel_sub_group_block_write_ui4(out, value4);
+  intel_sub_group_block_write_ui8(out, value8);
+}
+
+uchar test_block_read_uc_local(const __local uchar *in) {
+  return intel_sub_group_block_read_uc(in);
+}
+
+uchar2 test_block_read_uc2_local(const __local uchar *in) {
+  return intel_sub_group_block_read_uc2(in);
+}
+
+uchar4 test_block_read_uc4_local(const __local uchar *in) {
+  return intel_sub_group_block_read_uc4(in);
+}
+
+uchar8 test_block_read_uc8_local(const __local uchar *in) {
+  return intel_sub_group_block_read_uc8(in);
+}
+
+uchar16 test_block_read_uc16_local(const __local uchar *in) {
+  return intel_sub_group_block_read_uc16(in);
+}
+
+void test_block_write_uc_local(__local uchar *out, uchar value, uchar2 value2,
+                               uchar4 value4, uchar8 value8,
+                               uchar16 value16) {
+  intel_sub_group_block_write_uc(out, value);
+  intel_sub_group_block_write_uc2(out, value2);
+  intel_sub_group_block_write_uc4(out, value4);
+  intel_sub_group_block_write_uc8(out, value8);
+  intel_sub_group_block_write_uc16(out, value16);
+}
+
+ushort test_block_read_us_local(const __local ushort *in) {
+  return intel_sub_group_block_read_us(in);
+}
+
+ushort2 test_block_read_us2_local(const __local ushort *in) {
+  return intel_sub_group_block_read_us2(in);
+}
+
+ushort4 test_block_read_us4_local(const __local ushort *in) {
+  return intel_sub_group_block_read_us4(in);
+}
+
+ushort8 test_block_read_us8_local(const __local ushort *in) {
+  return intel_sub_group_block_read_us8(in);
+}
+
+ushort16 test_block_read_us16_local(const __local ushort *in) {
+  return intel_sub_group_block_read_us16(in);
+}
+
+void test_block_write_us_local(__local ushort *out, ushort value,
+                               ushort2 value2, ushort4 value4,
+                               ushort8 value8, ushort16 value16) {
+  intel_sub_group_block_write_us(out, value);
+  intel_sub_group_block_write_us2(out, value2);
+  intel_sub_group_block_write_us4(out, value4);
+  intel_sub_group_block_write_us8(out, value8);
+  intel_sub_group_block_write_us16(out, value16);
+}
+
+ulong test_block_read_ul_local(const __local ulong *in) {
+  return intel_sub_group_block_read_ul(in);
+}
+
+ulong2 test_block_read_ul2_local(const __local ulong *in) {
+  return intel_sub_group_block_read_ul2(in);
+}
+
+ulong4 test_block_read_ul4_local(const __local ulong *in) {
+  return intel_sub_group_block_read_ul4(in);
+}
+
+ulong8 test_block_read_ul8_local(const __local ulong *in) {
+  return intel_sub_group_block_read_ul8(in);
+}
+
+void test_block_write_ul_local(__local ulong *out, ulong value,
+                               ulong2 value2, ulong4 value4, ulong8 value8) {
+  intel_sub_group_block_write_ul(out, value);
+  intel_sub_group_block_write_ul2(out, value2);
+  intel_sub_group_block_write_ul4(out, value4);
+  intel_sub_group_block_write_ul8(out, value8);
+}
+
+void test_block_read_ui16_local_rejected(const __local uint *in) {
+  intel_sub_group_block_read_ui16(in); // expected-error{{use of undeclared 
identifier 'intel_sub_group_block_read_ui16'}}
+}
+
+void test_block_read_ul16_local_rejected(const __local ulong *in) {
+  intel_sub_group_block_read_ul16(in); // expected-error{{no matching function 
for call to 'intel_sub_group_block_read_ul16'}}
+  // expected-note@-1 0+{{candidate function not viable}}
+}
diff --git 
a/clang/test/SemaOpenCL/intel-subgroup-local-block-io-ui-without-char-short-long.cl
 
b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-ui-without-char-short-long.cl
new file mode 100644
index 0000000000000..93b26ff6fec2a
--- /dev/null
+++ 
b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-ui-without-char-short-long.cl
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 
-fdeclare-opencl-builtins 
-cl-ext=+cl_intel_subgroup_local_block_io,-cl_intel_subgroups_char,-cl_intel_subgroups_short,-cl_intel_subgroups_long
 -verify -fsyntax-only %s
+
+// Keep this test header-free so it exercises OpenCLBuiltins.td instead of
+// declarations from opencl-c.h.
+//
+// Per the cl_intel_subgroup_local_block_io specification, 
intel_sub_group_block_read_ui*
+// and intel_sub_group_block_write_ui* with __local pointer are declared by
+// cl_intel_subgroup_local_block_io alone.  cl_intel_subgroups_char/short/long
+// are not required and must not gate these aliases.
+
+// expected-no-diagnostics
+
+typedef unsigned int uint;
+typedef uint uint2 __attribute__((ext_vector_type(2)));
+typedef uint uint4 __attribute__((ext_vector_type(4)));
+typedef uint uint8 __attribute__((ext_vector_type(8)));
+
+uint test_block_read_ui_local(const __local uint *in) {
+  return intel_sub_group_block_read_ui(in);
+}
+
+uint2 test_block_read_ui2_local(const __local uint *in) {
+  return intel_sub_group_block_read_ui2(in);
+}
+
+uint4 test_block_read_ui4_local(const __local uint *in) {
+  return intel_sub_group_block_read_ui4(in);
+}
+
+uint8 test_block_read_ui8_local(const __local uint *in) {
+  return intel_sub_group_block_read_ui8(in);
+}
+
+void test_block_write_ui_local(__local uint *out, uint value, uint2 value2,
+                               uint4 value4, uint8 value8) {
+  intel_sub_group_block_write_ui(out, value);
+  intel_sub_group_block_write_ui2(out, value2);
+  intel_sub_group_block_write_ui4(out, value4);
+  intel_sub_group_block_write_ui8(out, value8);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to