https://github.com/igorban-intel created https://github.com/llvm/llvm-project/pull/199258
Add cl_intel_subgroup_buffer_prefetch and cl_intel_subgroup_local_block_io declarations to OpenCLBuiltins.td and cover them with header-free SPIR tests. This keeps the generated OpenCL builtins in sync with opencl-c.h for the Intel subgroup buffer prefetch and local block I/O extensions. Per the cl_intel_subgroup_local_block_io specification, the _ui local aliases (intel_sub_group_block_read_ui*, intel_sub_group_block_write_ui* with a __local pointer) are declared under FuncExtIntelSubgroupLocalBlockIO alone, without a char/short/long prerequisite. A dedicated test (intel-subgroup-local-block-io-ui-without-char-short-long.cl) verifies that they resolve when only cl_intel_subgroup_local_block_io is active. Specification: https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_local_block_io.html Co-authored-by: Copilot From d5f6c96c56abb98a0fa6d9a34d1c2547f4e095cd Mon Sep 17 00:00:00 2001 From: "Gorban, Igor" <[email protected]> Date: Fri, 22 May 2026 20:10:18 +0200 Subject: [PATCH] [OpenCL] Add Intel subgroup buffer prefetch and local block I/O builtins Add cl_intel_subgroup_buffer_prefetch and cl_intel_subgroup_local_block_io declarations to OpenCLBuiltins.td and cover them with header-free SPIR tests. This keeps the generated OpenCL builtins in sync with opencl-c.h for the Intel subgroup buffer prefetch and local block I/O extensions. Per the cl_intel_subgroup_local_block_io specification, the _ui local aliases (intel_sub_group_block_read_ui*, intel_sub_group_block_write_ui* with a __local pointer) are declared under FuncExtIntelSubgroupLocalBlockIO alone, without a char/short/long prerequisite. A dedicated test (intel-subgroup-local-block-io-ui-without-char-short-long.cl) verifies that they resolve when only cl_intel_subgroup_local_block_io is active. 
Specification: https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_local_block_io.html Co-authored-by: Copilot --- clang/lib/Sema/OpenCLBuiltins.td | 101 +++++++++++ ...intel-subgroup-buffer-prefetch-builtins.cl | 47 +++++ .../intel-subgroup-local-block-io-builtins.cl | 165 ++++++++++++++++++ ...cal-block-io-ui-without-char-short-long.cl | 40 +++++ 4 files changed, 353 insertions(+) create mode 100644 clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl create mode 100644 clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl create mode 100644 clang/test/SemaOpenCL/intel-subgroup-local-block-io-ui-without-char-short-long.cl diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td index 356e54c09d02f..ec8be2722a833 100644 --- a/clang/lib/Sema/OpenCLBuiltins.td +++ b/clang/lib/Sema/OpenCLBuiltins.td @@ -1900,10 +1900,19 @@ def FuncExtIntelSubgroups : FunctionExtension<"cl_intel_subgroups">; def FuncExtIntelSubgroupsShort : FunctionExtension<"cl_intel_subgroups_short">; def FuncExtIntelSubgroupsChar : FunctionExtension<"cl_intel_subgroups_char">; def FuncExtIntelSubgroupsLong : FunctionExtension<"cl_intel_subgroups_long">; +def FuncExtIntelSubgroupBufferPrefetch : FunctionExtension<"cl_intel_subgroup_buffer_prefetch">; +def FuncExtIntelSubgroupLocalBlockIO : FunctionExtension<"cl_intel_subgroup_local_block_io">; def FuncExtIntelSubgroupsRWImages : FunctionExtension<"cl_intel_subgroups __opencl_c_read_write_images">; def FuncExtIntelSubgroupsShortRWImages : FunctionExtension<"cl_intel_subgroups_short __opencl_c_read_write_images">; def FuncExtIntelSubgroupsCharRWImages : FunctionExtension<"cl_intel_subgroups_char __opencl_c_read_write_images">; def FuncExtIntelSubgroupsLongRWImages : FunctionExtension<"cl_intel_subgroups_long __opencl_c_read_write_images">; +def FuncExtIntelSubgroupsPrefetch : 
FunctionExtension<"cl_intel_subgroups cl_intel_subgroup_buffer_prefetch">; +def FuncExtIntelSubgroupsShortPrefetch : FunctionExtension<"cl_intel_subgroups_short cl_intel_subgroup_buffer_prefetch">; +def FuncExtIntelSubgroupsCharPrefetch : FunctionExtension<"cl_intel_subgroups_char cl_intel_subgroup_buffer_prefetch">; +def FuncExtIntelSubgroupsLongPrefetch : FunctionExtension<"cl_intel_subgroups_long cl_intel_subgroup_buffer_prefetch">; +def FuncExtIntelSubgroupsShortLocalBlockIO : FunctionExtension<"cl_intel_subgroups_short cl_intel_subgroup_local_block_io">; +def FuncExtIntelSubgroupsCharLocalBlockIO : FunctionExtension<"cl_intel_subgroups_char cl_intel_subgroup_local_block_io">; +def FuncExtIntelSubgroupsLongLocalBlockIO : FunctionExtension<"cl_intel_subgroups_long cl_intel_subgroup_local_block_io">; // cl_intel_subgroups - shuffle functions // intel_sub_group_shuffle(T, uint) for float/int/uint vectors, half/double @@ -2167,6 +2176,98 @@ let Extension = FuncExtIntelSubgroupsLongRWImages in { def : Builtin<"intel_sub_group_block_write_ul16", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<ULong, 16>], Attr.Convergent>; } +let Extension = FuncExtIntelSubgroupsPrefetch in { + def : Builtin<"intel_sub_group_block_prefetch_ui", [Void, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_ui2", [Void, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_ui4", [Void, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_ui8", [Void, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsShortPrefetch in { + def : Builtin<"intel_sub_group_block_prefetch_us", [Void, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_us2", [Void, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + def 
: Builtin<"intel_sub_group_block_prefetch_us4", [Void, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_us8", [Void, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_us16", [Void, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsCharPrefetch in { + def : Builtin<"intel_sub_group_block_prefetch_uc", [Void, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_uc2", [Void, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_uc4", [Void, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_uc8", [Void, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_uc16", [Void, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsLongPrefetch in { + def : Builtin<"intel_sub_group_block_prefetch_ul", [Void, PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_ul2", [Void, PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_ul4", [Void, PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_prefetch_ul8", [Void, PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupLocalBlockIO in { + def : Builtin<"intel_sub_group_block_read", [UInt, PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read2", [VectorType<UInt, 2>, PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read4", [VectorType<UInt, 4>, PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>; + 
def : Builtin<"intel_sub_group_block_read8", [VectorType<UInt, 8>, PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write", [Void, PointerType<UInt, LocalAS>, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write2", [Void, PointerType<UInt, LocalAS>, VectorType<UInt, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write4", [Void, PointerType<UInt, LocalAS>, VectorType<UInt, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write8", [Void, PointerType<UInt, LocalAS>, VectorType<UInt, 8>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_read_ui", [UInt, PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui2", [VectorType<UInt, 2>, PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui4", [VectorType<UInt, 4>, PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui8", [VectorType<UInt, 8>, PointerType<ConstType<UInt>, LocalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_ui", [Void, PointerType<UInt, LocalAS>, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui2", [Void, PointerType<UInt, LocalAS>, VectorType<UInt, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui4", [Void, PointerType<UInt, LocalAS>, VectorType<UInt, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui8", [Void, PointerType<UInt, LocalAS>, VectorType<UInt, 8>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsCharLocalBlockIO in { + def : Builtin<"intel_sub_group_block_read_uc", [UChar, PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc2", [VectorType<UChar, 2>, PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc4", [VectorType<UChar, 4>, 
PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc8", [VectorType<UChar, 8>, PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc16", [VectorType<UChar, 16>, PointerType<ConstType<UChar>, LocalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_uc", [Void, PointerType<UChar, LocalAS>, UChar], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc2", [Void, PointerType<UChar, LocalAS>, VectorType<UChar, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc4", [Void, PointerType<UChar, LocalAS>, VectorType<UChar, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc8", [Void, PointerType<UChar, LocalAS>, VectorType<UChar, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc16", [Void, PointerType<UChar, LocalAS>, VectorType<UChar, 16>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsShortLocalBlockIO in { + def : Builtin<"intel_sub_group_block_read_us", [UShort, PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us2", [VectorType<UShort, 2>, PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us4", [VectorType<UShort, 4>, PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us8", [VectorType<UShort, 8>, PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us16", [VectorType<UShort, 16>, PointerType<ConstType<UShort>, LocalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_us", [Void, PointerType<UShort, LocalAS>, UShort], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us2", [Void, PointerType<UShort, LocalAS>, VectorType<UShort, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us4", [Void, PointerType<UShort, 
LocalAS>, VectorType<UShort, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us8", [Void, PointerType<UShort, LocalAS>, VectorType<UShort, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us16", [Void, PointerType<UShort, LocalAS>, VectorType<UShort, 16>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsLongLocalBlockIO in { + def : Builtin<"intel_sub_group_block_read_ul", [ULong, PointerType<ConstType<ULong>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul2", [VectorType<ULong, 2>, PointerType<ConstType<ULong>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul4", [VectorType<ULong, 4>, PointerType<ConstType<ULong>, LocalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul8", [VectorType<ULong, 8>, PointerType<ConstType<ULong>, LocalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_ul", [Void, PointerType<ULong, LocalAS>, ULong], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul2", [Void, PointerType<ULong, LocalAS>, VectorType<ULong, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul4", [Void, PointerType<ULong, LocalAS>, VectorType<ULong, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul8", [Void, PointerType<ULong, LocalAS>, VectorType<ULong, 8>], Attr.Convergent>; +} + //-------------------------------------------------------------------- // Arm extensions. 
let Extension = ArmIntegerDotProductInt8 in { diff --git a/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl b/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl new file mode 100644 index 0000000000000..5a1244fc76511 --- /dev/null +++ b/clang/test/SemaOpenCL/intel-subgroup-buffer-prefetch-builtins.cl @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s + +// Keep this test header-free so it exercises OpenCLBuiltins.td instead of +// declarations from opencl-c.h. + +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; +typedef unsigned long ulong; + +void test_block_prefetch_ui(const __global uint *in) { + intel_sub_group_block_prefetch_ui(in); + intel_sub_group_block_prefetch_ui2(in); + intel_sub_group_block_prefetch_ui4(in); + intel_sub_group_block_prefetch_ui8(in); +} + +void test_block_prefetch_us(const __global ushort *in) { + intel_sub_group_block_prefetch_us(in); + intel_sub_group_block_prefetch_us2(in); + intel_sub_group_block_prefetch_us4(in); + intel_sub_group_block_prefetch_us8(in); + intel_sub_group_block_prefetch_us16(in); +} + +void test_block_prefetch_uc(const __global uchar *in) { + intel_sub_group_block_prefetch_uc(in); + intel_sub_group_block_prefetch_uc2(in); + intel_sub_group_block_prefetch_uc4(in); + intel_sub_group_block_prefetch_uc8(in); + intel_sub_group_block_prefetch_uc16(in); +} + +void test_block_prefetch_ul(const __global ulong *in) { + intel_sub_group_block_prefetch_ul(in); + intel_sub_group_block_prefetch_ul2(in); + intel_sub_group_block_prefetch_ul4(in); + intel_sub_group_block_prefetch_ul8(in); +} + +void test_block_prefetch_ui16_rejected(const __global uint *in) { + intel_sub_group_block_prefetch_ui16(in); // expected-error{{use of undeclared identifier 'intel_sub_group_block_prefetch_ui16'}} +} + +void test_block_prefetch_ul16_rejected(const __global ulong *in) { + 
intel_sub_group_block_prefetch_ul16(in); // expected-error{{use of undeclared identifier 'intel_sub_group_block_prefetch_ul16'}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl new file mode 100644 index 0000000000000..ddd35810f85f0 --- /dev/null +++ b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-builtins.cl @@ -0,0 +1,165 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s + +// Keep this test header-free so it exercises OpenCLBuiltins.td instead of +// declarations from opencl-c.h. + +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; +typedef unsigned long ulong; +typedef uint uint2 __attribute__((ext_vector_type(2))); +typedef uint uint4 __attribute__((ext_vector_type(4))); +typedef uint uint8 __attribute__((ext_vector_type(8))); +typedef ushort ushort2 __attribute__((ext_vector_type(2))); +typedef ushort ushort4 __attribute__((ext_vector_type(4))); +typedef ushort ushort8 __attribute__((ext_vector_type(8))); +typedef ushort ushort16 __attribute__((ext_vector_type(16))); +typedef uchar uchar2 __attribute__((ext_vector_type(2))); +typedef uchar uchar4 __attribute__((ext_vector_type(4))); +typedef uchar uchar8 __attribute__((ext_vector_type(8))); +typedef uchar uchar16 __attribute__((ext_vector_type(16))); +typedef ulong ulong2 __attribute__((ext_vector_type(2))); +typedef ulong ulong4 __attribute__((ext_vector_type(4))); +typedef ulong ulong8 __attribute__((ext_vector_type(8))); +typedef ulong ulong16 __attribute__((ext_vector_type(16))); + +uint test_block_read_local(const __local uint *in) { + return intel_sub_group_block_read(in); +} + +uint2 test_block_read2_local(const __local uint *in) { + return intel_sub_group_block_read2(in); +} + +uint4 test_block_read4_local(const __local uint *in) { + return intel_sub_group_block_read4(in); +} + +uint8 
test_block_read8_local(const __local uint *in) { + return intel_sub_group_block_read8(in); +} + +void test_block_write_local(__local uint *out, uint value, uint2 value2, + uint4 value4, uint8 value8) { + intel_sub_group_block_write(out, value); + intel_sub_group_block_write2(out, value2); + intel_sub_group_block_write4(out, value4); + intel_sub_group_block_write8(out, value8); +} + +uint test_block_read_ui_local(const __local uint *in) { + return intel_sub_group_block_read_ui(in); +} + +uint2 test_block_read_ui2_local(const __local uint *in) { + return intel_sub_group_block_read_ui2(in); +} + +uint4 test_block_read_ui4_local(const __local uint *in) { + return intel_sub_group_block_read_ui4(in); +} + +uint8 test_block_read_ui8_local(const __local uint *in) { + return intel_sub_group_block_read_ui8(in); +} + +void test_block_write_ui_local(__local uint *out, uint value, uint2 value2, + uint4 value4, uint8 value8) { + intel_sub_group_block_write_ui(out, value); + intel_sub_group_block_write_ui2(out, value2); + intel_sub_group_block_write_ui4(out, value4); + intel_sub_group_block_write_ui8(out, value8); +} + +uchar test_block_read_uc_local(const __local uchar *in) { + return intel_sub_group_block_read_uc(in); +} + +uchar2 test_block_read_uc2_local(const __local uchar *in) { + return intel_sub_group_block_read_uc2(in); +} + +uchar4 test_block_read_uc4_local(const __local uchar *in) { + return intel_sub_group_block_read_uc4(in); +} + +uchar8 test_block_read_uc8_local(const __local uchar *in) { + return intel_sub_group_block_read_uc8(in); +} + +uchar16 test_block_read_uc16_local(const __local uchar *in) { + return intel_sub_group_block_read_uc16(in); +} + +void test_block_write_uc_local(__local uchar *out, uchar value, uchar2 value2, + uchar4 value4, uchar8 value8, + uchar16 value16) { + intel_sub_group_block_write_uc(out, value); + intel_sub_group_block_write_uc2(out, value2); + intel_sub_group_block_write_uc4(out, value4); + intel_sub_group_block_write_uc8(out, value8); 
+ intel_sub_group_block_write_uc16(out, value16); +} + +ushort test_block_read_us_local(const __local ushort *in) { + return intel_sub_group_block_read_us(in); +} + +ushort2 test_block_read_us2_local(const __local ushort *in) { + return intel_sub_group_block_read_us2(in); +} + +ushort4 test_block_read_us4_local(const __local ushort *in) { + return intel_sub_group_block_read_us4(in); +} + +ushort8 test_block_read_us8_local(const __local ushort *in) { + return intel_sub_group_block_read_us8(in); +} + +ushort16 test_block_read_us16_local(const __local ushort *in) { + return intel_sub_group_block_read_us16(in); +} + +void test_block_write_us_local(__local ushort *out, ushort value, + ushort2 value2, ushort4 value4, + ushort8 value8, ushort16 value16) { + intel_sub_group_block_write_us(out, value); + intel_sub_group_block_write_us2(out, value2); + intel_sub_group_block_write_us4(out, value4); + intel_sub_group_block_write_us8(out, value8); + intel_sub_group_block_write_us16(out, value16); +} + +ulong test_block_read_ul_local(const __local ulong *in) { + return intel_sub_group_block_read_ul(in); +} + +ulong2 test_block_read_ul2_local(const __local ulong *in) { + return intel_sub_group_block_read_ul2(in); +} + +ulong4 test_block_read_ul4_local(const __local ulong *in) { + return intel_sub_group_block_read_ul4(in); +} + +ulong8 test_block_read_ul8_local(const __local ulong *in) { + return intel_sub_group_block_read_ul8(in); +} + +void test_block_write_ul_local(__local ulong *out, ulong value, + ulong2 value2, ulong4 value4, ulong8 value8) { + intel_sub_group_block_write_ul(out, value); + intel_sub_group_block_write_ul2(out, value2); + intel_sub_group_block_write_ul4(out, value4); + intel_sub_group_block_write_ul8(out, value8); +} + +void test_block_read_ui16_local_rejected(const __local uint *in) { + intel_sub_group_block_read_ui16(in); // expected-error{{use of undeclared identifier 'intel_sub_group_block_read_ui16'}} +} + +void test_block_read_ul16_local_rejected(const 
__local ulong *in) { + intel_sub_group_block_read_ul16(in); // expected-error{{no matching function for call to 'intel_sub_group_block_read_ul16'}} + // expected-note@-1 0+{{candidate function not viable}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroup-local-block-io-ui-without-char-short-long.cl b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-ui-without-char-short-long.cl new file mode 100644 index 0000000000000..93b26ff6fec2a --- /dev/null +++ b/clang/test/SemaOpenCL/intel-subgroup-local-block-io-ui-without-char-short-long.cl @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -cl-ext=+cl_intel_subgroup_local_block_io,-cl_intel_subgroups_char,-cl_intel_subgroups_short,-cl_intel_subgroups_long -verify -fsyntax-only %s + +// Keep this test header-free so it exercises OpenCLBuiltins.td instead of +// declarations from opencl-c.h. +// +// Per the cl_intel_subgroup_local_block_io specification, intel_sub_group_block_read_ui* +// and intel_sub_group_block_write_ui* with __local pointer are declared by +// cl_intel_subgroup_local_block_io alone. cl_intel_subgroups_char/short/long +// are not required and must not gate these aliases. 
+ +// expected-no-diagnostics + +typedef unsigned int uint; +typedef uint uint2 __attribute__((ext_vector_type(2))); +typedef uint uint4 __attribute__((ext_vector_type(4))); +typedef uint uint8 __attribute__((ext_vector_type(8))); + +uint test_block_read_ui_local(const __local uint *in) { + return intel_sub_group_block_read_ui(in); +} + +uint2 test_block_read_ui2_local(const __local uint *in) { + return intel_sub_group_block_read_ui2(in); +} + +uint4 test_block_read_ui4_local(const __local uint *in) { + return intel_sub_group_block_read_ui4(in); +} + +uint8 test_block_read_ui8_local(const __local uint *in) { + return intel_sub_group_block_read_ui8(in); +} + +void test_block_write_ui_local(__local uint *out, uint value, uint2 value2, + uint4 value4, uint8 value8) { + intel_sub_group_block_write_ui(out, value); + intel_sub_group_block_write_ui2(out, value2); + intel_sub_group_block_write_ui4(out, value4); + intel_sub_group_block_write_ui8(out, value8); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
