https://github.com/Ippo47 updated https://github.com/llvm/llvm-project/pull/199192
>From 9d9d1859446afbddb66919b742d51383e5ac055d Mon Sep 17 00:00:00 2001 From: Igor Gorban <[email protected]> Date: Fri, 22 May 2026 11:08:50 +0200 Subject: [PATCH] [OpenCL] Add subgroup UI image overloads builtin functions (#198904) Add subgroup UI image overloads builtin functions from the OpenCL extensions available at https://github.com/KhronosGroup/OpenCL-Docs/blob/main/extensions/cl_intel_subgroups.asciidoc https://github.com/KhronosGroup/OpenCL-Docs/blob/main/extensions/cl_intel_subgroups_short.asciidoc https://github.com/KhronosGroup/OpenCL-Docs/blob/main/extensions/cl_intel_subgroups_char.asciidoc https://github.com/KhronosGroup/OpenCL-Docs/blob/main/extensions/cl_intel_subgroups_long.asciidoc --- .../clangd/unittests/CodeCompleteTests.cpp | 89 ++++++ clang/lib/Sema/OpenCLBuiltins.td | 285 ++++++++++++++++++ clang/lib/Sema/SemaOverload.cpp | 10 +- .../SemaOpenCL/intel-subgroups-builtins.cl | 139 +++++++++ .../intel-subgroups-char-builtins.cl | 106 +++++++ .../intel-subgroups-long-builtins.cl | 64 ++++ .../intel-subgroups-short-builtins.cl | 106 +++++++ 7 files changed, 795 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaOpenCL/intel-subgroups-builtins.cl create mode 100644 clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl create mode 100644 clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl create mode 100644 clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 5808b2145965f..1fbb880121597 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -3494,6 +3494,95 @@ TEST(SignatureHelpTest, SkipExplicitObjectParameter) { } } +TEST(SignatureHelpTest, StaticCallOperator) { + Annotations Code(R"cpp( + struct Abc { + void operator()(bool a) {} + }; + struct AbcStatic { + static void operator()(bool a) {} + }; + void test() { 
+ Abc abc; + AbcStatic abcStatic; + abc($c1^); + abcStatic($c2^); + } + )cpp"); + auto TU = TestTU::withCode(Code.code()); + TU.ExtraArgs = {"-std=c++23"}; + MockFS FS; + auto Inputs = TU.inputs(FS); + auto Preamble = TU.preamble(); + ASSERT_TRUE(Preamble); + { + // Case 1: non-static + const auto Result = signatureHelp(testPath(TU.Filename), Code.point("c1"), + *Preamble, Inputs, MarkupKind::PlainText); + ASSERT_EQ(1U, Result.signatures.size()); + EXPECT_THAT(Result.signatures[0], + AllOf(sig("operator()([[bool a]]) -> void"))); + } + { + // Case 2: static + const auto Result = signatureHelp(testPath(TU.Filename), Code.point("c2"), + *Preamble, Inputs, MarkupKind::PlainText); + ASSERT_EQ(1U, Result.signatures.size()); + EXPECT_THAT(Result.signatures[0], + AllOf(sig("operator()([[bool a]]) -> void"))); + } + { + // Case 3: static template operator() + Annotations TemplateCode(R"cpp( + struct AbcTemplate { + template <typename T> + static void operator()(T a, bool b) {} + }; + void test() { + AbcTemplate abcTemplate; + abcTemplate($c3^); + } + )cpp"); + auto TU2 = TestTU::withCode(TemplateCode.code()); + TU2.ExtraArgs = {"-std=c++23"}; + MockFS FS2; + auto Inputs2 = TU2.inputs(FS2); + auto Preamble2 = TU2.preamble(); + ASSERT_TRUE(Preamble2); + const auto Result = + signatureHelp(testPath(TU2.Filename), TemplateCode.point("c3"), + *Preamble2, Inputs2, MarkupKind::PlainText); + ASSERT_EQ(1U, Result.signatures.size()); + EXPECT_THAT(Result.signatures[0], + AllOf(sig("operator()([[T a]], [[bool b]]) -> void"))); + } + + { + Annotations TemplateCode2(R"cpp( + struct AbcTemplate2 { + template <typename T> + static void operator()(bool a, bool b) { T c; } + }; + void test() { + AbcTemplate2 abcTemplate2; + abcTemplate2($c4^); + } + )cpp"); + auto TU3 = TestTU::withCode(TemplateCode2.code()); + TU3.ExtraArgs = {"-std=c++23"}; + MockFS FS3; + auto Inputs3 = TU3.inputs(FS3); + auto Preamble3 = TU3.preamble(); + ASSERT_TRUE(Preamble3); + const auto Result = 
signatureHelp(testPath(TU3.Filename), TemplateCode2.point("c4"), + *Preamble3, Inputs3, MarkupKind::PlainText); + ASSERT_EQ(1U, Result.signatures.size()); + EXPECT_THAT(Result.signatures[0], + AllOf(sig("operator()([[bool a]], [[bool b]]) -> void"))); + } +} + TEST(CompletionTest, IncludedCompletionKinds) { Annotations Test(R"cpp(#include "^)cpp"); auto TU = TestTU::withCode(Test.code()); diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td index 7fcfd4dfb41ed..356e54c09d02f 100644 --- a/clang/lib/Sema/OpenCLBuiltins.td +++ b/clang/lib/Sema/OpenCLBuiltins.td @@ -443,6 +443,7 @@ def VecAndScalar: IntList<"VecAndScalar", [1, 2, 3, 4, 8, 16]>; def VecNoScalar : IntList<"VecNoScalar", [2, 3, 4, 8, 16]>; def Vec1 : IntList<"Vec1", [1]>; def Vec1234 : IntList<"Vec1234", [1, 2, 3, 4]>; +def VecAndScalarTo8 : IntList<"VecAndScalarTo8", [1, 2, 3, 4, 8]>; // Type lists. def TLAll : TypeList<[Char, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; @@ -502,6 +503,17 @@ foreach Type = [Float, Double, Half] in { TypeList<[Type]>, Vec1234>; } +// Intel subgroup broadcast for narrow integer types is limited to scalar and +// vector widths up to 8 elements. 
+def GenTypeCharVecAndScalarTo8 : + GenericType<"GenTypeCharVecAndScalarTo8", TypeList<[Char]>, VecAndScalarTo8>; +def GenTypeUCharVecAndScalarTo8 : + GenericType<"GenTypeUCharVecAndScalarTo8", TypeList<[UChar]>, VecAndScalarTo8>; +def GenTypeShortVecAndScalarTo8 : + GenericType<"GenTypeShortVecAndScalarTo8", TypeList<[Short]>, VecAndScalarTo8>; +def GenTypeUShortVecAndScalarTo8 : + GenericType<"GenTypeUShortVecAndScalarTo8", TypeList<[UShort]>, VecAndScalarTo8>; + //===----------------------------------------------------------------------===// // Definitions of OpenCL builtin functions @@ -1882,6 +1894,279 @@ let Extension = FunctionExtension<"cl_khr_kernel_clock __opencl_c_kernel_clock_s def : Builtin<"clock_read_hilo_sub_group", [VectorType<UInt, 2>]>; } +//-------------------------------------------------------------------- +// Intel sub-group extensions. +def FuncExtIntelSubgroups : FunctionExtension<"cl_intel_subgroups">; +def FuncExtIntelSubgroupsShort : FunctionExtension<"cl_intel_subgroups_short">; +def FuncExtIntelSubgroupsChar : FunctionExtension<"cl_intel_subgroups_char">; +def FuncExtIntelSubgroupsLong : FunctionExtension<"cl_intel_subgroups_long">; +def FuncExtIntelSubgroupsRWImages : FunctionExtension<"cl_intel_subgroups __opencl_c_read_write_images">; +def FuncExtIntelSubgroupsShortRWImages : FunctionExtension<"cl_intel_subgroups_short __opencl_c_read_write_images">; +def FuncExtIntelSubgroupsCharRWImages : FunctionExtension<"cl_intel_subgroups_char __opencl_c_read_write_images">; +def FuncExtIntelSubgroupsLongRWImages : FunctionExtension<"cl_intel_subgroups_long __opencl_c_read_write_images">; + +// cl_intel_subgroups - shuffle functions +// intel_sub_group_shuffle(T, uint) for float/int/uint vectors, half/double +// scalars, and long/ulong scalars. 
+let Extension = FuncExtIntelSubgroups in { + foreach name = ["intel_sub_group_shuffle", "intel_sub_group_shuffle_xor"] in { + def : Builtin<name, [GenTypeFloatVecAndScalar, GenTypeFloatVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [GenTypeIntVecAndScalar, GenTypeIntVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [Long, Long, UInt], Attr.Convergent>; + def : Builtin<name, [ULong, ULong, UInt], Attr.Convergent>; + def : Builtin<name, [Half, Half, UInt], Attr.Convergent>; + def : Builtin<name, [Double, Double, UInt], Attr.Convergent>; + } + + foreach name = ["intel_sub_group_shuffle_down", "intel_sub_group_shuffle_up"] in { + def : Builtin<name, [GenTypeFloatVecAndScalar, GenTypeFloatVecAndScalar, + GenTypeFloatVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [GenTypeIntVecAndScalar, GenTypeIntVecAndScalar, + GenTypeIntVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar, + GenTypeUIntVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [Long, Long, Long, UInt], Attr.Convergent>; + def : Builtin<name, [ULong, ULong, ULong, UInt], Attr.Convergent>; + def : Builtin<name, [Half, Half, Half, UInt], Attr.Convergent>; + def : Builtin<name, [Double, Double, Double, UInt], Attr.Convergent>; + } + + // intel_sub_group_block_read/write from/to image and global memory + def : Builtin<"intel_sub_group_block_read", [UInt, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read2", [VectorType<UInt, 2>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read4", [VectorType<UInt, 4>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read8", [VectorType<UInt, 8>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; 
+ + def : Builtin<"intel_sub_group_block_read", [UInt, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read2", [VectorType<UInt, 2>, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read4", [VectorType<UInt, 4>, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read8", [VectorType<UInt, 8>, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write2", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UInt, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write4", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UInt, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write8", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UInt, 8>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write", [Void, PointerType<UInt, GlobalAS>, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write2", [Void, PointerType<UInt, GlobalAS>, VectorType<UInt, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write4", [Void, PointerType<UInt, GlobalAS>, VectorType<UInt, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write8", [Void, PointerType<UInt, GlobalAS>, VectorType<UInt, 8>], Attr.Convergent>; + + // _ui variants (explicit uint type suffix) + def : Builtin<"intel_sub_group_block_read_ui", [UInt, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui2", [VectorType<UInt, 2>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui4", [VectorType<UInt, 4>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : 
Builtin<"intel_sub_group_block_read_ui8", [VectorType<UInt, 8>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_read_ui", [UInt, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui2", [VectorType<UInt, 2>, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui4", [VectorType<UInt, 4>, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui8", [VectorType<UInt, 8>, PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_ui", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui2", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UInt, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui4", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UInt, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui8", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UInt, 8>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_ui", [Void, PointerType<UInt, GlobalAS>, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui2", [Void, PointerType<UInt, GlobalAS>, VectorType<UInt, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui4", [Void, PointerType<UInt, GlobalAS>, VectorType<UInt, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui8", [Void, PointerType<UInt, GlobalAS>, VectorType<UInt, 8>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsRWImages in { + def : Builtin<"intel_sub_group_block_read", [UInt, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read2", [VectorType<UInt, 2>, ImageType<Image2d, "RW">, VectorType<Int, 2>], 
Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read4", [VectorType<UInt, 4>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read8", [VectorType<UInt, 8>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write2", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UInt, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write4", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UInt, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write8", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UInt, 8>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_read_ui", [UInt, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui2", [VectorType<UInt, 2>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui4", [VectorType<UInt, 4>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ui8", [VectorType<UInt, 8>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_ui", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui2", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UInt, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui4", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UInt, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ui8", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UInt, 8>], Attr.Convergent>; +} + +// cl_intel_subgroups_short - broadcast/scan/reduce, shuffle, 
and block +// read/write for short types +let Extension = FuncExtIntelSubgroupsShort in { + def : Builtin<"intel_sub_group_broadcast", [GenTypeShortVecAndScalarTo8, GenTypeShortVecAndScalarTo8, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_broadcast", [GenTypeUShortVecAndScalarTo8, GenTypeUShortVecAndScalarTo8, UInt], Attr.Convergent>; + + foreach name = ["intel_sub_group_reduce_", "intel_sub_group_scan_exclusive_", + "intel_sub_group_scan_inclusive_"] in { + foreach op = ["add", "min", "max"] in { + def : Builtin<name # op, [Short, Short], Attr.Convergent>; + def : Builtin<name # op, [UShort, UShort], Attr.Convergent>; + } + } + + foreach name = ["intel_sub_group_shuffle", "intel_sub_group_shuffle_xor"] in { + def : Builtin<name, [GenTypeShortVecAndScalar, GenTypeShortVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [GenTypeUShortVecAndScalar, GenTypeUShortVecAndScalar, UInt], Attr.Convergent>; + } + + foreach name = ["intel_sub_group_shuffle_down", "intel_sub_group_shuffle_up"] in { + def : Builtin<name, [GenTypeShortVecAndScalar, GenTypeShortVecAndScalar, + GenTypeShortVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [GenTypeUShortVecAndScalar, GenTypeUShortVecAndScalar, + GenTypeUShortVecAndScalar, UInt], Attr.Convergent>; + } + + def : Builtin<"intel_sub_group_block_read_us", [UShort, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us2", [VectorType<UShort, 2>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us4", [VectorType<UShort, 4>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us8", [VectorType<UShort, 8>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us16", [VectorType<UShort, 16>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + + def : 
Builtin<"intel_sub_group_block_read_us", [UShort, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us2", [VectorType<UShort, 2>, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us4", [VectorType<UShort, 4>, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us8", [VectorType<UShort, 8>, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us16", [VectorType<UShort, 16>, PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_us", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, UShort], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us2", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UShort, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us4", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UShort, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us8", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UShort, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us16", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UShort, 16>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_us", [Void, PointerType<UShort, GlobalAS>, UShort], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us2", [Void, PointerType<UShort, GlobalAS>, VectorType<UShort, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us4", [Void, PointerType<UShort, GlobalAS>, VectorType<UShort, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us8", [Void, PointerType<UShort, GlobalAS>, VectorType<UShort, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us16", [Void, PointerType<UShort, GlobalAS>, VectorType<UShort, 16>], 
Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsShortRWImages in { + def : Builtin<"intel_sub_group_block_read_us", [UShort, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us2", [VectorType<UShort, 2>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us4", [VectorType<UShort, 4>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us8", [VectorType<UShort, 8>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_us16", [VectorType<UShort, 16>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_us", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, UShort], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us2", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UShort, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us4", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UShort, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us8", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UShort, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_us16", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UShort, 16>], Attr.Convergent>; +} + +// cl_intel_subgroups_char - broadcast/scan/reduce, shuffle, and block +// read/write for char types +let Extension = FuncExtIntelSubgroupsChar in { + def : Builtin<"intel_sub_group_broadcast", [GenTypeCharVecAndScalarTo8, GenTypeCharVecAndScalarTo8, UInt], Attr.Convergent>; + def : Builtin<"intel_sub_group_broadcast", [GenTypeUCharVecAndScalarTo8, GenTypeUCharVecAndScalarTo8, UInt], Attr.Convergent>; + + foreach name = ["intel_sub_group_reduce_", "intel_sub_group_scan_exclusive_", + 
"intel_sub_group_scan_inclusive_"] in { + foreach op = ["add", "min", "max"] in { + def : Builtin<name # op, [Char, Char], Attr.Convergent>; + def : Builtin<name # op, [UChar, UChar], Attr.Convergent>; + } + } + + foreach name = ["intel_sub_group_shuffle", "intel_sub_group_shuffle_xor"] in { + def : Builtin<name, [GenTypeCharVecAndScalar, GenTypeCharVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [GenTypeUCharVecAndScalar, GenTypeUCharVecAndScalar, UInt], Attr.Convergent>; + } + + foreach name = ["intel_sub_group_shuffle_down", "intel_sub_group_shuffle_up"] in { + def : Builtin<name, [GenTypeCharVecAndScalar, GenTypeCharVecAndScalar, + GenTypeCharVecAndScalar, UInt], Attr.Convergent>; + def : Builtin<name, [GenTypeUCharVecAndScalar, GenTypeUCharVecAndScalar, + GenTypeUCharVecAndScalar, UInt], Attr.Convergent>; + } + + def : Builtin<"intel_sub_group_block_read_uc", [UChar, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc2", [VectorType<UChar, 2>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc4", [VectorType<UChar, 4>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc8", [VectorType<UChar, 8>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc16", [VectorType<UChar, 16>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_read_uc", [UChar, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc2", [VectorType<UChar, 2>, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc4", [VectorType<UChar, 4>, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc8", [VectorType<UChar, 8>, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc16", [VectorType<UChar, 16>, PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_uc", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, UChar], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc2", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UChar, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc4", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UChar, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc8", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UChar, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc16", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<UChar, 16>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_uc", [Void, PointerType<UChar, GlobalAS>, UChar], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc2", [Void, PointerType<UChar, GlobalAS>, VectorType<UChar, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc4", [Void, PointerType<UChar, GlobalAS>, VectorType<UChar, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc8", [Void, PointerType<UChar, GlobalAS>, VectorType<UChar, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc16", [Void, PointerType<UChar, GlobalAS>, VectorType<UChar, 16>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsCharRWImages in { + def : Builtin<"intel_sub_group_block_read_uc", [UChar, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc2", [VectorType<UChar, 2>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc4", [VectorType<UChar, 4>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; 
+ def : Builtin<"intel_sub_group_block_read_uc8", [VectorType<UChar, 8>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_uc16", [VectorType<UChar, 16>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_uc", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, UChar], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc2", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UChar, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc4", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UChar, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc8", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UChar, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_uc16", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<UChar, 16>], Attr.Convergent>; +} + +// cl_intel_subgroups_long extends block read/write for 64-bit integer types. +// Scalar long/ulong shuffle overloads are part of cl_intel_subgroups itself. 
+let Extension = FuncExtIntelSubgroupsLong in { + def : Builtin<"intel_sub_group_block_read_ul", [ULong, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul2", [VectorType<ULong, 2>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul4", [VectorType<ULong, 4>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul8", [VectorType<ULong, 8>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul16", [VectorType<ULong, 16>, ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_read_ul", [ULong, PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul2", [VectorType<ULong, 2>, PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul4", [VectorType<ULong, 4>, PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul8", [VectorType<ULong, 8>, PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_ul", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, ULong], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul2", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<ULong, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul4", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<ULong, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul8", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<ULong, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul16", [Void, ImageType<Image2d, "WO">, VectorType<Int, 2>, VectorType<ULong, 16>], Attr.Convergent>; + + def : 
Builtin<"intel_sub_group_block_write_ul", [Void, PointerType<ULong, GlobalAS>, ULong], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul2", [Void, PointerType<ULong, GlobalAS>, VectorType<ULong, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul4", [Void, PointerType<ULong, GlobalAS>, VectorType<ULong, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul8", [Void, PointerType<ULong, GlobalAS>, VectorType<ULong, 8>], Attr.Convergent>; +} + +let Extension = FuncExtIntelSubgroupsLongRWImages in { + def : Builtin<"intel_sub_group_block_read_ul", [ULong, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul2", [VectorType<ULong, 2>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul4", [VectorType<ULong, 4>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul8", [VectorType<ULong, 8>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_read_ul16", [VectorType<ULong, 16>, ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>; + + def : Builtin<"intel_sub_group_block_write_ul", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, ULong], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul2", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<ULong, 2>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul4", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<ULong, 4>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul8", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<ULong, 8>], Attr.Convergent>; + def : Builtin<"intel_sub_group_block_write_ul16", [Void, ImageType<Image2d, "RW">, VectorType<Int, 2>, VectorType<ULong, 16>], Attr.Convergent>; +} + 
//-------------------------------------------------------------------- // Arm extensions. let Extension = ArmIntegerDotProductInt8 in { diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index e11bbd7085798..26a706eb81aa1 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7867,11 +7867,13 @@ void Sema::AddFunctionCandidates(const UnresolvedSetImpl &Fns, // This branch handles both standalone functions and static methods. // Slice the first argument (which is the base) when we access - // static method as non-static. + // static operator() as non-static. if (Args.size() > 0 && - (!Args[0] || (FirstArgumentIsBase && isa<CXXMethodDecl>(FD) && - !isa<CXXConstructorDecl>(FD)))) { - assert(cast<CXXMethodDecl>(FD)->isStatic()); + (!Args[0] || + (isa<CXXMethodDecl>(FD) && !isa<CXXConstructorDecl>(FD) && + (FirstArgumentIsBase || + (cast<CXXMethodDecl>(FD)->isStatic() && + FD->getOverloadedOperator() == OO_Call))))) { FunctionArgs = Args.slice(1); } if (FunTmpl) { diff --git a/clang/test/SemaOpenCL/intel-subgroups-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-builtins.cl new file mode 100644 index 0000000000000..473ab5296113d --- /dev/null +++ b/clang/test/SemaOpenCL/intel-subgroups-builtins.cl @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s + +// Keep this test header-free so it exercises OpenCLBuiltins.td instead of +// declarations from opencl-c.h. 
+ +typedef unsigned int uint; +typedef unsigned long ulong; +typedef int int2 __attribute__((ext_vector_type(2))); +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float16 __attribute__((ext_vector_type(16))); +typedef int int16 __attribute__((ext_vector_type(16))); +typedef long long2 __attribute__((ext_vector_type(2))); +typedef uint uint2 __attribute__((ext_vector_type(2))); +typedef uint uint4 __attribute__((ext_vector_type(4))); +typedef uint uint8 __attribute__((ext_vector_type(8))); + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +float3 test_shuffle_float3(float3 value) { + return intel_sub_group_shuffle(value, 1u); +} + +int16 test_shuffle_xor_int16(int16 value) { + return intel_sub_group_shuffle_xor(value, 1u); +} + +uint8 test_shuffle_down_uint8(uint8 current, uint8 next) { + return intel_sub_group_shuffle_down(current, next, 1u); +} + +uint8 test_shuffle_up_uint8(uint8 previous, uint8 current) { + return intel_sub_group_shuffle_up(previous, current, 1u); +} + +half test_shuffle_half(half value) { + return intel_sub_group_shuffle(value, 1u); +} + +double test_shuffle_double(double value) { + return intel_sub_group_shuffle_xor(value, 1u); +} + +long test_shuffle_long(long value) { + return intel_sub_group_shuffle(value, 1u); +} + +ulong test_shuffle_ulong(ulong value) { + return intel_sub_group_shuffle_xor(value, 1u); +} + +uint test_block_read_global(const __global uint *in) { + return intel_sub_group_block_read(in); +} + +uint2 test_block_read2_global(const __global uint *in) { + return intel_sub_group_block_read2(in); +} + +uint4 test_block_read4_global(const __global uint *in) { + return intel_sub_group_block_read4(in); +} + +uint8 test_block_read8_global(const __global uint *in) { + return intel_sub_group_block_read8(in); +} + +uint test_block_read_image(read_only image2d_t image, int2 coord) { + return 
intel_sub_group_block_read(image, coord); +} + +uint2 test_block_read2_image(read_only image2d_t image, int2 coord) { + return intel_sub_group_block_read2(image, coord); +} + +uint4 test_block_read4_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read4(image, coord); +} + +uint8 test_block_read8_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read8(image, coord); +} + +void test_block_write_global(__global uint *out, uint value, uint2 value2, + uint4 value4, uint8 value8) { + intel_sub_group_block_write(out, value); + intel_sub_group_block_write2(out, value2); + intel_sub_group_block_write4(out, value4); + intel_sub_group_block_write8(out, value8); +} + +void test_block_write_image(write_only image2d_t image, read_write image2d_t rw, + int2 coord, uint value, uint2 value2, + uint4 value4, uint8 value8) { + intel_sub_group_block_write(image, coord, value); + intel_sub_group_block_write2(image, coord, value2); + intel_sub_group_block_write4(rw, coord, value4); + intel_sub_group_block_write8(rw, coord, value8); +} + +uint test_block_read_ui_global(const __global uint *in) { + return intel_sub_group_block_read_ui(in); +} + +uint2 test_block_read_ui2_global(const __global uint *in) { + return intel_sub_group_block_read_ui2(in); +} + +uint4 test_block_read_ui4_image(read_only image2d_t image, int2 coord) { + return intel_sub_group_block_read_ui4(image, coord); +} + +uint8 test_block_read_ui8_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read_ui8(image, coord); +} + +void test_block_write_ui_global(__global uint *out, uint value, uint2 value2, + uint4 value4, uint8 value8) { + intel_sub_group_block_write_ui(out, value); + intel_sub_group_block_write_ui2(out, value2); + intel_sub_group_block_write_ui4(out, value4); + intel_sub_group_block_write_ui8(out, value8); +} + +void test_block_write_ui_image(write_only image2d_t image, + read_write image2d_t rw, int2 coord, + uint 
value, uint2 value2, uint4 value4, + uint8 value8) { + intel_sub_group_block_write_ui(image, coord, value); // write_only image + intel_sub_group_block_write_ui2(image, coord, value2); + intel_sub_group_block_write_ui4(rw, coord, value4); + intel_sub_group_block_write_ui8(rw, coord, value8); +} + +void test_long_vectors_rejected(long2 value) { + (void)intel_sub_group_shuffle(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_shuffle'}} + // expected-note@-1 0+{{candidate function not viable}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl new file mode 100644 index 0000000000000..bad1128a10f6a --- /dev/null +++ b/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl @@ -0,0 +1,106 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s + +// Keep this test header-free so it exercises OpenCLBuiltins.td instead of +// declarations from opencl-c.h. 
+ +typedef unsigned int uint; +typedef unsigned char uchar; +typedef int int2 __attribute__((ext_vector_type(2))); +typedef char char3 __attribute__((ext_vector_type(3))); +typedef char char8 __attribute__((ext_vector_type(8))); +typedef char char16 __attribute__((ext_vector_type(16))); +typedef uchar uchar2 __attribute__((ext_vector_type(2))); +typedef uchar uchar4 __attribute__((ext_vector_type(4))); +typedef uchar uchar8 __attribute__((ext_vector_type(8))); +typedef uchar uchar16 __attribute__((ext_vector_type(16))); + +char3 test_broadcast_char3(char3 value) { + return intel_sub_group_broadcast(value, 1u); +} + +uchar8 test_broadcast_uchar8(uchar8 value) { + return intel_sub_group_broadcast(value, 1u); +} + +char16 test_shuffle_char16(char16 value) { + return intel_sub_group_shuffle(value, 1u); +} + +uchar16 test_shuffle_xor_uchar16(uchar16 value) { + return intel_sub_group_shuffle_xor(value, 1u); +} + +char16 test_shuffle_down_char16(char16 current, char16 next) { + return intel_sub_group_shuffle_down(current, next, 1u); +} + +uchar16 test_shuffle_up_uchar16(uchar16 previous, uchar16 current) { + return intel_sub_group_shuffle_up(previous, current, 1u); +} + +char test_collectives_char(char value) { + value = intel_sub_group_reduce_add(value); + value = intel_sub_group_reduce_min(value); + value = intel_sub_group_reduce_max(value); + value = intel_sub_group_scan_exclusive_add(value); + value = intel_sub_group_scan_exclusive_min(value); + value = intel_sub_group_scan_exclusive_max(value); + value = intel_sub_group_scan_inclusive_add(value); + value = intel_sub_group_scan_inclusive_min(value); + value = intel_sub_group_scan_inclusive_max(value); + return value; +} + +uchar test_collectives_uchar(uchar value) { + value = intel_sub_group_reduce_add(value); + value = intel_sub_group_reduce_min(value); + value = intel_sub_group_reduce_max(value); + value = intel_sub_group_scan_exclusive_add(value); + value = intel_sub_group_scan_exclusive_min(value); + value = 
intel_sub_group_scan_exclusive_max(value); + value = intel_sub_group_scan_inclusive_add(value); + value = intel_sub_group_scan_inclusive_min(value); + value = intel_sub_group_scan_inclusive_max(value); + return value; +} + +uchar test_block_read_uc_global(const __global uchar *in) { + return intel_sub_group_block_read_uc(in); +} + +uchar2 test_block_read_uc2_global(const __global uchar *in) { + return intel_sub_group_block_read_uc2(in); +} + +uchar4 test_block_read_uc4_image(read_only image2d_t image, int2 coord) { + return intel_sub_group_block_read_uc4(image, coord); +} + +uchar8 test_block_read_uc8_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read_uc8(image, coord); +} + +uchar16 test_block_read_uc16_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read_uc16(image, coord); +} + +void test_block_write_uc(__global uchar *out, write_only image2d_t image, + read_write image2d_t rw, int2 coord, uchar value, + uchar2 value2, uchar4 value4, uchar8 value8, + uchar16 value16) { + intel_sub_group_block_write_uc(out, value); + intel_sub_group_block_write_uc2(out, value2); + intel_sub_group_block_write_uc4(out, value4); + intel_sub_group_block_write_uc8(out, value8); + intel_sub_group_block_write_uc16(out, value16); + intel_sub_group_block_write_uc(image, coord, value); + intel_sub_group_block_write_uc2(image, coord, value2); + intel_sub_group_block_write_uc4(image, coord, value4); + intel_sub_group_block_write_uc8(rw, coord, value8); + intel_sub_group_block_write_uc16(rw, coord, value16); +} + +void test_broadcast_char16_rejected(char16 value) { + (void)intel_sub_group_broadcast(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} +} diff --git a/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl new file mode 100644 index 
0000000000000..dce694f8635fe --- /dev/null +++ b/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s +// expected-no-diagnostics + +// Keep this test header-free so it exercises OpenCLBuiltins.td instead of +// declarations from opencl-c.h. + +typedef unsigned int uint; +typedef unsigned long ulong; +typedef int int2 __attribute__((ext_vector_type(2))); +typedef ulong ulong2 __attribute__((ext_vector_type(2))); +typedef ulong ulong4 __attribute__((ext_vector_type(4))); +typedef ulong ulong8 __attribute__((ext_vector_type(8))); +typedef ulong ulong16 __attribute__((ext_vector_type(16))); + +long test_shuffle_long(long value) { + value = intel_sub_group_shuffle(value, 1u); + value = intel_sub_group_shuffle_xor(value, 1u); + value = intel_sub_group_shuffle_down(value, value, 1u); + value = intel_sub_group_shuffle_up(value, value, 1u); + return value; +} + +ulong test_shuffle_ulong(ulong value) { + value = intel_sub_group_shuffle(value, 1u); + value = intel_sub_group_shuffle_xor(value, 1u); + value = intel_sub_group_shuffle_down(value, value, 1u); + value = intel_sub_group_shuffle_up(value, value, 1u); + return value; +} + +ulong test_block_read_ul_global(const __global ulong *in) { + return intel_sub_group_block_read_ul(in); +} + +ulong2 test_block_read_ul2_global(const __global ulong *in) { + return intel_sub_group_block_read_ul2(in); +} + +ulong4 test_block_read_ul4_image(read_only image2d_t image, int2 coord) { + return intel_sub_group_block_read_ul4(image, coord); +} + +ulong8 test_block_read_ul8_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read_ul8(image, coord); +} + +ulong16 test_block_read_ul16_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read_ul16(image, coord); +} + +void test_block_write_ul(__global ulong *out, write_only image2d_t image, + read_write 
image2d_t rw, int2 coord, ulong value, + ulong2 value2, ulong4 value4, ulong8 value8, + ulong16 value16) { + intel_sub_group_block_write_ul(out, value); + intel_sub_group_block_write_ul2(out, value2); + intel_sub_group_block_write_ul4(out, value4); + intel_sub_group_block_write_ul8(out, value8); + intel_sub_group_block_write_ul(image, coord, value); + intel_sub_group_block_write_ul2(image, coord, value2); + intel_sub_group_block_write_ul4(image, coord, value4); + intel_sub_group_block_write_ul8(rw, coord, value8); + intel_sub_group_block_write_ul16(rw, coord, value16); +} diff --git a/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl new file mode 100644 index 0000000000000..237974733335d --- /dev/null +++ b/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl @@ -0,0 +1,106 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s + +// Keep this test header-free so it exercises OpenCLBuiltins.td instead of +// declarations from opencl-c.h. 
+ +typedef unsigned int uint; +typedef unsigned short ushort; +typedef int int2 __attribute__((ext_vector_type(2))); +typedef short short3 __attribute__((ext_vector_type(3))); +typedef short short8 __attribute__((ext_vector_type(8))); +typedef short short16 __attribute__((ext_vector_type(16))); +typedef ushort ushort2 __attribute__((ext_vector_type(2))); +typedef ushort ushort4 __attribute__((ext_vector_type(4))); +typedef ushort ushort8 __attribute__((ext_vector_type(8))); +typedef ushort ushort16 __attribute__((ext_vector_type(16))); + +short3 test_broadcast_short3(short3 value) { + return intel_sub_group_broadcast(value, 1u); +} + +ushort8 test_broadcast_ushort8(ushort8 value) { + return intel_sub_group_broadcast(value, 1u); +} + +short16 test_shuffle_short16(short16 value) { + return intel_sub_group_shuffle(value, 1u); +} + +ushort16 test_shuffle_xor_ushort16(ushort16 value) { + return intel_sub_group_shuffle_xor(value, 1u); +} + +short16 test_shuffle_down_short16(short16 current, short16 next) { + return intel_sub_group_shuffle_down(current, next, 1u); +} + +ushort16 test_shuffle_up_ushort16(ushort16 previous, ushort16 current) { + return intel_sub_group_shuffle_up(previous, current, 1u); +} + +short test_collectives_short(short value) { + value = intel_sub_group_reduce_add(value); + value = intel_sub_group_reduce_min(value); + value = intel_sub_group_reduce_max(value); + value = intel_sub_group_scan_exclusive_add(value); + value = intel_sub_group_scan_exclusive_min(value); + value = intel_sub_group_scan_exclusive_max(value); + value = intel_sub_group_scan_inclusive_add(value); + value = intel_sub_group_scan_inclusive_min(value); + value = intel_sub_group_scan_inclusive_max(value); + return value; +} + +ushort test_collectives_ushort(ushort value) { + value = intel_sub_group_reduce_add(value); + value = intel_sub_group_reduce_min(value); + value = intel_sub_group_reduce_max(value); + value = intel_sub_group_scan_exclusive_add(value); + value = 
intel_sub_group_scan_exclusive_min(value); + value = intel_sub_group_scan_exclusive_max(value); + value = intel_sub_group_scan_inclusive_add(value); + value = intel_sub_group_scan_inclusive_min(value); + value = intel_sub_group_scan_inclusive_max(value); + return value; +} + +ushort test_block_read_us_global(const __global ushort *in) { + return intel_sub_group_block_read_us(in); +} + +ushort2 test_block_read_us2_global(const __global ushort *in) { + return intel_sub_group_block_read_us2(in); +} + +ushort4 test_block_read_us4_image(read_only image2d_t image, int2 coord) { + return intel_sub_group_block_read_us4(image, coord); +} + +ushort8 test_block_read_us8_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read_us8(image, coord); +} + +ushort16 test_block_read_us16_rw_image(read_write image2d_t image, int2 coord) { + return intel_sub_group_block_read_us16(image, coord); +} + +void test_block_write_us(__global ushort *out, write_only image2d_t image, + read_write image2d_t rw, int2 coord, ushort value, + ushort2 value2, ushort4 value4, ushort8 value8, + ushort16 value16) { + intel_sub_group_block_write_us(out, value); + intel_sub_group_block_write_us2(out, value2); + intel_sub_group_block_write_us4(out, value4); + intel_sub_group_block_write_us8(out, value8); + intel_sub_group_block_write_us16(out, value16); + intel_sub_group_block_write_us(image, coord, value); + intel_sub_group_block_write_us2(image, coord, value2); + intel_sub_group_block_write_us4(image, coord, value4); + intel_sub_group_block_write_us8(rw, coord, value8); + intel_sub_group_block_write_us16(rw, coord, value16); +} + +void test_broadcast_short16_rejected(short16 value) { + (void)intel_sub_group_broadcast(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}} + // expected-note@-1 0+{{candidate function not viable}} +} _______________________________________________ cfe-commits mailing list [email protected] 
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
