https://github.com/Ippo47 updated 
https://github.com/llvm/llvm-project/pull/199192

>From 9d9d1859446afbddb66919b742d51383e5ac055d Mon Sep 17 00:00:00 2001
From: Igor Gorban <[email protected]>
Date: Fri, 22 May 2026 11:08:50 +0200
Subject: [PATCH] [OpenCL] Add subgroup UI image overloads builtin functions
 (#198904)

Add subgroup UI image overloads builtin functions from the OpenCL
extensions available at
https://github.com/KhronosGroup/OpenCL-Docs/blob/main/extensions/cl_intel_subgroups.asciidoc
https://github.com/KhronosGroup/OpenCL-Docs/blob/main/extensions/cl_intel_subgroups_short.asciidoc
https://github.com/KhronosGroup/OpenCL-Docs/blob/main/extensions/cl_intel_subgroups_char.asciidoc
https://github.com/KhronosGroup/OpenCL-Docs/blob/main/extensions/cl_intel_subgroups_long.asciidoc
---
 .../clangd/unittests/CodeCompleteTests.cpp    |  89 ++++++
 clang/lib/Sema/OpenCLBuiltins.td              | 285 ++++++++++++++++++
 clang/lib/Sema/SemaOverload.cpp               |  10 +-
 .../SemaOpenCL/intel-subgroups-builtins.cl    | 139 +++++++++
 .../intel-subgroups-char-builtins.cl          | 106 +++++++
 .../intel-subgroups-long-builtins.cl          |  64 ++++
 .../intel-subgroups-short-builtins.cl         | 106 +++++++
 7 files changed, 795 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/SemaOpenCL/intel-subgroups-builtins.cl
 create mode 100644 clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl
 create mode 100644 clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl
 create mode 100644 clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl

diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp 
b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index 5808b2145965f..1fbb880121597 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -3494,6 +3494,95 @@ TEST(SignatureHelpTest, SkipExplicitObjectParameter) {
   }
 }
 
+TEST(SignatureHelpTest, StaticCallOperator) {
+  Annotations Code(R"cpp(
+    struct Abc {
+      void operator()(bool a) {}
+    };
+    struct AbcStatic {
+      static void operator()(bool a) {}
+    };
+    void test() {
+      Abc abc;
+      AbcStatic abcStatic;
+      abc($c1^);
+      abcStatic($c2^);
+    }
+  )cpp");
+  auto TU = TestTU::withCode(Code.code());
+  TU.ExtraArgs = {"-std=c++23"}; // static operator() is a C++23 feature
+  MockFS FS;
+  auto Inputs = TU.inputs(FS);
+  auto Preamble = TU.preamble();
+  ASSERT_TRUE(Preamble);
+  {
+    // Case 1: non-static operator() invoked on an object (baseline).
+    const auto Result = signatureHelp(testPath(TU.Filename), Code.point("c1"),
+                                      *Preamble, Inputs, 
MarkupKind::PlainText);
+    EXPECT_EQ(1U, Result.signatures.size());
+    EXPECT_THAT(Result.signatures[0],
+                AllOf(sig("operator()([[bool a]]) -> void")));
+  }
+  {
+    // Case 2: static operator() invoked on an object; same signature expected.
+    const auto Result = signatureHelp(testPath(TU.Filename), Code.point("c2"),
+                                      *Preamble, Inputs, 
MarkupKind::PlainText);
+    EXPECT_EQ(1U, Result.signatures.size());
+    EXPECT_THAT(Result.signatures[0],
+                AllOf(sig("operator()([[bool a]]) -> void")));
+  }
+  {
+    // Case 3: static template operator() with a parameter of type T.
+    Annotations TemplateCode(R"cpp(
+      struct AbcTemplate {
+        template <typename T>
+        static void operator()(T a, bool b) {}
+      };
+      void test() {
+        AbcTemplate abcTemplate;
+        abcTemplate($c3^);
+      }
+    )cpp");
+    auto TU2 = TestTU::withCode(TemplateCode.code());
+    TU2.ExtraArgs = {"-std=c++23"};
+    MockFS FS2;
+    auto Inputs2 = TU2.inputs(FS2);
+    auto Preamble2 = TU2.preamble();
+    ASSERT_TRUE(Preamble2);
+    const auto Result =
+        signatureHelp(testPath(TU2.Filename), TemplateCode.point("c3"),
+                      *Preamble2, Inputs2, MarkupKind::PlainText);
+    EXPECT_EQ(1U, Result.signatures.size());
+    EXPECT_THAT(Result.signatures[0],
+                AllOf(sig("operator()([[T a]], [[bool b]]) -> void")));
+  }
+
+  { // Case 4: static template operator() whose parameters do not use T.
+    Annotations TemplateCode2(R"cpp(
+      struct AbcTemplate2 {
+        template <typename T>
+        static void operator()(bool a, bool b) { T c; }
+      };
+      void test() {
+        AbcTemplate2 abcTemplate2;
+        abcTemplate2($c4^);
+      }
+    )cpp");
+    auto TU3 = TestTU::withCode(TemplateCode2.code());
+    TU3.ExtraArgs = {"-std=c++23"};
+    MockFS FS3;
+    auto Inputs3 = TU3.inputs(FS3);
+    auto Preamble3 = TU3.preamble();
+    ASSERT_TRUE(Preamble3);
+    const auto Result =
+        signatureHelp(testPath(TU3.Filename), TemplateCode2.point("c4"),
+                      *Preamble3, Inputs3, MarkupKind::PlainText);
+    EXPECT_EQ(1U, Result.signatures.size());
+    EXPECT_THAT(Result.signatures[0],
+                AllOf(sig("operator()([[bool a]], [[bool b]]) -> void")));
+  }
+}
+
 TEST(CompletionTest, IncludedCompletionKinds) {
   Annotations Test(R"cpp(#include "^)cpp");
   auto TU = TestTU::withCode(Test.code());
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
index 7fcfd4dfb41ed..356e54c09d02f 100644
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -443,6 +443,7 @@ def VecAndScalar: IntList<"VecAndScalar", [1, 2, 3, 4, 8, 
16]>;
 def VecNoScalar : IntList<"VecNoScalar", [2, 3, 4, 8, 16]>;
 def Vec1        : IntList<"Vec1", [1]>;
 def Vec1234     : IntList<"Vec1234", [1, 2, 3, 4]>;
+def VecAndScalarTo8 : IntList<"VecAndScalarTo8", [1, 2, 3, 4, 8]>;
 
 // Type lists.
 def TLAll           : TypeList<[Char,  UChar, Short,  UShort, Int,  UInt, 
Long,  ULong, Float, Double, Half]>;
@@ -502,6 +503,17 @@ foreach Type = [Float, Double, Half] in {
                           TypeList<[Type]>, Vec1234>;
 }
 
+// Intel subgroup broadcast for narrow integer types is limited to scalar and
+// vector widths up to 8 elements.
+def GenTypeCharVecAndScalarTo8 :
+    GenericType<"GenTypeCharVecAndScalarTo8", TypeList<[Char]>, 
VecAndScalarTo8>;
+def GenTypeUCharVecAndScalarTo8 :
+    GenericType<"GenTypeUCharVecAndScalarTo8", TypeList<[UChar]>, 
VecAndScalarTo8>;
+def GenTypeShortVecAndScalarTo8 :
+    GenericType<"GenTypeShortVecAndScalarTo8", TypeList<[Short]>, 
VecAndScalarTo8>;
+def GenTypeUShortVecAndScalarTo8 :
+    GenericType<"GenTypeUShortVecAndScalarTo8", TypeList<[UShort]>, 
VecAndScalarTo8>;
+
 
 
//===----------------------------------------------------------------------===//
 //                 Definitions of OpenCL builtin functions
@@ -1882,6 +1894,279 @@ let Extension = FunctionExtension<"cl_khr_kernel_clock 
__opencl_c_kernel_clock_s
   def : Builtin<"clock_read_hilo_sub_group", [VectorType<UInt, 2>]>;
 }
 
+//--------------------------------------------------------------------
+// Intel sub-group extensions.
+def FuncExtIntelSubgroups      : FunctionExtension<"cl_intel_subgroups">;
+def FuncExtIntelSubgroupsShort : FunctionExtension<"cl_intel_subgroups_short">;
+def FuncExtIntelSubgroupsChar  : FunctionExtension<"cl_intel_subgroups_char">;
+def FuncExtIntelSubgroupsLong  : FunctionExtension<"cl_intel_subgroups_long">;
+def FuncExtIntelSubgroupsRWImages : FunctionExtension<"cl_intel_subgroups 
__opencl_c_read_write_images">;
+def FuncExtIntelSubgroupsShortRWImages : 
FunctionExtension<"cl_intel_subgroups_short __opencl_c_read_write_images">;
+def FuncExtIntelSubgroupsCharRWImages : 
FunctionExtension<"cl_intel_subgroups_char __opencl_c_read_write_images">;
+def FuncExtIntelSubgroupsLongRWImages : 
FunctionExtension<"cl_intel_subgroups_long __opencl_c_read_write_images">;
+
+// cl_intel_subgroups - shuffle functions
+// intel_sub_group_shuffle(T, uint) for float/int/uint vectors, half/double
+// scalars, and long/ulong scalars.
+let Extension = FuncExtIntelSubgroups in {
+  foreach name = ["intel_sub_group_shuffle", "intel_sub_group_shuffle_xor"] in 
{
+    def : Builtin<name, [GenTypeFloatVecAndScalar, GenTypeFloatVecAndScalar, 
UInt], Attr.Convergent>;
+    def : Builtin<name, [GenTypeIntVecAndScalar, GenTypeIntVecAndScalar, 
UInt], Attr.Convergent>;
+    def : Builtin<name, [GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar, 
UInt], Attr.Convergent>;
+    def : Builtin<name, [Long, Long, UInt], Attr.Convergent>;
+    def : Builtin<name, [ULong, ULong, UInt], Attr.Convergent>;
+    def : Builtin<name, [Half, Half, UInt], Attr.Convergent>;
+    def : Builtin<name, [Double, Double, UInt], Attr.Convergent>;
+  }
+
+  foreach name = ["intel_sub_group_shuffle_down", 
"intel_sub_group_shuffle_up"] in {
+    def : Builtin<name, [GenTypeFloatVecAndScalar, GenTypeFloatVecAndScalar,
+                         GenTypeFloatVecAndScalar, UInt], Attr.Convergent>;
+    def : Builtin<name, [GenTypeIntVecAndScalar, GenTypeIntVecAndScalar,
+                         GenTypeIntVecAndScalar, UInt], Attr.Convergent>;
+    def : Builtin<name, [GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar,
+                         GenTypeUIntVecAndScalar, UInt], Attr.Convergent>;
+    def : Builtin<name, [Long, Long, Long, UInt], Attr.Convergent>;
+    def : Builtin<name, [ULong, ULong, ULong, UInt], Attr.Convergent>;
+    def : Builtin<name, [Half, Half, Half, UInt], Attr.Convergent>;
+    def : Builtin<name, [Double, Double, Double, UInt], Attr.Convergent>;
+  }
+
+  // intel_sub_group_block_read/write from/to image and global memory
+  def : Builtin<"intel_sub_group_block_read", [UInt, ImageType<Image2d, "RO">, 
VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read2", [VectorType<UInt, 2>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read4", [VectorType<UInt, 4>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read8", [VectorType<UInt, 8>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_read", [UInt, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read2", [VectorType<UInt, 2>, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read4", [VectorType<UInt, 4>, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read8", [VectorType<UInt, 8>, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write2", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UInt, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write4", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UInt, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write8", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UInt, 8>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write", [Void, PointerType<UInt, 
GlobalAS>, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write2", [Void, PointerType<UInt, 
GlobalAS>, VectorType<UInt, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write4", [Void, PointerType<UInt, 
GlobalAS>, VectorType<UInt, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write8", [Void, PointerType<UInt, 
GlobalAS>, VectorType<UInt, 8>], Attr.Convergent>;
+
+  // _ui variants (explicit uint suffix); image writes take write_only images.
+  def : Builtin<"intel_sub_group_block_read_ui", [UInt, ImageType<Image2d, 
"RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui2", [VectorType<UInt, 2>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui4", [VectorType<UInt, 4>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui8", [VectorType<UInt, 8>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_read_ui", [UInt, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui2", [VectorType<UInt, 2>, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui4", [VectorType<UInt, 4>, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui8", [VectorType<UInt, 8>, 
PointerType<ConstType<UInt>, GlobalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_ui", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui2", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UInt, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui4", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UInt, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui8", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UInt, 8>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_ui", [Void, PointerType<UInt, 
GlobalAS>, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui2", [Void, PointerType<UInt, 
GlobalAS>, VectorType<UInt, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui4", [Void, PointerType<UInt, 
GlobalAS>, VectorType<UInt, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui8", [Void, PointerType<UInt, 
GlobalAS>, VectorType<UInt, 8>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsRWImages in {
+  def : Builtin<"intel_sub_group_block_read", [UInt, ImageType<Image2d, "RW">, 
VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read2", [VectorType<UInt, 2>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read4", [VectorType<UInt, 4>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read8", [VectorType<UInt, 8>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write2", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UInt, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write4", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UInt, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write8", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UInt, 8>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_read_ui", [UInt, ImageType<Image2d, 
"RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui2", [VectorType<UInt, 2>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui4", [VectorType<UInt, 4>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ui8", [VectorType<UInt, 8>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_ui", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui2", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UInt, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui4", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UInt, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ui8", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UInt, 8>], Attr.Convergent>;
+}
+
+// cl_intel_subgroups_short - broadcast/scan/reduce, shuffle, and block
+// read/write for short types
+let Extension = FuncExtIntelSubgroupsShort in {
+  def : Builtin<"intel_sub_group_broadcast", [GenTypeShortVecAndScalarTo8, 
GenTypeShortVecAndScalarTo8, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_broadcast", [GenTypeUShortVecAndScalarTo8, 
GenTypeUShortVecAndScalarTo8, UInt], Attr.Convergent>;
+
+  foreach name = ["intel_sub_group_reduce_", "intel_sub_group_scan_exclusive_",
+                  "intel_sub_group_scan_inclusive_"] in {
+    foreach op = ["add", "min", "max"] in {
+      def : Builtin<name # op, [Short, Short], Attr.Convergent>;
+      def : Builtin<name # op, [UShort, UShort], Attr.Convergent>;
+    }
+  }
+
+  foreach name = ["intel_sub_group_shuffle", "intel_sub_group_shuffle_xor"] in 
{
+    def : Builtin<name, [GenTypeShortVecAndScalar, GenTypeShortVecAndScalar, 
UInt], Attr.Convergent>;
+    def : Builtin<name, [GenTypeUShortVecAndScalar, GenTypeUShortVecAndScalar, 
UInt], Attr.Convergent>;
+  }
+
+  foreach name = ["intel_sub_group_shuffle_down", 
"intel_sub_group_shuffle_up"] in {
+    def : Builtin<name, [GenTypeShortVecAndScalar, GenTypeShortVecAndScalar,
+                         GenTypeShortVecAndScalar, UInt], Attr.Convergent>;
+    def : Builtin<name, [GenTypeUShortVecAndScalar, GenTypeUShortVecAndScalar,
+                         GenTypeUShortVecAndScalar, UInt], Attr.Convergent>;
+  }
+
+  def : Builtin<"intel_sub_group_block_read_us", [UShort, ImageType<Image2d, 
"RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us2", [VectorType<UShort, 2>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us4", [VectorType<UShort, 4>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us8", [VectorType<UShort, 8>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us16", [VectorType<UShort, 16>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_read_us", [UShort, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us2", [VectorType<UShort, 2>, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us4", [VectorType<UShort, 4>, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us8", [VectorType<UShort, 8>, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us16", [VectorType<UShort, 16>, 
PointerType<ConstType<UShort>, GlobalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_us", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, UShort], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us2", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UShort, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us4", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UShort, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us8", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UShort, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us16", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UShort, 16>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_us", [Void, PointerType<UShort, 
GlobalAS>, UShort], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us2", [Void, PointerType<UShort, 
GlobalAS>, VectorType<UShort, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us4", [Void, PointerType<UShort, 
GlobalAS>, VectorType<UShort, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us8", [Void, PointerType<UShort, 
GlobalAS>, VectorType<UShort, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us16", [Void, PointerType<UShort, 
GlobalAS>, VectorType<UShort, 16>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsShortRWImages in {
+  def : Builtin<"intel_sub_group_block_read_us", [UShort, ImageType<Image2d, 
"RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us2", [VectorType<UShort, 2>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us4", [VectorType<UShort, 4>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us8", [VectorType<UShort, 8>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_us16", [VectorType<UShort, 16>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_us", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, UShort], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us2", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UShort, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us4", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UShort, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us8", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UShort, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_us16", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UShort, 16>], Attr.Convergent>;
+}
+
+// cl_intel_subgroups_char - broadcast/scan/reduce, shuffle, and block
+// read/write for char types
+let Extension = FuncExtIntelSubgroupsChar in {
+  def : Builtin<"intel_sub_group_broadcast", [GenTypeCharVecAndScalarTo8, 
GenTypeCharVecAndScalarTo8, UInt], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_broadcast", [GenTypeUCharVecAndScalarTo8, 
GenTypeUCharVecAndScalarTo8, UInt], Attr.Convergent>;
+
+  foreach name = ["intel_sub_group_reduce_", "intel_sub_group_scan_exclusive_",
+                  "intel_sub_group_scan_inclusive_"] in {
+    foreach op = ["add", "min", "max"] in {
+      def : Builtin<name # op, [Char, Char], Attr.Convergent>;
+      def : Builtin<name # op, [UChar, UChar], Attr.Convergent>;
+    }
+  }
+
+  foreach name = ["intel_sub_group_shuffle", "intel_sub_group_shuffle_xor"] in 
{
+    def : Builtin<name, [GenTypeCharVecAndScalar, GenTypeCharVecAndScalar, 
UInt], Attr.Convergent>;
+    def : Builtin<name, [GenTypeUCharVecAndScalar, GenTypeUCharVecAndScalar, 
UInt], Attr.Convergent>;
+  }
+
+  foreach name = ["intel_sub_group_shuffle_down", 
"intel_sub_group_shuffle_up"] in {
+    def : Builtin<name, [GenTypeCharVecAndScalar, GenTypeCharVecAndScalar,
+                         GenTypeCharVecAndScalar, UInt], Attr.Convergent>;
+    def : Builtin<name, [GenTypeUCharVecAndScalar, GenTypeUCharVecAndScalar,
+                         GenTypeUCharVecAndScalar, UInt], Attr.Convergent>;
+  }
+
+  def : Builtin<"intel_sub_group_block_read_uc", [UChar, ImageType<Image2d, 
"RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc2", [VectorType<UChar, 2>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc4", [VectorType<UChar, 4>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc8", [VectorType<UChar, 8>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc16", [VectorType<UChar, 16>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_read_uc", [UChar, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc2", [VectorType<UChar, 2>, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc4", [VectorType<UChar, 4>, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc8", [VectorType<UChar, 8>, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc16", [VectorType<UChar, 16>, 
PointerType<ConstType<UChar>, GlobalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_uc", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, UChar], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc2", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UChar, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc4", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UChar, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc8", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UChar, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc16", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<UChar, 16>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_uc", [Void, PointerType<UChar, 
GlobalAS>, UChar], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc2", [Void, PointerType<UChar, 
GlobalAS>, VectorType<UChar, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc4", [Void, PointerType<UChar, 
GlobalAS>, VectorType<UChar, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc8", [Void, PointerType<UChar, 
GlobalAS>, VectorType<UChar, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc16", [Void, PointerType<UChar, 
GlobalAS>, VectorType<UChar, 16>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsCharRWImages in {
+  def : Builtin<"intel_sub_group_block_read_uc", [UChar, ImageType<Image2d, 
"RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc2", [VectorType<UChar, 2>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc4", [VectorType<UChar, 4>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc8", [VectorType<UChar, 8>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_uc16", [VectorType<UChar, 16>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_uc", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, UChar], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc2", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UChar, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc4", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UChar, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc8", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UChar, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_uc16", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<UChar, 16>], Attr.Convergent>;
+}
+
+// cl_intel_subgroups_long extends block read/write for 64-bit integer types.
+// Scalar long/ulong shuffle overloads are part of cl_intel_subgroups itself.
+let Extension = FuncExtIntelSubgroupsLong in {
+  def : Builtin<"intel_sub_group_block_read_ul", [ULong, ImageType<Image2d, 
"RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul2", [VectorType<ULong, 2>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul4", [VectorType<ULong, 4>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul8", [VectorType<ULong, 8>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul16", [VectorType<ULong, 16>, 
ImageType<Image2d, "RO">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_read_ul", [ULong, 
PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul2", [VectorType<ULong, 2>, 
PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul4", [VectorType<ULong, 4>, 
PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul8", [VectorType<ULong, 8>, 
PointerType<ConstType<ULong>, GlobalAS>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_ul", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, ULong], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul2", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<ULong, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul4", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<ULong, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul8", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<ULong, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul16", [Void, ImageType<Image2d, 
"WO">, VectorType<Int, 2>, VectorType<ULong, 16>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_ul", [Void, PointerType<ULong, 
GlobalAS>, ULong], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul2", [Void, PointerType<ULong, 
GlobalAS>, VectorType<ULong, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul4", [Void, PointerType<ULong, 
GlobalAS>, VectorType<ULong, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul8", [Void, PointerType<ULong, 
GlobalAS>, VectorType<ULong, 8>], Attr.Convergent>;
+}
+
+let Extension = FuncExtIntelSubgroupsLongRWImages in {
+  def : Builtin<"intel_sub_group_block_read_ul", [ULong, ImageType<Image2d, 
"RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul2", [VectorType<ULong, 2>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul4", [VectorType<ULong, 4>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul8", [VectorType<ULong, 8>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_read_ul16", [VectorType<ULong, 16>, 
ImageType<Image2d, "RW">, VectorType<Int, 2>], Attr.Convergent>;
+
+  def : Builtin<"intel_sub_group_block_write_ul", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, ULong], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul2", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<ULong, 2>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul4", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<ULong, 4>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul8", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<ULong, 8>], Attr.Convergent>;
+  def : Builtin<"intel_sub_group_block_write_ul16", [Void, ImageType<Image2d, 
"RW">, VectorType<Int, 2>, VectorType<ULong, 16>], Attr.Convergent>;
+}
+
 //--------------------------------------------------------------------
 // Arm extensions.
 let Extension = ArmIntegerDotProductInt8 in {
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index e11bbd7085798..26a706eb81aa1 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -7867,11 +7867,13 @@ void Sema::AddFunctionCandidates(const UnresolvedSetImpl &Fns,
       // This branch handles both standalone functions and static methods.
 
       // Slice the first argument (which is the base) when we access
-      // static method as non-static.
+      // static method as non-static, or call static operator() on it.
       if (Args.size() > 0 &&
-          (!Args[0] || (FirstArgumentIsBase && isa<CXXMethodDecl>(FD) &&
-                        !isa<CXXConstructorDecl>(FD)))) {
-        assert(cast<CXXMethodDecl>(FD)->isStatic());
+          (!Args[0] ||
+           (isa<CXXMethodDecl>(FD) && !isa<CXXConstructorDecl>(FD) &&
+            (FirstArgumentIsBase ||
+             (cast<CXXMethodDecl>(FD)->isStatic() &&
+              FD->getOverloadedOperator() == OO_Call))))) {
         FunctionArgs = Args.slice(1);
       }
       if (FunTmpl) {
diff --git a/clang/test/SemaOpenCL/intel-subgroups-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-builtins.cl
new file mode 100644
index 0000000000000..473ab5296113d
--- /dev/null
+++ b/clang/test/SemaOpenCL/intel-subgroups-builtins.cl
@@ -0,0 +1,139 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s
+
+// Keep this test header-free so it exercises OpenCLBuiltins.td instead of
+// declarations from opencl-c.h.
+
+typedef unsigned int uint;
+typedef unsigned long ulong;
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float16 __attribute__((ext_vector_type(16)));
+typedef int int16 __attribute__((ext_vector_type(16)));
+typedef long long2 __attribute__((ext_vector_type(2)));
+typedef uint uint2 __attribute__((ext_vector_type(2)));
+typedef uint uint4 __attribute__((ext_vector_type(4)));
+typedef uint uint8 __attribute__((ext_vector_type(8)));
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+float3 test_shuffle_float3(float3 value) {
+  return intel_sub_group_shuffle(value, 1u);
+}
+
+int16 test_shuffle_xor_int16(int16 value) {
+  return intel_sub_group_shuffle_xor(value, 1u);
+}
+
+uint8 test_shuffle_down_uint8(uint8 current, uint8 next) {
+  return intel_sub_group_shuffle_down(current, next, 1u);
+}
+
+uint8 test_shuffle_up_uint8(uint8 previous, uint8 current) {
+  return intel_sub_group_shuffle_up(previous, current, 1u);
+}
+
+half test_shuffle_half(half value) {
+  return intel_sub_group_shuffle(value, 1u);
+}
+
+double test_shuffle_double(double value) {
+  return intel_sub_group_shuffle_xor(value, 1u);
+}
+
+long test_shuffle_long(long value) {
+  return intel_sub_group_shuffle(value, 1u);
+}
+
+ulong test_shuffle_ulong(ulong value) {
+  return intel_sub_group_shuffle_xor(value, 1u);
+}
+
+uint test_block_read_global(const __global uint *in) {
+  return intel_sub_group_block_read(in);
+}
+
+uint2 test_block_read2_global(const __global uint *in) {
+  return intel_sub_group_block_read2(in);
+}
+
+uint4 test_block_read4_global(const __global uint *in) {
+  return intel_sub_group_block_read4(in);
+}
+
+uint8 test_block_read8_global(const __global uint *in) {
+  return intel_sub_group_block_read8(in);
+}
+
+uint test_block_read_image(read_only image2d_t image, int2 coord) {
+  return intel_sub_group_block_read(image, coord);
+}
+
+uint2 test_block_read2_image(read_only image2d_t image, int2 coord) {
+  return intel_sub_group_block_read2(image, coord);
+}
+
+uint4 test_block_read4_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read4(image, coord);
+}
+
+uint8 test_block_read8_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read8(image, coord);
+}
+
+void test_block_write_global(__global uint *out, uint value, uint2 value2,
+                             uint4 value4, uint8 value8) {
+  intel_sub_group_block_write(out, value);
+  intel_sub_group_block_write2(out, value2);
+  intel_sub_group_block_write4(out, value4);
+  intel_sub_group_block_write8(out, value8);
+}
+
+void test_block_write_image(write_only image2d_t image, read_write image2d_t rw,
+                            int2 coord, uint value, uint2 value2,
+                            uint4 value4, uint8 value8) {
+  intel_sub_group_block_write(image, coord, value);
+  intel_sub_group_block_write2(image, coord, value2);
+  intel_sub_group_block_write4(rw, coord, value4);
+  intel_sub_group_block_write8(rw, coord, value8);
+}
+
+uint test_block_read_ui_global(const __global uint *in) {
+  return intel_sub_group_block_read_ui(in);
+}
+
+uint2 test_block_read_ui2_global(const __global uint *in) {
+  return intel_sub_group_block_read_ui2(in);
+}
+
+uint4 test_block_read_ui4_image(read_only image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_ui4(image, coord);
+}
+
+uint8 test_block_read_ui8_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_ui8(image, coord);
+}
+
+void test_block_write_ui_global(__global uint *out, uint value, uint2 value2,
+                                uint4 value4, uint8 value8) {
+  intel_sub_group_block_write_ui(out, value);
+  intel_sub_group_block_write_ui2(out, value2);
+  intel_sub_group_block_write_ui4(out, value4);
+  intel_sub_group_block_write_ui8(out, value8);
+}
+
+void test_block_write_ui_image(write_only image2d_t image,
+                               read_write image2d_t rw, int2 coord,
+                               uint value, uint2 value2, uint4 value4,
+                               uint8 value8) {
+  intel_sub_group_block_write_ui(image, coord, value);
+  intel_sub_group_block_write_ui2(image, coord, value2);
+  intel_sub_group_block_write_ui4(rw, coord, value4);
+  intel_sub_group_block_write_ui8(rw, coord, value8);
+}
+
+void test_long_vectors_rejected(long2 value) {
+  (void)intel_sub_group_shuffle(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_shuffle'}}
+  // expected-note@-1 0+{{candidate function not viable}}
+}
diff --git a/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl
new file mode 100644
index 0000000000000..bad1128a10f6a
--- /dev/null
+++ b/clang/test/SemaOpenCL/intel-subgroups-char-builtins.cl
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s
+
+// Keep this test header-free so it exercises OpenCLBuiltins.td instead of
+// declarations from opencl-c.h.
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef char char3 __attribute__((ext_vector_type(3)));
+typedef char char8 __attribute__((ext_vector_type(8)));
+typedef char char16 __attribute__((ext_vector_type(16)));
+typedef uchar uchar2 __attribute__((ext_vector_type(2)));
+typedef uchar uchar4 __attribute__((ext_vector_type(4)));
+typedef uchar uchar8 __attribute__((ext_vector_type(8)));
+typedef uchar uchar16 __attribute__((ext_vector_type(16)));
+
+char3 test_broadcast_char3(char3 value) {
+  return intel_sub_group_broadcast(value, 1u);
+}
+
+uchar8 test_broadcast_uchar8(uchar8 value) {
+  return intel_sub_group_broadcast(value, 1u);
+}
+
+char16 test_shuffle_char16(char16 value) {
+  return intel_sub_group_shuffle(value, 1u);
+}
+
+uchar16 test_shuffle_xor_uchar16(uchar16 value) {
+  return intel_sub_group_shuffle_xor(value, 1u);
+}
+
+char16 test_shuffle_down_char16(char16 current, char16 next) {
+  return intel_sub_group_shuffle_down(current, next, 1u);
+}
+
+uchar16 test_shuffle_up_uchar16(uchar16 previous, uchar16 current) {
+  return intel_sub_group_shuffle_up(previous, current, 1u);
+}
+
+char test_collectives_char(char value) {
+  value = intel_sub_group_reduce_add(value);
+  value = intel_sub_group_reduce_min(value);
+  value = intel_sub_group_reduce_max(value);
+  value = intel_sub_group_scan_exclusive_add(value);
+  value = intel_sub_group_scan_exclusive_min(value);
+  value = intel_sub_group_scan_exclusive_max(value);
+  value = intel_sub_group_scan_inclusive_add(value);
+  value = intel_sub_group_scan_inclusive_min(value);
+  value = intel_sub_group_scan_inclusive_max(value);
+  return value;
+}
+
+uchar test_collectives_uchar(uchar value) {
+  value = intel_sub_group_reduce_add(value);
+  value = intel_sub_group_reduce_min(value);
+  value = intel_sub_group_reduce_max(value);
+  value = intel_sub_group_scan_exclusive_add(value);
+  value = intel_sub_group_scan_exclusive_min(value);
+  value = intel_sub_group_scan_exclusive_max(value);
+  value = intel_sub_group_scan_inclusive_add(value);
+  value = intel_sub_group_scan_inclusive_min(value);
+  value = intel_sub_group_scan_inclusive_max(value);
+  return value;
+}
+
+uchar test_block_read_uc_global(const __global uchar *in) {
+  return intel_sub_group_block_read_uc(in);
+}
+
+uchar2 test_block_read_uc2_global(const __global uchar *in) {
+  return intel_sub_group_block_read_uc2(in);
+}
+
+uchar4 test_block_read_uc4_image(read_only image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_uc4(image, coord);
+}
+
+uchar8 test_block_read_uc8_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_uc8(image, coord);
+}
+
+uchar16 test_block_read_uc16_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_uc16(image, coord);
+}
+
+void test_block_write_uc(__global uchar *out, write_only image2d_t image,
+                         read_write image2d_t rw, int2 coord, uchar value,
+                         uchar2 value2, uchar4 value4, uchar8 value8,
+                         uchar16 value16) {
+  intel_sub_group_block_write_uc(out, value);
+  intel_sub_group_block_write_uc2(out, value2);
+  intel_sub_group_block_write_uc4(out, value4);
+  intel_sub_group_block_write_uc8(out, value8);
+  intel_sub_group_block_write_uc16(out, value16);
+  intel_sub_group_block_write_uc(image, coord, value);
+  intel_sub_group_block_write_uc2(image, coord, value2);
+  intel_sub_group_block_write_uc4(image, coord, value4);
+  intel_sub_group_block_write_uc8(rw, coord, value8);
+  intel_sub_group_block_write_uc16(rw, coord, value16);
+}
+
+void test_broadcast_char16_rejected(char16 value) {
+  (void)intel_sub_group_broadcast(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}}
+  // expected-note@-1 0+{{candidate function not viable}}
+}
diff --git a/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl
new file mode 100644
index 0000000000000..dce694f8635fe
--- /dev/null
+++ b/clang/test/SemaOpenCL/intel-subgroups-long-builtins.cl
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s
+// expected-no-diagnostics
+
+// Keep this test header-free so it exercises OpenCLBuiltins.td instead of
+// declarations from opencl-c.h.
+
+typedef unsigned int uint;
+typedef unsigned long ulong;
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef ulong ulong2 __attribute__((ext_vector_type(2)));
+typedef ulong ulong4 __attribute__((ext_vector_type(4)));
+typedef ulong ulong8 __attribute__((ext_vector_type(8)));
+typedef ulong ulong16 __attribute__((ext_vector_type(16)));
+
+long test_shuffle_long(long value) {
+  value = intel_sub_group_shuffle(value, 1u);
+  value = intel_sub_group_shuffle_xor(value, 1u);
+  value = intel_sub_group_shuffle_down(value, value, 1u);
+  value = intel_sub_group_shuffle_up(value, value, 1u);
+  return value;
+}
+
+ulong test_shuffle_ulong(ulong value) {
+  value = intel_sub_group_shuffle(value, 1u);
+  value = intel_sub_group_shuffle_xor(value, 1u);
+  value = intel_sub_group_shuffle_down(value, value, 1u);
+  value = intel_sub_group_shuffle_up(value, value, 1u);
+  return value;
+}
+
+ulong test_block_read_ul_global(const __global ulong *in) {
+  return intel_sub_group_block_read_ul(in);
+}
+
+ulong2 test_block_read_ul2_global(const __global ulong *in) {
+  return intel_sub_group_block_read_ul2(in);
+}
+
+ulong4 test_block_read_ul4_image(read_only image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_ul4(image, coord);
+}
+
+ulong8 test_block_read_ul8_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_ul8(image, coord);
+}
+
+ulong16 test_block_read_ul16_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_ul16(image, coord);
+}
+
+void test_block_write_ul(__global ulong *out, write_only image2d_t image,
+                         read_write image2d_t rw, int2 coord, ulong value,
+                         ulong2 value2, ulong4 value4, ulong8 value8,
+                         ulong16 value16) {
+  intel_sub_group_block_write_ul(out, value);
+  intel_sub_group_block_write_ul2(out, value2);
+  intel_sub_group_block_write_ul4(out, value4);
+  intel_sub_group_block_write_ul8(out, value8);
+  intel_sub_group_block_write_ul(image, coord, value);
+  intel_sub_group_block_write_ul2(image, coord, value2);
+  intel_sub_group_block_write_ul4(image, coord, value4);
+  intel_sub_group_block_write_ul8(rw, coord, value8);
+  intel_sub_group_block_write_ul16(rw, coord, value16);
+}
diff --git a/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl b/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl
new file mode 100644
index 0000000000000..237974733335d
--- /dev/null
+++ b/clang/test/SemaOpenCL/intel-subgroups-short-builtins.cl
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 -triple spir-unknown-unknown -cl-std=CL3.0 -fdeclare-opencl-builtins -verify -fsyntax-only %s
+
+// Keep this test header-free so it exercises OpenCLBuiltins.td instead of
+// declarations from opencl-c.h.
+
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef short short3 __attribute__((ext_vector_type(3)));
+typedef short short8 __attribute__((ext_vector_type(8)));
+typedef short short16 __attribute__((ext_vector_type(16)));
+typedef ushort ushort2 __attribute__((ext_vector_type(2)));
+typedef ushort ushort4 __attribute__((ext_vector_type(4)));
+typedef ushort ushort8 __attribute__((ext_vector_type(8)));
+typedef ushort ushort16 __attribute__((ext_vector_type(16)));
+
+short3 test_broadcast_short3(short3 value) {
+  return intel_sub_group_broadcast(value, 1u);
+}
+
+ushort8 test_broadcast_ushort8(ushort8 value) {
+  return intel_sub_group_broadcast(value, 1u);
+}
+
+short16 test_shuffle_short16(short16 value) {
+  return intel_sub_group_shuffle(value, 1u);
+}
+
+ushort16 test_shuffle_xor_ushort16(ushort16 value) {
+  return intel_sub_group_shuffle_xor(value, 1u);
+}
+
+short16 test_shuffle_down_short16(short16 current, short16 next) {
+  return intel_sub_group_shuffle_down(current, next, 1u);
+}
+
+ushort16 test_shuffle_up_ushort16(ushort16 previous, ushort16 current) {
+  return intel_sub_group_shuffle_up(previous, current, 1u);
+}
+
+short test_collectives_short(short value) {
+  value = intel_sub_group_reduce_add(value);
+  value = intel_sub_group_reduce_min(value);
+  value = intel_sub_group_reduce_max(value);
+  value = intel_sub_group_scan_exclusive_add(value);
+  value = intel_sub_group_scan_exclusive_min(value);
+  value = intel_sub_group_scan_exclusive_max(value);
+  value = intel_sub_group_scan_inclusive_add(value);
+  value = intel_sub_group_scan_inclusive_min(value);
+  value = intel_sub_group_scan_inclusive_max(value);
+  return value;
+}
+
+ushort test_collectives_ushort(ushort value) {
+  value = intel_sub_group_reduce_add(value);
+  value = intel_sub_group_reduce_min(value);
+  value = intel_sub_group_reduce_max(value);
+  value = intel_sub_group_scan_exclusive_add(value);
+  value = intel_sub_group_scan_exclusive_min(value);
+  value = intel_sub_group_scan_exclusive_max(value);
+  value = intel_sub_group_scan_inclusive_add(value);
+  value = intel_sub_group_scan_inclusive_min(value);
+  value = intel_sub_group_scan_inclusive_max(value);
+  return value;
+}
+
+ushort test_block_read_us_global(const __global ushort *in) {
+  return intel_sub_group_block_read_us(in);
+}
+
+ushort2 test_block_read_us2_global(const __global ushort *in) {
+  return intel_sub_group_block_read_us2(in);
+}
+
+ushort4 test_block_read_us4_image(read_only image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_us4(image, coord);
+}
+
+ushort8 test_block_read_us8_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_us8(image, coord);
+}
+
+ushort16 test_block_read_us16_rw_image(read_write image2d_t image, int2 coord) {
+  return intel_sub_group_block_read_us16(image, coord);
+}
+
+void test_block_write_us(__global ushort *out, write_only image2d_t image,
+                         read_write image2d_t rw, int2 coord, ushort value,
+                         ushort2 value2, ushort4 value4, ushort8 value8,
+                         ushort16 value16) {
+  intel_sub_group_block_write_us(out, value);
+  intel_sub_group_block_write_us2(out, value2);
+  intel_sub_group_block_write_us4(out, value4);
+  intel_sub_group_block_write_us8(out, value8);
+  intel_sub_group_block_write_us16(out, value16);
+  intel_sub_group_block_write_us(image, coord, value);
+  intel_sub_group_block_write_us2(image, coord, value2);
+  intel_sub_group_block_write_us4(image, coord, value4);
+  intel_sub_group_block_write_us8(rw, coord, value8);
+  intel_sub_group_block_write_us16(rw, coord, value16);
+}
+
+void test_broadcast_short16_rejected(short16 value) {
+  (void)intel_sub_group_broadcast(value, 0u); // expected-error{{no matching function for call to 'intel_sub_group_broadcast'}}
+  // expected-note@-1 0+{{candidate function not viable}}
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to