[clang] [llvm] [PowerPC] Implement Deeply Compressed Weights Builtins (PR #184666)

Lei Huang via cfe-commits Thu, 26 Mar 2026 09:47:25 -0700

https://github.com/lei137 updated 
https://github.com/llvm/llvm-project/pull/184666


>From 964ffb48a9caeb7a4cc8e167544f292572a191f6 Mon Sep 17 00:00:00 2001
From: Lei Huang <[email protected]>
Date: Wed, 4 Mar 2026 14:12:42 -0500
Subject: [PATCH 1/7] [PowerPC] Implement Deeply Compressed Weights Builtins

Add support for the following deeply compressed weights builtins for ISA Future.
- vec_uncompresshn(vector unsigned char, vector unsigned char)
- vec_uncompressln(vector unsigned char, vector unsigned char)
- vec_uncompresshb(vector unsigned char, vector unsigned char)
- vec_uncompresslb(vector unsigned char, vector unsigned char)
- vec_uncompresshh(vector unsigned char, vector unsigned char)
- vec_uncompresslh(vector unsigned char, vector unsigned char)
- vec_unpack_hsn_to_byte(vector unsigned char)
- vec_unpack_lsn_to_byte(vector unsigned char)
- vec_unpack_int4_to_bf16(vector unsigned char, uint2)
- vec_unpack_int8_to_bf16(vector unsigned char, uint1)
- vec_unpack_int4_to_fp32(vector unsigned char, uint3)
- vec_unpack_int8_to_fp32(vector unsigned char, uint2)
---
 clang/include/clang/Basic/BuiltinsPPC.def     |  26 ++
 clang/lib/Basic/Targets/PPC.cpp               |   4 +
 clang/lib/Basic/Targets/PPC.h                 |   1 +
 clang/lib/Headers/altivec.h                   |  58 +++++
 clang/lib/Sema/SemaPPC.cpp                    |   8 +
 .../builtins-ppc-deeply-compressed-weights.c  | 194 ++++++++++++++
 ...tins-ppc-deeply-compressed-weights-error.c |  54 ++++
 llvm/include/llvm/IR/IntrinsicsPowerPC.td     |  30 +++
 llvm/lib/Target/PowerPC/PPCInstrFuture.td     |  48 +++-
 .../PowerPC/deeply-compressed-weights.ll      | 244 ++++++++++++++++++
 10 files changed, 655 insertions(+), 12 deletions(-)
 create mode 100644 
clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
 create mode 100644 
clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
 create mode 100644 llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def 
b/clang/include/clang/Basic/BuiltinsPPC.def
index c0c92c0b73793..f99a019b71f2b 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1162,6 +1162,32 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, 
"vW1024*W256V",
 UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3",
                              "mma,isa-future-instructions")
 
+// Deeply Compressed Weights built-ins.
+TARGET_BUILTIN(__builtin_altivec_vucmprhn, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprln, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprhb, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprlb, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprhh, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vucmprlh, "V16UcV16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkint4tobf16, "V16UcV16UcIi", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkint8tobf16, "V16UcV16UcIi", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkint4tofp32, "V16UcV16UcIi", "",
+               "isa-future-instructions")
+TARGET_BUILTIN(__builtin_altivec_vupkint8tofp32, "V16UcV16UcIi", "",
+               "isa-future-instructions")
+
 // FIXME: Obviously incomplete.
 
 #undef BUILTIN
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 30ea714fbb6f8..90e2050e4d1d4 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -59,6 +59,8 @@ bool 
PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasP9Vector = true;
     } else if (Feature == "+power10-vector") {
       HasP10Vector = true;
+    } else if (Feature == "+isa-future-instructions") {
+      HasFutureVector = true;
     } else if (Feature == "+pcrelative-memops") {
       HasPCRelativeMemops = true;
     } else if (Feature == "+spe" || Feature == "+efpu2") {
@@ -434,6 +436,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions 
&Opts,
     Builder.defineMacro("__POWER10_VECTOR__");
   if (HasPCRelativeMemops)
     Builder.defineMacro("__PCREL__");
+  if (HasFutureVector)
+    Builder.defineMacro("__FUTURE_VECTOR__");
 
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 6f90ff1f5d57c..a9f49aa3aebe1 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -69,6 +69,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public 
TargetInfo {
   bool HasFrsqrte = false;
   bool HasFrsqrtes = false;
   bool HasP10Vector = false;
+  bool HasFutureVector = false;
   bool HasPCRelativeMemops = false;
   bool HasQuadwordAtomics = false;
   bool UseLongCalls = false;
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 1c778ea0a829f..3de356a1a0e4d 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -19314,6 +19314,64 @@ vec_sra(vector signed __int128 __a, vector unsigned 
__int128 __b) {
 #endif /* __SIZEOF_INT128__ */
 #endif /* __POWER10_VECTOR__ */
 
+#ifdef __FUTURE_VECTOR__
+
+/* vec_uncompress* - Deeply Compressed Weights builtins */
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresshn(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprhn(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompressln(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprln(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresshb(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprhb(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresslb(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprlb(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresshh(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprhh(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_uncompresslh(vector unsigned char __a, vector unsigned char __b) {
+  return __builtin_altivec_vucmprlh(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_unpack_hsn_to_byte(vector unsigned char __a) {
+  return __builtin_altivec_vupkhsntob(__a);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_unpack_lsn_to_byte(vector unsigned char __a) {
+  return __builtin_altivec_vupklsntob(__a);
+}
+
+#define vec_unpack_int4_to_bf16(__a, __imm) \
+  __builtin_altivec_vupkint4tobf16((__a), (__imm))
+
+#define vec_unpack_int8_to_bf16(__a, __imm) \
+  __builtin_altivec_vupkint8tobf16((__a), (__imm))
+
+#define vec_unpack_int4_to_fp32(__a, __imm) \
+  __builtin_altivec_vupkint4tofp32((__a), (__imm))
+
+#define vec_unpack_int8_to_fp32(__a, __imm) \
+  __builtin_altivec_vupkint8tofp32((__a), (__imm))
+
+#endif /* __FUTURE_VECTOR__ */
+
 #ifdef __POWER8_VECTOR__
 #define __bcdadd(__a, __b, __ps) __builtin_ppc_bcdadd((__a), (__b), (__ps))
 #define __bcdsub(__a, __b, __ps) __builtin_ppc_bcdsub((__a), (__b), (__ps))
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index 6a06dbf12c8dc..8a594fc86dea6 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -249,6 +249,14 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo 
&TI,
     return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7);
   case PPC::BI__builtin_vsx_xxpermx:
     return SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 7);
+  case PPC::BI__builtin_altivec_vupkint4tobf16:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3);
+  case PPC::BI__builtin_altivec_vupkint8tobf16:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1);
+  case PPC::BI__builtin_altivec_vupkint4tofp32:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 7);
+  case PPC::BI__builtin_altivec_vupkint8tofp32:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3);
   case PPC::BI__builtin_ppc_tw:
   case PPC::BI__builtin_ppc_tdw:
     return SemaRef.BuiltinConstantArgRange(TheCall, 2, 1, 31);
diff --git 
a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c 
b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
new file mode 100644
index 0000000000000..3b4eb0faa27c2
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
@@ -0,0 +1,194 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -triple 
powerpc64-unknown-unknown \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -triple 
powerpc64le-unknown-unknown \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+
+// AI Assisted.
+
+#include <altivec.h>
+
+vector unsigned char vuca, vucb;
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshn(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 
x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresshn(void) {
+  return vec_uncompresshn(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompressln(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 
x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompressln(void) {
+  return vec_uncompressln(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshb(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 
x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresshb(void) {
+  return vec_uncompresshb(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslb(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 
x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresslb(void) {
+  return vec_uncompresslb(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshh(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 
x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresshh(void) {
+  return vec_uncompresshh(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslh(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 
x i8> [[TMP2]], <16 x i8> [[TMP3]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+vector unsigned char test_vec_uncompresslh(void) {
+  return vec_uncompresslh(vuca, vucb);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_hsn_to_byte(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> 
@llvm.ppc.altivec.vupkhsntob(<16 x i8> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+vector unsigned char test_vec_unpack_hsn_to_byte(void) {
+  return vec_unpack_hsn_to_byte(vuca);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_lsn_to_byte(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> 
@llvm.ppc.altivec.vupklsntob(<16 x i8> [[TMP1]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+vector unsigned char test_vec_unpack_lsn_to_byte(void) {
+  return vec_unpack_lsn_to_byte(vuca);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int4_to_bf16(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.ppc.altivec.vupkint4tobf16(<16 x i8> [[TMP0]], i32 2)
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+vector unsigned char test_vec_unpack_int4_to_bf16(void) {
+  return vec_unpack_int4_to_bf16(vuca, 2);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_bf16(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.ppc.altivec.vupkint8tobf16(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+vector unsigned char test_vec_unpack_int8_to_bf16(void) {
+  return vec_unpack_int8_to_bf16(vuca, 1);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int4_to_fp32(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.ppc.altivec.vupkint4tofp32(<16 x i8> [[TMP0]], i32 5)
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+vector unsigned char test_vec_unpack_int4_to_fp32(void) {
+  return vec_unpack_int4_to_fp32(vuca, 5);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_fp32(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i8> 
@llvm.ppc.altivec.vupkint8tofp32(<16 x i8> [[TMP0]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+vector unsigned char test_vec_unpack_int8_to_fp32(void) {
+  return vec_unpack_int8_to_fp32(vuca, 3);
+}
diff --git a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c 
b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
new file mode 100644
index 0000000000000..5092b15731c81
--- /dev/null
+++ b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
@@ -0,0 +1,54 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \
+// RUN:   -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -verify %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \
+// RUN:   -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -verify %s
+
+// AI Assisted.
+
+#include <altivec.h>
+
+vector unsigned char vuca, vucb;
+vector signed int vsia;
+
+void test_invalid_params(void) {
+  vector unsigned char res;
+
+  // Test invalid parameter types
+  res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector 
int' (vector of 4 'int' values) to parameter of incompatible type '__vector 
unsigned char' (vector of 16 'unsigned char' values)}} 
expected-note@altivec.h:* {{passing argument to parameter '__a' here}}
+  res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector 
int' (vector of 4 'int' values) to parameter of incompatible type '__vector 
unsigned char' (vector of 16 'unsigned char' values)}} 
expected-note@altivec.h:* {{passing argument to parameter '__b' here}}
+  res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector 
int' (vector of 4 'int' values) to parameter of incompatible type '__vector 
unsigned char' (vector of 16 'unsigned char' values)}} 
expected-note@altivec.h:* {{passing argument to parameter '__a' here}}
+}
+
+void test_invalid_immediates(void) {
+  vector unsigned char res;
+
+  // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid 
range: 0-3)
+  res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}}
+  res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}}
+
+  // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid 
range: 0-1)
+  res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 
is outside the valid range [0, 1]}}
+  res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 1]}}
+
+  // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid 
range: 0-7)
+  res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 
is outside the valid range [0, 7]}}
+  res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 7]}}
+
+  // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid 
range: 0-3)
+  res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}}
+  res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}}
+}
+
+void test_non_constant_immediates(void) {
+  vector unsigned char res;
+  unsigned int imm = 1;
+
+  // Test non-constant immediate values
+  res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tobf16' must be a constant integer}}
+  res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tobf16' must be a constant integer}}
+  res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tofp32' must be a constant integer}}
+  res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tofp32' must be a constant integer}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td 
b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index bd8fb9e9a564d..a044b12347db5 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1362,6 +1362,36 @@ def int_ppc_altivec_vmulhsw : 
PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
 def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
 def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
 def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
+// Deeply Compressed Weights Intrinsics.
+def int_ppc_altivec_vucmprhn : PowerPC_Vec_BBB_Intrinsic<"vucmprhn">;
+def int_ppc_altivec_vucmprln : PowerPC_Vec_BBB_Intrinsic<"vucmprln">;
+def int_ppc_altivec_vucmprhb : PowerPC_Vec_BBB_Intrinsic<"vucmprhb">;
+def int_ppc_altivec_vucmprlb : PowerPC_Vec_BBB_Intrinsic<"vucmprlb">;
+def int_ppc_altivec_vucmprhh : PowerPC_Vec_BBB_Intrinsic<"vucmprhh">;
+def int_ppc_altivec_vucmprlh : PowerPC_Vec_BBB_Intrinsic<"vucmprlh">;
+def int_ppc_altivec_vupkhsntob :
+    PowerPC_Vec_Intrinsic<"vupkhsntob", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_altivec_vupklsntob :
+    PowerPC_Vec_Intrinsic<"vupklsntob", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_altivec_vupkint4tobf16 :
+    PowerPC_Vec_Intrinsic<"vupkint4tobf16", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_ppc_altivec_vupkint8tobf16 :
+    PowerPC_Vec_Intrinsic<"vupkint8tobf16", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_ppc_altivec_vupkint4tofp32 :
+    PowerPC_Vec_Intrinsic<"vupkint4tofp32", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_ppc_altivec_vupkint8tofp32 :
+    PowerPC_Vec_Intrinsic<"vupkint8tofp32", [llvm_v16i8_ty],
+                          [llvm_v16i8_ty, llvm_i32_ty],
+                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
 
 
//===----------------------------------------------------------------------===//
 // PowerPC VSX Intrinsic Definitions.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td 
b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index 0cd63a88cb96b..4236239f691c9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -431,38 +431,62 @@ let Predicates = [HasFutureVector] in {
   }
 
   def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB),
-                                "vupkhsntob $VRT, $VRB", []>;
+                                "vupkhsntob $VRT, $VRB",
+                                [(set v16i8:$VRT,
+                                  (int_ppc_altivec_vupkhsntob v16i8:$VRB))]>;
   def VUPKLSNTOB : VXForm_VRTB5<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB),
-                                "vupklsntob $VRT, $VRB", []>;
+                                "vupklsntob $VRT, $VRB",
+                                [(set v16i8:$VRT,
+                                  (int_ppc_altivec_vupklsntob v16i8:$VRB))]>;
   def VUPKINT4TOBF16
       : VXForm_VRTB5_UIM2<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, 
u2imm:$UIM),
-                          "vupkint4tobf16 $VRT, $VRB, $UIM", []>;
+                          "vupkint4tobf16 $VRT, $VRB, $UIM",
+                          [(set v16i8:$VRT,
+                            (int_ppc_altivec_vupkint4tobf16 v16i8:$VRB, 
timm:$UIM))]>;
   def VUPKINT8TOBF16
       : VXForm_VRTB5_UIM1<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB, 
u1imm:$UIM),
-                          "vupkint8tobf16 $VRT, $VRB, $UIM", []>;
+                          "vupkint8tobf16 $VRT, $VRB, $UIM",
+                          [(set v16i8:$VRT,
+                            (int_ppc_altivec_vupkint8tobf16 v16i8:$VRB, 
timm:$UIM))]>;
   def VUPKINT8TOFP32
       : VXForm_VRTB5_UIM2<387, 3, (outs vrrc:$VRT), (ins vrrc:$VRB, 
u2imm:$UIM),
-                          "vupkint8tofp32 $VRT, $VRB, $UIM", []>;
+                          "vupkint8tofp32 $VRT, $VRB, $UIM",
+                          [(set v16i8:$VRT,
+                            (int_ppc_altivec_vupkint8tofp32 v16i8:$VRB, 
timm:$UIM))]>;
   def VUPKINT4TOFP32
       : VXForm_VRTB5_UIM3<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, 
u3imm:$UIM),
-                          "vupkint4tofp32 $VRT, $VRB, $UIM", []>;
+                          "vupkint4tofp32 $VRT, $VRB, $UIM",
+                          [(set v16i8:$VRT,
+                            (int_ppc_altivec_vupkint4tofp32 v16i8:$VRB, 
timm:$UIM))]>;
 
   def VUCMPRHN : VXForm_VRTAB5<3, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                               "vucmprhn $VRT, $VRA, $VRB", []>;
+                               "vucmprhn $VRT, $VRA, $VRB",
+                               [(set v16i8:$VRT,
+                                 (int_ppc_altivec_vucmprhn v16i8:$VRA, 
v16i8:$VRB))]>;
   def VUCMPRLN : VXForm_VRTAB5<67, (outs vrrc:$VRT), (ins vrrc:$VRA, 
vrrc:$VRB),
-                               "vucmprln $VRT, $VRA, $VRB", []>;
+                               "vucmprln $VRT, $VRA, $VRB",
+                               [(set v16i8:$VRT,
+                                 (int_ppc_altivec_vucmprln v16i8:$VRA, 
v16i8:$VRB))]>;
   def VUCMPRHB
       : VXForm_VRTAB5<131, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                      "vucmprhb $VRT, $VRA, $VRB", []>;
+                      "vucmprhb $VRT, $VRA, $VRB",
+                      [(set v16i8:$VRT,
+                        (int_ppc_altivec_vucmprhb v16i8:$VRA, v16i8:$VRB))]>;
   def VUCMPRLB
       : VXForm_VRTAB5<195, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                      "vucmprlb $VRT, $VRA, $VRB", []>;
+                      "vucmprlb $VRT, $VRA, $VRB",
+                      [(set v16i8:$VRT,
+                        (int_ppc_altivec_vucmprlb v16i8:$VRA, v16i8:$VRB))]>;
   def VUCMPRHH
       : VXForm_VRTAB5<259, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                      "vucmprhh $VRT, $VRA, $VRB", []>;
+                      "vucmprhh $VRT, $VRA, $VRB",
+                      [(set v16i8:$VRT,
+                        (int_ppc_altivec_vucmprhh v16i8:$VRA, v16i8:$VRB))]>;
   def VUCMPRLH
       : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
-                      "vucmprlh $VRT, $VRA, $VRB", []>;
+                      "vucmprlh $VRT, $VRA, $VRB",
+                      [(set v16i8:$VRT,
+                        (int_ppc_altivec_vucmprlh v16i8:$VRA, v16i8:$VRB))]>;
 
   def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                             "xvrlw $XT, $XA, $XB",
diff --git a/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll 
b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll
new file mode 100644
index 0000000000000..85f84ade7c3c1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; AI Assisted.
+
+declare <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8>)
+declare <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8>, i32)
+declare <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8>, i32)
+declare <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8>, i32)
+declare <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8>, i32)
+
+define <16 x i8> @test_vucmprhn(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprhn:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprhn 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprhn:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprhn 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprln(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprln:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprln 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprln:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprln 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprhb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprhb:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprhb 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprhb:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprhb 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprlb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprlb:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprlb 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprlb:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprlb 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprhh(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprhh:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprhh 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprhh:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprhh 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vucmprlh(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LE-LABEL: test_vucmprlh:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vucmprlh 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vucmprlh:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vucmprlh 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkhsntob(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkhsntob:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkhsntob 2, 2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkhsntob:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkhsntob 2, 2
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> %a)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupklsntob(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupklsntob:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupklsntob 2, 2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupklsntob:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupklsntob 2, 2
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> %a)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint4tobf16_0(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint4tobf16_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint4tobf16 2, 2, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint4tobf16_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint4tobf16 2, 2, 0
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 0)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint4tobf16_3(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint4tobf16_3:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint4tobf16 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint4tobf16_3:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint4tobf16 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 3)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint8tobf16_0(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint8tobf16_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint8tobf16 2, 2, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint8tobf16_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint8tobf16 2, 2, 0
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 0)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint8tobf16_1(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint8tobf16_1:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint8tobf16 2, 2, 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint8tobf16_1:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint8tobf16 2, 2, 1
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint4tofp32_0(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint4tofp32_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint4tofp32 2, 2, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint4tofp32_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint4tofp32 2, 2, 0
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 0)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint4tofp32_7(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint4tofp32_7:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint4tofp32 2, 2, 7
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint4tofp32_7:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint4tofp32 2, 2, 7
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 7)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint8tofp32_0(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint8tofp32_0:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint8tofp32 2, 2, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint8tofp32_0:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint8tofp32 2, 2, 0
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 0)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_vupkint8tofp32_3(<16 x i8> %a) {
+; CHECK-LE-LABEL: test_vupkint8tofp32_3:
+; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    vupkint8tofp32 2, 2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_vupkint8tofp32_3:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    vupkint8tofp32 2, 2, 3
+; CHECK-BE-NEXT:    blr
+  %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 3)
+  ret <16 x i8> %res
+}

>From d18b9af50e92aea6d470c73a7ae9c1c79449de52 Mon Sep 17 00:00:00 2001
From: Lei Huang <[email protected]>
Date: Wed, 4 Mar 2026 22:58:00 +0000
Subject: [PATCH 2/7] fix format

---
 clang/lib/Headers/altivec.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 3de356a1a0e4d..c62dad5293a63 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -19358,16 +19358,16 @@ vec_unpack_lsn_to_byte(vector unsigned char __a) {
   return __builtin_altivec_vupklsntob(__a);
 }
 
-#define vec_unpack_int4_to_bf16(__a, __imm) \
+#define vec_unpack_int4_to_bf16(__a, __imm)                                    \
   __builtin_altivec_vupkint4tobf16((__a), (__imm))
 
-#define vec_unpack_int8_to_bf16(__a, __imm) \
+#define vec_unpack_int8_to_bf16(__a, __imm)                                    \
   __builtin_altivec_vupkint8tobf16((__a), (__imm))
 
-#define vec_unpack_int4_to_fp32(__a, __imm) \
+#define vec_unpack_int4_to_fp32(__a, __imm)                                    \
   __builtin_altivec_vupkint4tofp32((__a), (__imm))
 
-#define vec_unpack_int8_to_fp32(__a, __imm) \
+#define vec_unpack_int8_to_fp32(__a, __imm)                                    \
   __builtin_altivec_vupkint8tofp32((__a), (__imm))
 
 #endif /* __FUTURE_VECTOR__ */

>From 78cffe29f11a6e2ae3364dc0cb1cc1aba384c47a Mon Sep 17 00:00:00 2001
From: Lei Huang <[email protected]>
Date: Wed, 25 Mar 2026 18:17:24 -0400
Subject: [PATCH 3/7] add err checking and move file to PowerPC subdir

---
 ...tins-ppc-deeply-compressed-weights-error.c | 71 +++++++++++++++++++
 ...tins-ppc-deeply-compressed-weights-error.c | 54 --------------
 2 files changed, 71 insertions(+), 54 deletions(-)
 create mode 100644 
clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
 delete mode 100644 
clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c

diff --git 
a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c 
b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
new file mode 100644
index 0000000000000..ca562b5bfc753
--- /dev/null
+++ b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
@@ -0,0 +1,71 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \
+// RUN:   -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -verify=expected %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \
+// RUN:   -flax-vector-conversions=none -target-feature +vsx \
+// RUN:   -target-feature +isa-future-instructions -verify=expected %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \
+// RUN:   -flax-vector-conversions=none -target-cpu pwr10 -verify=pwr10 %s
+
+// AI Assissted.
+
+#include <altivec.h>
+
+vector unsigned char vuca, vucb;
+vector signed int vsia;
+
+void test_invalid_params(void) {
+  vector unsigned char res;
+
+  // Test invalid parameter types
+  res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \
+                                      // pwr10-error 
{{'__builtin_altivec_vuncompresshn' needs target feature 
isa-future-instructions}}
+  res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__b' here}} \
+                                      // pwr10-error 
{{'__builtin_altivec_vuncompressln' needs target feature 
isa-future-instructions}}
+  res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \
+                                      // pwr10-error 
{{'__builtin_altivec_vunpackhsntobyte' needs target feature 
isa-future-instructions}}
+}
+
+void test_invalid_immediates(void) {
+  vector unsigned char res;
+
+  // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid 
range: 0-3)
+  res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}} \
+                                          // pwr10-error 
{{'__builtin_altivec_vupkint4tobf16' needs target feature 
isa-future-instructions}}
+  res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}} \
+                                           // pwr10-error 
{{'__builtin_altivec_vupkint4tobf16' needs target feature 
isa-future-instructions}}
+
+  // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid 
range: 0-1)
+  res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 
is outside the valid range [0, 1]}} \
+                                          // pwr10-error 
{{'__builtin_altivec_vupkint8tobf16' needs target feature 
isa-future-instructions}}
+  res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 1]}} \
+                                           // pwr10-error 
{{'__builtin_altivec_vupkint8tobf16' needs target feature 
isa-future-instructions}}
+
+  // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid 
range: 0-7)
+  res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 
is outside the valid range [0, 7]}} \
+                                          // pwr10-error 
{{'__builtin_altivec_vupkint4tofp32' needs target feature 
isa-future-instructions}}
+  res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 7]}} \
+                                           // pwr10-error 
{{'__builtin_altivec_vupkint4tofp32' needs target feature 
isa-future-instructions}}
+
+  // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid 
range: 0-3)
+  res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}} \
+                                          // pwr10-error 
{{'__builtin_altivec_vupkint8tofp32' needs target feature 
isa-future-instructions}}
+  res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}} \
+                                           // pwr10-error 
{{'__builtin_altivec_vupkint8tofp32' needs target feature 
isa-future-instructions}}
+}
+
+void test_non_constant_immediates(void) {
+  vector unsigned char res;
+  unsigned int imm = 1;
+
+  // Test non-constant immediate values
+  res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tobf16' must be a constant integer}} \
+                                            // pwr10-error 
{{'__builtin_altivec_vupkint4tobf16' needs target feature 
isa-future-instructions}}
+  res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tobf16' must be a constant integer}} \
+                                            // pwr10-error 
{{'__builtin_altivec_vupkint8tobf16' needs target feature 
isa-future-instructions}}
+  res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tofp32' must be a constant integer}} \
+                                            // pwr10-error 
{{'__builtin_altivec_vupkint4tofp32' needs target feature 
isa-future-instructions}}
+  res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tofp32' must be a constant integer}} \
+                                            // pwr10-error 
{{'__builtin_altivec_vupkint8tofp32' needs target feature 
isa-future-instructions}}
+}
diff --git a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c 
b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
deleted file mode 100644
index 5092b15731c81..0000000000000
--- a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c
+++ /dev/null
@@ -1,54 +0,0 @@
-// REQUIRES: powerpc-registered-target
-// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \
-// RUN:   -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -verify %s
-// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \
-// RUN:   -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -verify %s
-
-// AI Assissted.
-
-#include <altivec.h>
-
-vector unsigned char vuca, vucb;
-vector signed int vsia;
-
-void test_invalid_params(void) {
-  vector unsigned char res;
-
-  // Test invalid parameter types
-  res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}}
-  res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__b' here}}
-  res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}}
-}
-
-void test_invalid_immediates(void) {
-  vector unsigned char res;
-
-  // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid 
range: 0-3)
-  res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}}
-  res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}}
-
-  // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid 
range: 0-1)
-  res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 
is outside the valid range [0, 1]}}
-  res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 1]}}
-
-  // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid 
range: 0-7)
-  res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 
is outside the valid range [0, 7]}}
-  res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 7]}}
-
-  // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid 
range: 0-3)
-  res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}}
-  res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}}
-}
-
-void test_non_constant_immediates(void) {
-  vector unsigned char res;
-  unsigned int imm = 1;
-
-  // Test non-constant immediate values
-  res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tobf16' must be a constant integer}}
-  res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tobf16' must be a constant integer}}
-  res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tofp32' must be a constant integer}}
-  res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tofp32' must be a constant integer}}
-}

>From 1435285781a14262b38877f1607b603adf658142 Mon Sep 17 00:00:00 2001
From: Lei Huang <[email protected]>
Date: Wed, 25 Mar 2026 18:28:47 -0400
Subject: [PATCH 4/7] update pwr10 calls

---
 ...tins-ppc-deeply-compressed-weights-error.c | 41 ++++++++++++-------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git 
a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c 
b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
index ca562b5bfc753..7cc1a0c429e7d 100644
--- a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
+++ b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
@@ -20,11 +20,14 @@ void test_invalid_params(void) {
 
   // Test invalid parameter types
   res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \
-                                      // pwr10-error 
{{'__builtin_altivec_vuncompresshn' needs target feature 
isa-future-instructions}}
+                                      // pwr10-error {{call to undeclared 
function 'vec_uncompresshn'}} \
+                                      // pwr10-error {{assigning to '__vector 
unsigned char' (vector of 16 'unsigned char' values) from incompatible type 
'int'}}
   res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__b' here}} \
-                                      // pwr10-error 
{{'__builtin_altivec_vuncompressln' needs target feature 
isa-future-instructions}}
+                                      // pwr10-error {{call to undeclared 
function 'vec_uncompressln'}} \
+                                      // pwr10-error {{assigning to '__vector 
unsigned char' (vector of 16 'unsigned char' values) from incompatible type 
'int'}}
   res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \
-                                      // pwr10-error 
{{'__builtin_altivec_vunpackhsntobyte' needs target feature 
isa-future-instructions}}
+                                      // pwr10-error {{call to undeclared 
function 'vec_unpack_hsn_to_byte'}} \
+                                      // pwr10-error {{assigning to '__vector 
unsigned char' (vector of 16 'unsigned char' values) from incompatible type 
'int'}}
 }
 
 void test_invalid_immediates(void) {
@@ -32,27 +35,31 @@ void test_invalid_immediates(void) {
 
   // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid 
range: 0-3)
   res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}} \
-                                          // pwr10-error 
{{'__builtin_altivec_vupkint4tobf16' needs target feature 
isa-future-instructions}}
+                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int4_to_bf16'}} \
+                                          // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
   res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}} \
-                                           // pwr10-error 
{{'__builtin_altivec_vupkint4tobf16' needs target feature 
isa-future-instructions}}
+                                           // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
 
   // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid 
range: 0-1)
   res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 
is outside the valid range [0, 1]}} \
-                                          // pwr10-error 
{{'__builtin_altivec_vupkint8tobf16' needs target feature 
isa-future-instructions}}
+                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int8_to_bf16'}} \
+                                          // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
   res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 1]}} \
-                                           // pwr10-error 
{{'__builtin_altivec_vupkint8tobf16' needs target feature 
isa-future-instructions}}
+                                           // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
 
   // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid 
range: 0-7)
   res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 
is outside the valid range [0, 7]}} \
-                                          // pwr10-error 
{{'__builtin_altivec_vupkint4tofp32' needs target feature 
isa-future-instructions}}
+                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int4_to_fp32'}} \
+                                          // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
   res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 7]}} \
-                                           // pwr10-error 
{{'__builtin_altivec_vupkint4tofp32' needs target feature 
isa-future-instructions}}
+                                           // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
 
   // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid 
range: 0-3)
   res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}} \
-                                          // pwr10-error 
{{'__builtin_altivec_vupkint8tofp32' needs target feature 
isa-future-instructions}}
+                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int8_to_fp32'}} \
+                                          // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
   res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}} \
-                                           // pwr10-error 
{{'__builtin_altivec_vupkint8tofp32' needs target feature 
isa-future-instructions}}
+                                           // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
 }
 
 void test_non_constant_immediates(void) {
@@ -61,11 +68,15 @@ void test_non_constant_immediates(void) {
 
   // Test non-constant immediate values
   res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tobf16' must be a constant integer}} \
-                                            // pwr10-error 
{{'__builtin_altivec_vupkint4tobf16' needs target feature 
isa-future-instructions}}
+                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int4_to_bf16'}} \
+                                            // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
   res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tobf16' must be a constant integer}} \
-                                            // pwr10-error 
{{'__builtin_altivec_vupkint8tobf16' needs target feature 
isa-future-instructions}}
+                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int8_to_bf16'}} \
+                                            // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
   res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tofp32' must be a constant integer}} \
-                                            // pwr10-error 
{{'__builtin_altivec_vupkint4tofp32' needs target feature 
isa-future-instructions}}
+                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int4_to_fp32'}} \
+                                            // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
   res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tofp32' must be a constant integer}} \
-                                            // pwr10-error 
{{'__builtin_altivec_vupkint8tofp32' needs target feature 
isa-future-instructions}}
+                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int8_to_fp32'}} \
+                                            // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
 }

>From 5aacf672285e554b8d96d918be4d69c2e41c33a2 Mon Sep 17 00:00:00 2001
From: Lei Huang <[email protected]>
Date: Thu, 26 Mar 2026 10:31:38 -0400
Subject: [PATCH 5/7] update to use future-vector

---
 clang/include/clang/Basic/BuiltinsPPC.def | 26 +++++++++++------------
 clang/lib/Basic/Targets/PPC.cpp           |  2 +-
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def 
b/clang/include/clang/Basic/BuiltinsPPC.def
index f99a019b71f2b..8422d37e30688 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1164,29 +1164,27 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, 
"vW1024*W256Vi255i15i3",
 
 // Deeply Compressed Weights built-ins.
 TARGET_BUILTIN(__builtin_altivec_vucmprhn, "V16UcV16UcV16Uc", "",
-               "isa-future-instructions")
+               "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vucmprln, "V16UcV16UcV16Uc", "",
-               "isa-future-instructions")
+               "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vucmprhb, "V16UcV16UcV16Uc", "",
-               "isa-future-instructions")
+               "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vucmprlb, "V16UcV16UcV16Uc", "",
-               "isa-future-instructions")
+               "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vucmprhh, "V16UcV16UcV16Uc", "",
-               "isa-future-instructions")
+               "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vucmprlh, "V16UcV16UcV16Uc", "",
-               "isa-future-instructions")
-TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "",
-               "isa-future-instructions")
-TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "",
-               "isa-future-instructions")
+               "future-vector")
+TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "", "future-vector")
+TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "", "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vupkint4tobf16, "V16UcV16UcIi", "",
-               "isa-future-instructions")
+               "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vupkint8tobf16, "V16UcV16UcIi", "",
-               "isa-future-instructions")
+               "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vupkint4tofp32, "V16UcV16UcIi", "",
-               "isa-future-instructions")
+               "future-vector")
 TARGET_BUILTIN(__builtin_altivec_vupkint8tofp32, "V16UcV16UcIi", "",
-               "isa-future-instructions")
+               "future-vector")
 
 // FIXME: Obviously incomplete.
 
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 90e2050e4d1d4..c9a41df806aff 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -59,7 +59,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasP9Vector = true;
     } else if (Feature == "+power10-vector") {
       HasP10Vector = true;
-    } else if (Feature == "+isa-future-instructions") {
+    } else if (Feature == "+future-vector") {
       HasFutureVector = true;
     } else if (Feature == "+pcrelative-memops") {
       HasPCRelativeMemops = true;

>From 9119862c4ad45d02a64409c81f3ceb3bfe5919e8 Mon Sep 17 00:00:00 2001
From: Lei Huang <[email protected]>
Date: Thu, 26 Mar 2026 11:22:21 -0400
Subject: [PATCH 6/7] cleanup test

---
 ...tins-ppc-deeply-compressed-weights-error.c | 53 +++++++------------
 1 file changed, 18 insertions(+), 35 deletions(-)

diff --git 
a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c 
b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
index 7cc1a0c429e7d..243e179d01834 100644
--- a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
+++ b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
@@ -1,12 +1,10 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \
-// RUN:   -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -verify=expected %s
+// RUN:   -flax-vector-conversions=none -target-cpu future -verify=expected %s
 // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \
-// RUN:   -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -verify=expected %s
+// RUN:   -flax-vector-conversions=none -target-cpu future -verify=expected %s
 // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \
-// RUN:   -flax-vector-conversions=none -target-cpu pwr10 -verify=pwr10 %s
+// RUN:   -flax-vector-conversions=none -target-cpu pwr10 -verify=pwr10 -verify-ignore-unexpected=error %s
 
 // AI Assissted.
 
@@ -20,14 +18,11 @@ void test_invalid_params(void) {
 
   // Test invalid parameter types
   res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector 
int' (vector of 4 'int' values) to parameter of incompatible type '__vector 
unsigned char' (vector of 16 'unsigned char' values)}} 
expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \
-                                      // pwr10-error {{call to undeclared 
function 'vec_uncompresshn'}} \
-                                      // pwr10-error {{assigning to '__vector 
unsigned char' (vector of 16 'unsigned char' values) from incompatible type 
'int'}}
+                                      // pwr10-error {{call to undeclared 
function 'vec_uncompresshn'}}
   res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector 
int' (vector of 4 'int' values) to parameter of incompatible type '__vector 
unsigned char' (vector of 16 'unsigned char' values)}} 
expected-note@altivec.h:* {{passing argument to parameter '__b' here}} \
-                                      // pwr10-error {{call to undeclared 
function 'vec_uncompressln'}} \
-                                      // pwr10-error {{assigning to '__vector 
unsigned char' (vector of 16 'unsigned char' values) from incompatible type 
'int'}}
+                                      // pwr10-error {{call to undeclared 
function 'vec_uncompressln'}}
   res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector 
int' (vector of 4 'int' values) to parameter of incompatible type '__vector 
unsigned char' (vector of 16 'unsigned char' values)}} 
expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \
-                                      // pwr10-error {{call to undeclared 
function 'vec_unpack_hsn_to_byte'}} \
-                                      // pwr10-error {{assigning to '__vector 
unsigned char' (vector of 16 'unsigned char' values) from incompatible type 
'int'}}
+                                      // pwr10-error {{call to undeclared 
function 'vec_unpack_hsn_to_byte'}}
 }
 
 void test_invalid_immediates(void) {
@@ -35,31 +30,23 @@ void test_invalid_immediates(void) {
 
   // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid 
range: 0-3)
   res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}} \
-                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int4_to_bf16'}} \
-                                          // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
-  res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}} \
-                                           // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
+                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int4_to_bf16'}}
+  res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
 
   // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid 
range: 0-1)
   res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 
is outside the valid range [0, 1]}} \
-                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int8_to_bf16'}} \
-                                          // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
-  res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 1]}} \
-                                           // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
+                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int8_to_bf16'}}
+  res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
 
   // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid 
range: 0-7)
   res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 
is outside the valid range [0, 7]}} \
-                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int4_to_fp32'}} \
-                                          // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
-  res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 7]}} \
-                                           // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
+                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int4_to_fp32'}}
+  res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
 
   // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid 
range: 0-3)
   res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 
is outside the valid range [0, 3]}} \
-                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int8_to_fp32'}} \
-                                          // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
-  res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value 
-1 is outside the valid range [0, 3]}} \
-                                           // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
+                                          // pwr10-error {{call to undeclared 
function 'vec_unpack_int8_to_fp32'}}
+  res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
 }
 
 void test_non_constant_immediates(void) {
@@ -68,15 +55,11 @@ void test_non_constant_immediates(void) {
 
   // Test non-constant immediate values
   res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tobf16' must be a constant integer}} \
-                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int4_to_bf16'}} \
-                                            // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
+                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int4_to_bf16'}}
   res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tobf16' must be a constant integer}} \
-                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int8_to_bf16'}} \
-                                            // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
+                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int8_to_bf16'}}
   res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint4tofp32' must be a constant integer}} \
-                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int4_to_fp32'}} \
-                                            // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
+                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int4_to_fp32'}}
   res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to 
'__builtin_altivec_vupkint8tofp32' must be a constant integer}} \
-                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int8_to_fp32'}} \
-                                            // pwr10-error {{assigning to 
'__vector unsigned char' (vector of 16 'unsigned char' values) from 
incompatible type 'int'}}
+                                            // pwr10-error {{call to 
undeclared function 'vec_unpack_int8_to_fp32'}}
 }

>From 39a0cb329a8f0745e560a867eaac381d35471cc4 Mon Sep 17 00:00:00 2001
From: Lei Huang <[email protected]>
Date: Thu, 26 Mar 2026 11:27:51 -0400
Subject: [PATCH 7/7] cleanup tests

---
 ...-weights.c => builtins-deeply-compressed-weights.c} | 10 ++++------
 ...or.c => builtins-deeply-compressed-weights-error.c} |  0
 2 files changed, 4 insertions(+), 6 deletions(-)
 rename clang/test/CodeGen/PowerPC/{builtins-ppc-deeply-compressed-weights.c => 
builtins-deeply-compressed-weights.c} (95%)
 rename clang/test/Sema/PowerPC/{builtins-ppc-deeply-compressed-weights-error.c 
=> builtins-deeply-compressed-weights-error.c} (100%)

diff --git 
a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c 
b/clang/test/CodeGen/PowerPC/builtins-deeply-compressed-weights.c
similarity index 95%
rename from clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
rename to clang/test/CodeGen/PowerPC/builtins-deeply-compressed-weights.c
index 3b4eb0faa27c2..664e2ffa34295 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c
+++ b/clang/test/CodeGen/PowerPC/builtins-deeply-compressed-weights.c
@@ -1,11 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
 // REQUIRES: powerpc-registered-target
-// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -triple 
powerpc64-unknown-unknown \
-// RUN:   -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \
-// RUN:   -target-feature +isa-future-instructions -triple 
powerpc64le-unknown-unknown \
-// RUN:   -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu future \
+// RUN:   -flax-vector-conversions=none -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \
+// RUN:   -flax-vector-conversions=none -emit-llvm %s -o - | FileCheck %s
 
 // AI Assisted.
 
diff --git 
a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c 
b/clang/test/Sema/PowerPC/builtins-deeply-compressed-weights-error.c
similarity index 100%
rename from 
clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c
rename to clang/test/Sema/PowerPC/builtins-deeply-compressed-weights-error.c

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [PowerPC] Implement Deeply Compressed Weights Builtins (PR #184666)

Reply via email to