https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/184666
>From 964ffb48a9caeb7a4cc8e167544f292572a191f6 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Wed, 4 Mar 2026 14:12:42 -0500 Subject: [PATCH 1/7] [PowerPC] Implement Deeply Compressed Weights Builtins Add support for the following deeply compressed weights builtins for ISA Future. - vec_uncompresshn(vector unsigned char, vector unsigned char) - vec_uncompressln(vector unsigned char, vector unsigned char) - vec_uncompresshb(vector unsigned char, vector unsigned char) - vec_uncompresslb(vector unsigned char, vector unsigned char) - vec_uncompresshh(vector unsigned char, vector unsigned char) - vec_uncompresslh(vector unsigned char, vector unsigned char) - vec_unpack_hsn_to_byte(vector unsigned char) - vec_unpack_lsn_to_byte(vector unsigned char) - vec_unpack_int4_to_bf16(vector unsigned char, uint2) - vec_unpack_int8_to_bf16(vector unsigned char, uint1) - vec_unpack_int4_to_fp32(vector unsigned char, uint3) - vec_unpack_int8_to_fp32(vector unsigned char, uint2) --- clang/include/clang/Basic/BuiltinsPPC.def | 26 ++ clang/lib/Basic/Targets/PPC.cpp | 4 + clang/lib/Basic/Targets/PPC.h | 1 + clang/lib/Headers/altivec.h | 58 +++++ clang/lib/Sema/SemaPPC.cpp | 8 + .../builtins-ppc-deeply-compressed-weights.c | 194 ++++++++++++++ ...tins-ppc-deeply-compressed-weights-error.c | 54 ++++ llvm/include/llvm/IR/IntrinsicsPowerPC.td | 30 +++ llvm/lib/Target/PowerPC/PPCInstrFuture.td | 48 +++- .../PowerPC/deeply-compressed-weights.ll | 244 ++++++++++++++++++ 10 files changed, 655 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c create mode 100644 clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c create mode 100644 llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index c0c92c0b73793..f99a019b71f2b 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ 
b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1162,6 +1162,32 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, "vW1024*W256V", UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3", "mma,isa-future-instructions") +// Deeply Compressed Weights built-ins. +TARGET_BUILTIN(__builtin_altivec_vucmprhn, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprln, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprhb, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprlb, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprhh, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vucmprlh, "V16UcV16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint4tobf16, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint8tobf16, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint4tofp32, "V16UcV16UcIi", "", + "isa-future-instructions") +TARGET_BUILTIN(__builtin_altivec_vupkint8tofp32, "V16UcV16UcIi", "", + "isa-future-instructions") + // FIXME: Obviously incomplete. 
#undef BUILTIN diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 30ea714fbb6f8..90e2050e4d1d4 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -59,6 +59,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasP9Vector = true; } else if (Feature == "+power10-vector") { HasP10Vector = true; + } else if (Feature == "+isa-future-instructions") { + HasFutureVector = true; } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; } else if (Feature == "+spe" || Feature == "+efpu2") { @@ -434,6 +436,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__POWER10_VECTOR__"); if (HasPCRelativeMemops) Builder.defineMacro("__PCREL__"); + if (HasFutureVector) + Builder.defineMacro("__FUTURE_VECTOR__"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 6f90ff1f5d57c..a9f49aa3aebe1 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -69,6 +69,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool HasFrsqrte = false; bool HasFrsqrtes = false; bool HasP10Vector = false; + bool HasFutureVector = false; bool HasPCRelativeMemops = false; bool HasQuadwordAtomics = false; bool UseLongCalls = false; diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 1c778ea0a829f..3de356a1a0e4d 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -19314,6 +19314,64 @@ vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) { #endif /* __SIZEOF_INT128__ */ #endif /* __POWER10_VECTOR__ */ +#ifdef __FUTURE_VECTOR__ + +/* vec_uncompress* - Deeply Compressed Weights builtins */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshn(vector unsigned char __a, vector unsigned char __b) { + 
return __builtin_altivec_vucmprhn(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompressln(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprln(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshb(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprhb(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresslb(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprlb(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresshh(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprhh(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_uncompresslh(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vucmprlh(__a, __b); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_unpack_hsn_to_byte(vector unsigned char __a) { + return __builtin_altivec_vupkhsntob(__a); +} + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_unpack_lsn_to_byte(vector unsigned char __a) { + return __builtin_altivec_vupklsntob(__a); +} + +#define vec_unpack_int4_to_bf16(__a, __imm) \ + __builtin_altivec_vupkint4tobf16((__a), (__imm)) + +#define vec_unpack_int8_to_bf16(__a, __imm) \ + __builtin_altivec_vupkint8tobf16((__a), (__imm)) + +#define vec_unpack_int4_to_fp32(__a, __imm) \ + __builtin_altivec_vupkint4tofp32((__a), (__imm)) + +#define vec_unpack_int8_to_fp32(__a, __imm) \ + __builtin_altivec_vupkint8tofp32((__a), (__imm)) + +#endif /* __FUTURE_VECTOR__ */ + #ifdef __POWER8_VECTOR__ #define __bcdadd(__a, __b, __ps) __builtin_ppc_bcdadd((__a), (__b), (__ps)) #define __bcdsub(__a, __b, __ps) __builtin_ppc_bcdsub((__a), (__b), (__ps)) diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 6a06dbf12c8dc..8a594fc86dea6 100644 --- a/clang/lib/Sema/SemaPPC.cpp 
+++ b/clang/lib/Sema/SemaPPC.cpp @@ -249,6 +249,14 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7); case PPC::BI__builtin_vsx_xxpermx: return SemaRef.BuiltinConstantArgRange(TheCall, 3, 0, 7); + case PPC::BI__builtin_altivec_vupkint4tobf16: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3); + case PPC::BI__builtin_altivec_vupkint8tobf16: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1); + case PPC::BI__builtin_altivec_vupkint4tofp32: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 7); + case PPC::BI__builtin_altivec_vupkint8tofp32: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 3); case PPC::BI__builtin_ppc_tw: case PPC::BI__builtin_ppc_tdw: return SemaRef.BuiltinConstantArgRange(TheCall, 2, 1, 31); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c new file mode 100644 index 0000000000000..3b4eb0faa27c2 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c @@ -0,0 +1,194 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +// AI Assisted. 
+ +#include <altivec.h> + +vector unsigned char vuca, vucb; + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshn( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshn(void) { + return vec_uncompresshn(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompressln( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompressln(void) { + return vec_uncompressln(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> 
@test_vec_uncompresshb( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshb(void) { + return vec_uncompresshb(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslb( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresslb(void) { + return vec_uncompresslb(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresshh( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = 
alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresshh(void) { + return vec_uncompresshh(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_uncompresslh( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[__B_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @vucb, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[__B_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) +// CHECK-NEXT: ret <16 x i8> [[TMP4]] +// +vector unsigned char test_vec_uncompresslh(void) { + return vec_uncompresslh(vuca, vucb); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_hsn_to_byte( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: store <16 
x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +vector unsigned char test_vec_unpack_hsn_to_byte(void) { + return vec_unpack_hsn_to_byte(vuca); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_lsn_to_byte( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: store <16 x i8> [[TMP0]], ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[__A_ADDR_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> [[TMP1]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +vector unsigned char test_vec_unpack_lsn_to_byte(void) { + return vec_unpack_lsn_to_byte(vuca); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int4_to_bf16( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> [[TMP0]], i32 2) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int4_to_bf16(void) { + return vec_unpack_int4_to_bf16(vuca, 2); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_bf16( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int8_to_bf16(void) { + return vec_unpack_int8_to_bf16(vuca, 1); +} + +// CHECK-LABEL: define dso_local <16 x i8> 
@test_vec_unpack_int4_to_fp32( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> [[TMP0]], i32 5) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int4_to_fp32(void) { + return vec_unpack_int4_to_fp32(vuca, 5); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vec_unpack_int8_to_fp32( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @vuca, align 16 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> [[TMP0]], i32 3) +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +vector unsigned char test_vec_unpack_int8_to_fp32(void) { + return vec_unpack_int8_to_fp32(vuca, 3); +} diff --git a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c new file mode 100644 index 0000000000000..5092b15731c81 --- /dev/null +++ b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c @@ -0,0 +1,54 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -verify %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -verify %s + +// AI Assisted. 
+ +#include <altivec.h> + +vector unsigned char vuca, vucb; +vector signed int vsia; + +void test_invalid_params(void) { + vector unsigned char res; + + // Test invalid parameter types + res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} + res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__b' here}} + res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} +} + +void test_invalid_immediates(void) { + vector unsigned char res; + + // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid range: 0-3) + res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} + + // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid range: 0-1) + res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} + + // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid range: 0-7) + res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 
is outside the valid range [0, 7]}} + + // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid range: 0-3) + res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} +} + +void test_non_constant_immediates(void) { + vector unsigned char res; + unsigned int imm = 1; + + // Test non-constant immediate values + res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tobf16' must be a constant integer}} + res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tobf16' must be a constant integer}} + res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tofp32' must be a constant integer}} + res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tofp32' must be a constant integer}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index bd8fb9e9a564d..a044b12347db5 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1362,6 +1362,36 @@ def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">; def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">; def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">; def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">; +// Deeply Compressed Weights Intrinsics. 
+def int_ppc_altivec_vucmprhn : PowerPC_Vec_BBB_Intrinsic<"vucmprhn">; +def int_ppc_altivec_vucmprln : PowerPC_Vec_BBB_Intrinsic<"vucmprln">; +def int_ppc_altivec_vucmprhb : PowerPC_Vec_BBB_Intrinsic<"vucmprhb">; +def int_ppc_altivec_vucmprlb : PowerPC_Vec_BBB_Intrinsic<"vucmprlb">; +def int_ppc_altivec_vucmprhh : PowerPC_Vec_BBB_Intrinsic<"vucmprhh">; +def int_ppc_altivec_vucmprlh : PowerPC_Vec_BBB_Intrinsic<"vucmprlh">; +def int_ppc_altivec_vupkhsntob : + PowerPC_Vec_Intrinsic<"vupkhsntob", [llvm_v16i8_ty], + [llvm_v16i8_ty], [IntrNoMem]>; +def int_ppc_altivec_vupklsntob : + PowerPC_Vec_Intrinsic<"vupklsntob", [llvm_v16i8_ty], + [llvm_v16i8_ty], [IntrNoMem]>; +def int_ppc_altivec_vupkint4tobf16 : + PowerPC_Vec_Intrinsic<"vupkint4tobf16", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint8tobf16 : + PowerPC_Vec_Intrinsic<"vupkint8tobf16", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint4tofp32 : + PowerPC_Vec_Intrinsic<"vupkint4tofp32", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; +def int_ppc_altivec_vupkint8tofp32 : + PowerPC_Vec_Intrinsic<"vupkint8tofp32", [llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Definitions. 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index 0cd63a88cb96b..4236239f691c9 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -431,38 +431,62 @@ let Predicates = [HasFutureVector] in { } def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB), - "vupkhsntob $VRT, $VRB", []>; + "vupkhsntob $VRT, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkhsntob v16i8:$VRB))]>; def VUPKLSNTOB : VXForm_VRTB5<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB), - "vupklsntob $VRT, $VRB", []>; + "vupklsntob $VRT, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vupklsntob v16i8:$VRB))]>; def VUPKINT4TOBF16 : VXForm_VRTB5_UIM2<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM), - "vupkint4tobf16 $VRT, $VRB, $UIM", []>; + "vupkint4tobf16 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint4tobf16 v16i8:$VRB, timm:$UIM))]>; def VUPKINT8TOBF16 : VXForm_VRTB5_UIM1<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB, u1imm:$UIM), - "vupkint8tobf16 $VRT, $VRB, $UIM", []>; + "vupkint8tobf16 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint8tobf16 v16i8:$VRB, timm:$UIM))]>; def VUPKINT8TOFP32 : VXForm_VRTB5_UIM2<387, 3, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM), - "vupkint8tofp32 $VRT, $VRB, $UIM", []>; + "vupkint8tofp32 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint8tofp32 v16i8:$VRB, timm:$UIM))]>; def VUPKINT4TOFP32 : VXForm_VRTB5_UIM3<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u3imm:$UIM), - "vupkint4tofp32 $VRT, $VRB, $UIM", []>; + "vupkint4tofp32 $VRT, $VRB, $UIM", + [(set v16i8:$VRT, + (int_ppc_altivec_vupkint4tofp32 v16i8:$VRB, timm:$UIM))]>; def VUCMPRHN : VXForm_VRTAB5<3, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprhn $VRT, $VRA, $VRB", []>; + "vucmprhn $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhn v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLN : VXForm_VRTAB5<67, (outs vrrc:$VRT), (ins 
vrrc:$VRA, vrrc:$VRB), - "vucmprln $VRT, $VRA, $VRB", []>; + "vucmprln $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprln v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRHB : VXForm_VRTAB5<131, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprhb $VRT, $VRA, $VRB", []>; + "vucmprhb $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhb v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLB : VXForm_VRTAB5<195, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprlb $VRT, $VRA, $VRB", []>; + "vucmprlb $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprlb v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRHH : VXForm_VRTAB5<259, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprhh $VRT, $VRA, $VRB", []>; + "vucmprhh $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprhh v16i8:$VRA, v16i8:$VRB))]>; def VUCMPRLH : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), - "vucmprlh $VRT, $VRA, $VRB", []>; + "vucmprlh $VRT, $VRA, $VRB", + [(set v16i8:$VRT, + (int_ppc_altivec_vucmprlh v16i8:$VRA, v16i8:$VRB))]>; def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvrlw $XT, $XA, $XB", diff --git a/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll new file mode 100644 index 0000000000000..85f84ade7c3c1 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/deeply-compressed-weights.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s --check-prefix=CHECK-BE + +; AI Assisted. 
+ +declare <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8>) +declare <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8>, i32) +declare <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8>, i32) + +define <16 x i8> @test_vucmprhn(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhn: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhn 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhn: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhn 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhn(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprln(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprln: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprln 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprln: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprln 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprln(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprhb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhb: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhb 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhb: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhb 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + 
+define <16 x i8> @test_vucmprlb(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprlb: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprlb 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprlb: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprlb 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprlb(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprhh(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprhh: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprhh 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprhh: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprhh 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprhh(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vucmprlh(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LE-LABEL: test_vucmprlh: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vucmprlh 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vucmprlh: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vucmprlh 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vucmprlh(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkhsntob(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkhsntob: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkhsntob 2, 2 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkhsntob: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkhsntob 2, 2 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkhsntob(<16 x i8> %a) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupklsntob(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupklsntob: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupklsntob 2, 2 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupklsntob: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupklsntob 2, 2 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupklsntob(<16 x i8> %a) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tobf16_0(<16 x i8> %a) { 
+; CHECK-LE-LABEL: test_vupkint4tobf16_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tobf16 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tobf16_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tobf16 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tobf16_3(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tobf16_3: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tobf16 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tobf16_3: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tobf16 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tobf16(<16 x i8> %a, i32 3) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tobf16_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tobf16_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tobf16 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tobf16_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tobf16 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tobf16_1(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tobf16_1: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tobf16 2, 2, 1 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tobf16_1: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tobf16 2, 2, 1 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tobf16(<16 x i8> %a, i32 1) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tofp32_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tofp32_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tofp32 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tofp32_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tofp32 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, 
i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint4tofp32_7(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint4tofp32_7: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint4tofp32 2, 2, 7 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint4tofp32_7: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint4tofp32 2, 2, 7 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint4tofp32(<16 x i8> %a, i32 7) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tofp32_0(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tofp32_0: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tofp32 2, 2, 0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tofp32_0: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tofp32 2, 2, 0 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 0) + ret <16 x i8> %res +} + +define <16 x i8> @test_vupkint8tofp32_3(<16 x i8> %a) { +; CHECK-LE-LABEL: test_vupkint8tofp32_3: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: vupkint8tofp32 2, 2, 3 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vupkint8tofp32_3: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: vupkint8tofp32 2, 2, 3 +; CHECK-BE-NEXT: blr + %res = call <16 x i8> @llvm.ppc.altivec.vupkint8tofp32(<16 x i8> %a, i32 3) + ret <16 x i8> %res +} >From d18b9af50e92aea6d470c73a7ae9c1c79449de52 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Wed, 4 Mar 2026 22:58:00 +0000 Subject: [PATCH 2/7] fix format --- clang/lib/Headers/altivec.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 3de356a1a0e4d..c62dad5293a63 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -19358,16 +19358,16 @@ vec_unpack_lsn_to_byte(vector unsigned char __a) { return __builtin_altivec_vupklsntob(__a); } -#define vec_unpack_int4_to_bf16(__a, __imm) \ +#define vec_unpack_int4_to_bf16(__a, __imm) \ __builtin_altivec_vupkint4tobf16((__a), (__imm)) 
-#define vec_unpack_int8_to_bf16(__a, __imm) \ +#define vec_unpack_int8_to_bf16(__a, __imm) \ __builtin_altivec_vupkint8tobf16((__a), (__imm)) -#define vec_unpack_int4_to_fp32(__a, __imm) \ +#define vec_unpack_int4_to_fp32(__a, __imm) \ __builtin_altivec_vupkint4tofp32((__a), (__imm)) -#define vec_unpack_int8_to_fp32(__a, __imm) \ +#define vec_unpack_int8_to_fp32(__a, __imm) \ __builtin_altivec_vupkint8tofp32((__a), (__imm)) #endif /* __FUTURE_VECTOR__ */ >From 78cffe29f11a6e2ae3364dc0cb1cc1aba384c47a Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Wed, 25 Mar 2026 18:17:24 -0400 Subject: [PATCH 3/7] add err checking and move file to PowerPC subdir --- ...tins-ppc-deeply-compressed-weights-error.c | 71 +++++++++++++++++++ ...tins-ppc-deeply-compressed-weights-error.c | 54 -------------- 2 files changed, 71 insertions(+), 54 deletions(-) create mode 100644 clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c delete mode 100644 clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c diff --git a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c new file mode 100644 index 0000000000000..ca562b5bfc753 --- /dev/null +++ b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c @@ -0,0 +1,71 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -verify=expected %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-feature +vsx \ +// RUN: -target-feature +isa-future-instructions -verify=expected %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ +// RUN: -flax-vector-conversions=none -target-cpu pwr10 -verify=pwr10 %s + +// AI Assissted. 
+ +#include <altivec.h> + +vector unsigned char vuca, vucb; +vector signed int vsia; + +void test_invalid_params(void) { + vector unsigned char res; + + // Test invalid parameter types + res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \ + // pwr10-error {{'__builtin_altivec_vuncompresshn' needs target feature isa-future-instructions}} + res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__b' here}} \ + // pwr10-error {{'__builtin_altivec_vuncompressln' needs target feature isa-future-instructions}} + res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \ + // pwr10-error {{'__builtin_altivec_vunpackhsntobyte' needs target feature isa-future-instructions}} +} + +void test_invalid_immediates(void) { + vector unsigned char res; + + // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid range: 0-3) + res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} \ + // pwr10-error {{'__builtin_altivec_vupkint4tobf16' needs target feature isa-future-instructions}} + res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} \ + // pwr10-error {{'__builtin_altivec_vupkint4tobf16' needs target feature isa-future-instructions}} + + // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid range: 0-1) + 
res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} \ + // pwr10-error {{'__builtin_altivec_vupkint8tobf16' needs target feature isa-future-instructions}} + res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} \ + // pwr10-error {{'__builtin_altivec_vupkint8tobf16' needs target feature isa-future-instructions}} + + // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid range: 0-7) + res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} \ + // pwr10-error {{'__builtin_altivec_vupkint4tofp32' needs target feature isa-future-instructions}} + res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} \ + // pwr10-error {{'__builtin_altivec_vupkint4tofp32' needs target feature isa-future-instructions}} + + // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid range: 0-3) + res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} \ + // pwr10-error {{'__builtin_altivec_vupkint8tofp32' needs target feature isa-future-instructions}} + res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} \ + // pwr10-error {{'__builtin_altivec_vupkint8tofp32' needs target feature isa-future-instructions}} +} + +void test_non_constant_immediates(void) { + vector unsigned char res; + unsigned int imm = 1; + + // Test non-constant immediate values + res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tobf16' must be a constant integer}} \ + // pwr10-error {{'__builtin_altivec_vupkint4tobf16' needs target feature isa-future-instructions}} + res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tobf16' must be a 
constant integer}} \ + // pwr10-error {{'__builtin_altivec_vupkint8tobf16' needs target feature isa-future-instructions}} + res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tofp32' must be a constant integer}} \ + // pwr10-error {{'__builtin_altivec_vupkint4tofp32' needs target feature isa-future-instructions}} + res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tofp32' must be a constant integer}} \ + // pwr10-error {{'__builtin_altivec_vupkint8tofp32' needs target feature isa-future-instructions}} +} diff --git a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c deleted file mode 100644 index 5092b15731c81..0000000000000 --- a/clang/test/Sema/builtins-ppc-deeply-compressed-weights-error.c +++ /dev/null @@ -1,54 +0,0 @@ -// REQUIRES: powerpc-registered-target -// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \ -// RUN: -flax-vector-conversions=none -target-feature +vsx \ -// RUN: -target-feature +isa-future-instructions -verify %s -// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ -// RUN: -flax-vector-conversions=none -target-feature +vsx \ -// RUN: -target-feature +isa-future-instructions -verify %s - -// AI Assissted. 
- -#include <altivec.h> - -vector unsigned char vuca, vucb; -vector signed int vsia; - -void test_invalid_params(void) { - vector unsigned char res; - - // Test invalid parameter types - res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} - res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__b' here}} - res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} -} - -void test_invalid_immediates(void) { - vector unsigned char res; - - // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid range: 0-3) - res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} - res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} - - // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid range: 0-1) - res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} - res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} - - // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid range: 0-7) - res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 
is outside the valid range [0, 7]}} - - // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid range: 0-3) - res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} - res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} -} - -void test_non_constant_immediates(void) { - vector unsigned char res; - unsigned int imm = 1; - - // Test non-constant immediate values - res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tobf16' must be a constant integer}} - res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tobf16' must be a constant integer}} - res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tofp32' must be a constant integer}} - res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tofp32' must be a constant integer}} -} >From 1435285781a14262b38877f1607b603adf658142 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Wed, 25 Mar 2026 18:28:47 -0400 Subject: [PATCH 4/7] update pwr10 calls --- ...tins-ppc-deeply-compressed-weights-error.c | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c index ca562b5bfc753..7cc1a0c429e7d 100644 --- a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c +++ b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c @@ -20,11 +20,14 @@ void test_invalid_params(void) { // Test invalid parameter types res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned 
char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \ - // pwr10-error {{'__builtin_altivec_vuncompresshn' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_uncompresshn'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__b' here}} \ - // pwr10-error {{'__builtin_altivec_vuncompressln' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_uncompressln'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \ - // pwr10-error {{'__builtin_altivec_vunpackhsntobyte' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_hsn_to_byte'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} } void test_invalid_immediates(void) { @@ -32,27 +35,31 @@ void test_invalid_immediates(void) { // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid range: 0-3) res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} \ - // pwr10-error {{'__builtin_altivec_vupkint4tobf16' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_bf16'}} \ + 
// pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} \ - // pwr10-error {{'__builtin_altivec_vupkint4tobf16' needs target feature isa-future-instructions}} + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid range: 0-1) res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} \ - // pwr10-error {{'__builtin_altivec_vupkint8tobf16' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_bf16'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} \ - // pwr10-error {{'__builtin_altivec_vupkint8tobf16' needs target feature isa-future-instructions}} + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid range: 0-7) res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} \ - // pwr10-error {{'__builtin_altivec_vupkint4tofp32' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_fp32'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} \ - // pwr10-error {{'__builtin_altivec_vupkint4tofp32' needs 
target feature isa-future-instructions}} + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid range: 0-3) res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} \ - // pwr10-error {{'__builtin_altivec_vupkint8tofp32' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_fp32'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} \ - // pwr10-error {{'__builtin_altivec_vupkint8tofp32' needs target feature isa-future-instructions}} + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} } void test_non_constant_immediates(void) { @@ -61,11 +68,15 @@ void test_non_constant_immediates(void) { // Test non-constant immediate values res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tobf16' must be a constant integer}} \ - // pwr10-error {{'__builtin_altivec_vupkint4tobf16' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_bf16'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tobf16' must be a constant integer}} \ - // pwr10-error {{'__builtin_altivec_vupkint8tobf16' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_bf16'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 
'unsigned char' values) from incompatible type 'int'}} res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tofp32' must be a constant integer}} \ - // pwr10-error {{'__builtin_altivec_vupkint4tofp32' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_fp32'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tofp32' must be a constant integer}} \ - // pwr10-error {{'__builtin_altivec_vupkint8tofp32' needs target feature isa-future-instructions}} + // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_fp32'}} \ + // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} } >From 5aacf672285e554b8d96d918be4d69c2e41c33a2 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Thu, 26 Mar 2026 10:31:38 -0400 Subject: [PATCH 5/7] update to use future-vector --- clang/include/clang/Basic/BuiltinsPPC.def | 26 +++++++++++------------ clang/lib/Basic/Targets/PPC.cpp | 2 +- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index f99a019b71f2b..8422d37e30688 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1164,29 +1164,27 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3", // Deeply Compressed Weights built-ins. 
TARGET_BUILTIN(__builtin_altivec_vucmprhn, "V16UcV16UcV16Uc", "", - "isa-future-instructions") + "future-vector") TARGET_BUILTIN(__builtin_altivec_vucmprln, "V16UcV16UcV16Uc", "", - "isa-future-instructions") + "future-vector") TARGET_BUILTIN(__builtin_altivec_vucmprhb, "V16UcV16UcV16Uc", "", - "isa-future-instructions") + "future-vector") TARGET_BUILTIN(__builtin_altivec_vucmprlb, "V16UcV16UcV16Uc", "", - "isa-future-instructions") + "future-vector") TARGET_BUILTIN(__builtin_altivec_vucmprhh, "V16UcV16UcV16Uc", "", - "isa-future-instructions") + "future-vector") TARGET_BUILTIN(__builtin_altivec_vucmprlh, "V16UcV16UcV16Uc", "", - "isa-future-instructions") -TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "", - "isa-future-instructions") -TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "", - "isa-future-instructions") + "future-vector") +TARGET_BUILTIN(__builtin_altivec_vupkhsntob, "V16UcV16Uc", "", "future-vector") +TARGET_BUILTIN(__builtin_altivec_vupklsntob, "V16UcV16Uc", "", "future-vector") TARGET_BUILTIN(__builtin_altivec_vupkint4tobf16, "V16UcV16UcIi", "", - "isa-future-instructions") + "future-vector") TARGET_BUILTIN(__builtin_altivec_vupkint8tobf16, "V16UcV16UcIi", "", - "isa-future-instructions") + "future-vector") TARGET_BUILTIN(__builtin_altivec_vupkint4tofp32, "V16UcV16UcIi", "", - "isa-future-instructions") + "future-vector") TARGET_BUILTIN(__builtin_altivec_vupkint8tofp32, "V16UcV16UcIi", "", - "isa-future-instructions") + "future-vector") // FIXME: Obviously incomplete. 
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 90e2050e4d1d4..c9a41df806aff 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -59,7 +59,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasP9Vector = true; } else if (Feature == "+power10-vector") { HasP10Vector = true; - } else if (Feature == "+isa-future-instructions") { + } else if (Feature == "+future-vector") { HasFutureVector = true; } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; >From 9119862c4ad45d02a64409c81f3ceb3bfe5919e8 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Thu, 26 Mar 2026 11:22:21 -0400 Subject: [PATCH 6/7] cleanup test --- ...tins-ppc-deeply-compressed-weights-error.c | 53 +++++++------------ 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c index 7cc1a0c429e7d..243e179d01834 100644 --- a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c +++ b/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c @@ -1,12 +1,10 @@ // REQUIRES: powerpc-registered-target // RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \ -// RUN: -flax-vector-conversions=none -target-feature +vsx \ -// RUN: -target-feature +isa-future-instructions -verify=expected %s +// RUN: -flax-vector-conversions=none -target-cpu future -verify=expected %s // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ -// RUN: -flax-vector-conversions=none -target-feature +vsx \ -// RUN: -target-feature +isa-future-instructions -verify=expected %s +// RUN: -flax-vector-conversions=none -target-cpu future -verify=expected %s // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ -// RUN: -flax-vector-conversions=none -target-cpu pwr10 -verify=pwr10 %s +// 
RUN: -flax-vector-conversions=none -target-cpu pwr10 -verify=pwr10 -verify-ignore-unexpected=error %s // AI Assissted. @@ -20,14 +18,11 @@ void test_invalid_params(void) { // Test invalid parameter types res = vec_uncompresshn(vsia, vucb); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \ - // pwr10-error {{call to undeclared function 'vec_uncompresshn'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_uncompresshn'}} res = vec_uncompressln(vuca, vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__b' here}} \ - // pwr10-error {{call to undeclared function 'vec_uncompressln'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_uncompressln'}} res = vec_unpack_hsn_to_byte(vsia); // expected-error {{passing '__vector int' (vector of 4 'int' values) to parameter of incompatible type '__vector unsigned char' (vector of 16 'unsigned char' values)}} expected-note@altivec.h:* {{passing argument to parameter '__a' here}} \ - // pwr10-error {{call to undeclared function 'vec_unpack_hsn_to_byte'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_hsn_to_byte'}} } void test_invalid_immediates(void) { @@ -35,31 +30,23 @@ void test_invalid_immediates(void) { // Test out-of-range immediate values for vec_unpack_int4_to_bf16 (valid range: 
0-3) res = vec_unpack_int4_to_bf16(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} \ - // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_bf16'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} - res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_bf16'}} + res = vec_unpack_int4_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} // Test out-of-range immediate values for vec_unpack_int8_to_bf16 (valid range: 0-1) res = vec_unpack_int8_to_bf16(vuca, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} \ - // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_bf16'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} - res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_bf16'}} + res = vec_unpack_int8_to_bf16(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}} // Test out-of-range immediate values for vec_unpack_int4_to_fp32 (valid range: 0-7) res = vec_unpack_int4_to_fp32(vuca, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} \ - // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_fp32'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 
'int'}} - res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_fp32'}} + res = vec_unpack_int4_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} // Test out-of-range immediate values for vec_unpack_int8_to_fp32 (valid range: 0-3) res = vec_unpack_int8_to_fp32(vuca, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} \ - // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_fp32'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} - res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_fp32'}} + res = vec_unpack_int8_to_fp32(vuca, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}} } void test_non_constant_immediates(void) { @@ -68,15 +55,11 @@ void test_non_constant_immediates(void) { // Test non-constant immediate values res = vec_unpack_int4_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tobf16' must be a constant integer}} \ - // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_bf16'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_bf16'}} res = vec_unpack_int8_to_bf16(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tobf16' must be a constant integer}} \ - // 
pwr10-error {{call to undeclared function 'vec_unpack_int8_to_bf16'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_bf16'}} res = vec_unpack_int4_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint4tofp32' must be a constant integer}} \ - // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_fp32'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_int4_to_fp32'}} res = vec_unpack_int8_to_fp32(vuca, imm); // expected-error {{argument to '__builtin_altivec_vupkint8tofp32' must be a constant integer}} \ - // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_fp32'}} \ - // pwr10-error {{assigning to '__vector unsigned char' (vector of 16 'unsigned char' values) from incompatible type 'int'}} + // pwr10-error {{call to undeclared function 'vec_unpack_int8_to_fp32'}} } >From 39a0cb329a8f0745e560a867eaac381d35471cc4 Mon Sep 17 00:00:00 2001 From: Lei Huang <[email protected]> Date: Thu, 26 Mar 2026 11:27:51 -0400 Subject: [PATCH 7/7] cleanup tests --- ...-weights.c => builtins-deeply-compressed-weights.c} | 10 ++++------ ...or.c => builtins-deeply-compressed-weights-error.c} | 0 2 files changed, 4 insertions(+), 6 deletions(-) rename clang/test/CodeGen/PowerPC/{builtins-ppc-deeply-compressed-weights.c => builtins-deeply-compressed-weights.c} (95%) rename clang/test/Sema/PowerPC/{builtins-ppc-deeply-compressed-weights-error.c => builtins-deeply-compressed-weights-error.c} (100%) diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c b/clang/test/CodeGen/PowerPC/builtins-deeply-compressed-weights.c similarity index 95% rename from clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c rename to 
clang/test/CodeGen/PowerPC/builtins-deeply-compressed-weights.c index 3b4eb0faa27c2..664e2ffa34295 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-deeply-compressed-weights.c +++ b/clang/test/CodeGen/PowerPC/builtins-deeply-compressed-weights.c @@ -1,11 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: powerpc-registered-target -// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \ -// RUN: -target-feature +isa-future-instructions -triple powerpc64-unknown-unknown \ -// RUN: -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -target-feature +vsx \ -// RUN: -target-feature +isa-future-instructions -triple powerpc64le-unknown-unknown \ -// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu future \ +// RUN: -flax-vector-conversions=none -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu future \ +// RUN: -flax-vector-conversions=none -emit-llvm %s -o - | FileCheck %s // AI Assisted. diff --git a/clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c b/clang/test/Sema/PowerPC/builtins-deeply-compressed-weights-error.c similarity index 100% rename from clang/test/Sema/PowerPC/builtins-ppc-deeply-compressed-weights-error.c rename to clang/test/Sema/PowerPC/builtins-deeply-compressed-weights-error.c _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
