https://github.com/mikolaj-pirog created 
https://github.com/llvm/llvm-project/pull/171906

This is a follow-up to https://github.com/llvm/llvm-project/pull/165556

I missed some parts of amx-transpose during the initial removal.

From 65d85dc5962d8b92b7bfd9cc875dad4fc71de2b8 Mon Sep 17 00:00:00 2001
From: "Pirog, Mikolaj Maciej" <[email protected]>
Date: Thu, 11 Dec 2025 19:27:26 +0100
Subject: [PATCH] Remove t2rpntlvw[z0,z1]rs[,t1] instruction (amx-transpose)

---
 clang/test/CodeGen/X86/amx_tf32.c            |  2 +-
 clang/test/CodeGen/X86/amx_tf32_api.c        |  2 +-
 clang/test/CodeGen/X86/amx_tf32_errors.c     |  2 +-
 clang/test/CodeGen/X86/amx_tf32_inline_asm.c |  9 +-----
 llvm/include/llvm/IR/IntrinsicsX86.td        | 32 --------------------
 llvm/lib/Target/X86/X86LowerAMXType.cpp      |  4 ---
 6 files changed, 4 insertions(+), 47 deletions(-)

diff --git a/clang/test/CodeGen/X86/amx_tf32.c 
b/clang/test/CodeGen/X86/amx_tf32.c
index 54ad6bb714933..24893243b66e6 100644
--- a/clang/test/CodeGen/X86/amx_tf32.c
+++ b/clang/test/CodeGen/X86/amx_tf32.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown 
-target-feature +amx-tile -target-feature +amx-tf32 \
-// RUN: -target-feature +amx-transpose -emit-llvm -o - -Wall -Werror -pedantic 
-Wno-gnu-statement-expression | FileCheck %s
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression 
| FileCheck %s
 
 #include <immintrin.h>
 #include <stddef.h>
diff --git a/clang/test/CodeGen/X86/amx_tf32_api.c 
b/clang/test/CodeGen/X86/amx_tf32_api.c
index 8f574b7bc71dc..531378dbd0d72 100644
--- a/clang/test/CodeGen/X86/amx_tf32_api.c
+++ b/clang/test/CodeGen/X86/amx_tf32_api.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding 
-triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-tf32 -target-feature +amx-transpose  \
+// RUN: -target-feature +amx-tf32 \
 // RUN: -target-feature +amx-bf16 -target-feature +avx512f \
 // RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s
 
diff --git a/clang/test/CodeGen/X86/amx_tf32_errors.c 
b/clang/test/CodeGen/X86/amx_tf32_errors.c
index f0fdd060363cf..a1c525547c786 100644
--- a/clang/test/CodeGen/X86/amx_tf32_errors.c
+++ b/clang/test/CodeGen/X86/amx_tf32_errors.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-tf32 -target-feature +amx-transpose -verify
+// RUN: -target-feature +amx-tf32 -verify
 
 #include <immintrin.h>
 #include <stddef.h>
diff --git a/clang/test/CodeGen/X86/amx_tf32_inline_asm.c 
b/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
index 76d164737d88b..ed67dda04e9f7 100644
--- a/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
+++ b/clang/test/CodeGen/X86/amx_tf32_inline_asm.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown 
-target-feature +amx-tf32 -target-feature +amx-transpose -emit-llvm -o - -Wall 
-Werror -pedantic | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown 
-target-feature +amx-tf32 -emit-llvm -o - -Wall -Werror -pedantic | FileCheck %s
 
 void f_tilemul(short a)
 {
@@ -8,11 +8,4 @@ void f_tilemul(short a)
                     "tmmultf32ps %%tmm6, %%tmm0, %%tmm7   \n\t"
                     "tilestored %%tmm7, 0(%%r12,%%r15,4) \n\t"
           ::: "memory", "tmm0", "tmm6", "tmm7");
-
-  //CHECK:  call void asm sideeffect "tileloadd 0(%rsi,%r13,4), %tmm0   
\0A\09tileloadd 0(%rdx,%r14,4), %tmm6   \0A\09ttmmultf32ps %tmm6, %tmm0, %tmm7  
  \0A\09tilestored %tmm7, 0(%r12,%r15,4) \0A\09", 
"~{memory},~{tmm0},~{tmm6},~{tmm7},~{dirflag},~{fpsr},~{flags}"()
-  __asm__ volatile ("tileloadd 0(%%rsi,%%r13,4), %%tmm0   \n\t"
-                    "tileloadd 0(%%rdx,%%r14,4), %%tmm6   \n\t"
-                    "ttmmultf32ps %%tmm6, %%tmm0, %%tmm7  \n\t"
-                    "tilestored %%tmm7, 0(%%r12,%%r15,4) \n\t"
-          ::: "memory", "tmm0", "tmm6", "tmm7");
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td 
b/llvm/include/llvm/IR/IntrinsicsX86.td
index ec80ba3e1ee81..0245611bc422b 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5505,20 +5505,6 @@ let TargetPrefix = "x86" in {
                         [ImmArg<ArgIndex<0>>,
                         ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
 
-  // AMX-MORVS, AMX-TRANSPOSE
-  def int_x86_t2rpntlvwz0rs : ClangBuiltin<"__builtin_ia32_t2rpntlvwz0rs">,
-              Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [ImmArg<ArgIndex<0>>]>;
-  def int_x86_t2rpntlvwz0rst1 : ClangBuiltin<"__builtin_ia32_t2rpntlvwz0rst1">,
-              Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [ImmArg<ArgIndex<0>>]>;
-  def int_x86_t2rpntlvwz1rs : ClangBuiltin<"__builtin_ia32_t2rpntlvwz1rs">,
-              Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [ImmArg<ArgIndex<0>>]>;
-  def int_x86_t2rpntlvwz1rst1 : ClangBuiltin<"__builtin_ia32_t2rpntlvwz1rst1">,
-              Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
-                        [ImmArg<ArgIndex<0>>]>;
-
   // AMX-AVX512
   def int_x86_tcvtrowd2ps : ClangBuiltin<"__builtin_ia32_tcvtrowd2ps">,
               Intrinsic<[llvm_v16f32_ty], [llvm_i8_ty, llvm_i32_ty],
@@ -5627,24 +5613,6 @@ let TargetPrefix = "x86" in {
                          llvm_x86amx_ty, llvm_x86amx_ty,
                          llvm_x86amx_ty], []>;
 
-  // AMX-MORVS, AMX-TRANSPOSE - internal intrinsics
-  def int_x86_t2rpntlvwz0rs_internal :
-              Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, 
llvm_i64_ty],
-                        [IntrArgMemOnly, IntrReadMem]>;
-  def int_x86_t2rpntlvwz0rst1_internal :
-              Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, 
llvm_i64_ty],
-                        [IntrArgMemOnly]>;
-  def int_x86_t2rpntlvwz1rs_internal :
-              Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, 
llvm_i64_ty],
-                        [IntrArgMemOnly]>;
-  def int_x86_t2rpntlvwz1rst1_internal :
-              Intrinsic<[llvm_x86amx_ty, llvm_x86amx_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, 
llvm_i64_ty],
-                        [IntrArgMemOnly]>;
-
   def int_x86_tcvtrowd2ps_internal :
               ClangBuiltin<"__builtin_ia32_tcvtrowd2ps_internal">,
               Intrinsic<[llvm_v16f32_ty],
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp 
b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 2fc5d38ef5055..d93bcd31c5721 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -163,10 +163,6 @@ std::pair<Value *, Value *> getShape(IntrinsicInst *II, 
unsigned OpNo) {
   case Intrinsic::x86_tileloadd64_internal:
   case Intrinsic::x86_tileloaddt164_internal:
   case Intrinsic::x86_tilestored64_internal:
-  case Intrinsic::x86_t2rpntlvwz0rs_internal:
-  case Intrinsic::x86_t2rpntlvwz0rst1_internal:
-  case Intrinsic::x86_t2rpntlvwz1rs_internal:
-  case Intrinsic::x86_t2rpntlvwz1rst1_internal:
   case Intrinsic::x86_tileloaddrs64_internal:
   case Intrinsic::x86_tileloaddrst164_internal: {
     Row = II->getArgOperand(0);

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to