https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/154526
Backport d770567a514716cdb250a2dee635435c22622e34 Requested by: @nikic >From 0cf566fd6434fcd52a36ded92b4bfdcde6b9681d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Wed, 20 Aug 2025 12:18:10 +0100 Subject: [PATCH] [X86] SimplifyDemandedVectorEltsForTargetNode - don't split X86ISD::CVTTP2UI nodes without AVX512VL (#154504) Unlike CVTTP2SI, CVTTP2UI is only available on AVX512 targets, so there is no AVX1 variant to fall back to when we split a 512-bit vector; we can only use the 128/256-bit variants if we have AVX512VL. Fixes #154492 (cherry picked from commit d770567a514716cdb250a2dee635435c22622e34) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++++- llvm/test/CodeGen/X86/pr154492.ll | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr154492.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c7839baf7de8e..85e5ebc385c68 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44178,8 +44178,12 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } // Conversions. // TODO: Add more CVT opcodes when we have test coverage. 
- case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI: { + if (!Subtarget.hasVLX()) + break; + [[fallthrough]]; + } + case X86ISD::CVTTP2SI: { if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f16 && !Subtarget.hasVLX()) break; diff --git a/llvm/test/CodeGen/X86/pr154492.ll b/llvm/test/CodeGen/X86/pr154492.ll new file mode 100644 index 0000000000000..1ba17594976e1 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr154492.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL + +define <16 x i32> @PR154492() { +; AVX512F-LABEL: PR154492: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512F-NEXT: vmovaps %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: PR154492: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0 +; AVX512VL-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> zeroinitializer, <16 x i32> zeroinitializer, i16 255, i32 4) + ret <16 x i32> %res +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits