Author: David Li Date: 2023-11-16T13:47:31-08:00 New Revision: ac3779e92ef9405fd2c602a08e8031f7b8aeedd8
URL: https://github.com/llvm/llvm-project/commit/ac3779e92ef9405fd2c602a08e8031f7b8aeedd8 DIFF: https://github.com/llvm/llvm-project/commit/ac3779e92ef9405fd2c602a08e8031f7b8aeedd8.diff LOG: Enable Custom Lowering for fabs.v8f16 on AVX (#71730) [X86]: Enable custom lowering for fabs.v8f16 on AVX Currently, custom lowering of fabs.v8f16 requires AVX512FP16, which is too restrictive. For v8f16 fabs lowering, no instructions in AVX512FP16 are needed. Without the fix, horribly inefficient code is generated without AVX512FP16. Note instcombiner generates calls to intrinsics @llvm.fabs.v8f16 when simplifying AND <8 x half> operations. Added: Modified: llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/CodeGen/X86/vec_fabs.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c9381218eee7840..7f9d971ceeeeaf6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1396,6 +1396,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMINIMUM, VT, Custom); } + setOperationAction(ISD::FABS, MVT::v8f16, Custom); + // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted // even though v8i16 is a legal type. 
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll index 8876d2f9b19928e..8af067d88a57e96 100644 --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -2,10 +2,12 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512VL +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX512FP16 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512VLDQ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512FP16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VLDQ define <2 x double> @fabs_v2f64(<2 x double> %p) { @@ -137,6 +139,86 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { } declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p) +define <8 x half> @fabs_v8f16(ptr %p) { +; X86-AVX1-LABEL: fabs_v8f16: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]] +; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0 +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: retl + +; X86-AVX2-LABEL: fabs_v8f16: +; X86-AVX2: # %bb.0: +; X86-AVX2-NEXT: movl 
4(%esp), [[REG:%.*]] +; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0 +; X86-AVX2-NEXT: retl + +; X64-AVX512VL-LABEL: fabs_v8f16: +; X64-AVX512VL: # %bb.0: +; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0 +; X64-AVX512VL-NEXT: retq + +; X64-AVX1-LABEL: fabs_v8f16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: retq + +; X64-AVX2-LABEL: fabs_v8f16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0 +; X64-AVX2-NEXT: retq + + %v = load <8 x half>, ptr %p, align 16 + %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v) + ret <8 x half> %nnv +} +declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p) + +define <16 x half> @fabs_v16f16(ptr %p) { +; X86-AVX512FP16-LABEL: fabs_v16f16: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] +; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]] +; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]] +; X86-AVX512FP16-NEXT: retl + +; X64-AVX512FP16-LABEL: fabs_v16f16: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]] +; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]] +; X64-AVX512FP16-NEXT: retq +; + %v = load <16 x half>, ptr %p, align 32 + %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v) + ret <16 x half> %nnv +} +declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p) + +define <32 x half> @fabs_v32f16(ptr %p) { +; X86-AVX512FP16-LABEL: fabs_v32f16: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] +; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]] +; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]] +; 
X86-AVX512FP16-NEXT: retl + +; X64-AVX512FP16-LABEL: fabs_v32f16: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]] +; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]] +; X64-AVX512FP16-NEXT: retq + + %v = load <32 x half>, ptr %p, align 64 + %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v) + ret <32 x half> %nnv +} +declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p) + + define <8 x float> @fabs_v8f32(<8 x float> %p) { ; X86-AVX1-LABEL: fabs_v8f32: ; X86-AVX1: # %bb.0: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits