On Tue, Oct 8, 2024 at 3:24 PM Levy Hsu <ad...@levyhsu.com> wrote: > > Bootstrapped and tested on x86_64-linux-gnu, OK for trunk? Ok. > > gcc/ChangeLog: > > * config/i386/i386.md: Rewrite insn truncsfbf2. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/truncsfbf-1.c: New test. > * gcc.target/i386/truncsfbf-2.c: New test. > --- > gcc/config/i386/i386.md | 16 ++--- > gcc/testsuite/gcc.target/i386/truncsfbf-1.c | 9 +++ > gcc/testsuite/gcc.target/i386/truncsfbf-2.c | 65 +++++++++++++++++++++ > 3 files changed, 83 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/truncsfbf-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/truncsfbf-2.c > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 9c2a0aa6112..d3fee0968d8 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -5672,16 +5672,18 @@ > (set_attr "mode" "HF")]) > > (define_insn "truncsfbf2" > - [(set (match_operand:BF 0 "register_operand" "=x, v") > + [(set (match_operand:BF 0 "register_operand" "=x,x,v,Yv") > (float_truncate:BF > - (match_operand:SF 1 "register_operand" "x,v")))] > - "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT) > - && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations" > + (match_operand:SF 1 "register_operand" "0,x,v,Yv")))] > + "TARGET_SSE2 && flag_unsafe_math_optimizations && !HONOR_NANS (BFmode)" > "@ > + psrld\t{$16, %0|%0, 16} > %{vex%} vcvtneps2bf16\t{%1, %0|%0, %1} > - vcvtneps2bf16\t{%1, %0|%0, %1}" > - [(set_attr "isa" "avxneconvert,avx512bf16vl") > - (set_attr "prefix" "vex,evex")]) > + vcvtneps2bf16\t{%1, %0|%0, %1} > + vpsrld\t{$16, %1, %0|%0, %1, 16}" > + [(set_attr "isa" "noavx,avxneconvert,avx512bf16vl,avx") > + (set_attr "prefix" "orig,vex,evex,vex") > + (set_attr "type" "sseishft1,ssecvt,ssecvt,sseishft1")]) > > ;; Signed conversion to DImode. 
> > diff --git a/gcc/testsuite/gcc.target/i386/truncsfbf-1.c > b/gcc/testsuite/gcc.target/i386/truncsfbf-1.c > new file mode 100644 > index 00000000000..dd3ff8a50b4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/truncsfbf-1.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-msse2 -O2 -ffast-math" } */ > +/* { dg-final { scan-assembler-times "psrld" 1 } } */ > + > +__bf16 > +foo (float a) > +{ > + return a; > +} > diff --git a/gcc/testsuite/gcc.target/i386/truncsfbf-2.c > b/gcc/testsuite/gcc.target/i386/truncsfbf-2.c > new file mode 100644 > index 00000000000..f4952f88fc9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/truncsfbf-2.c > @@ -0,0 +1,65 @@ > +/* { dg-do run } */ > +/* { dg-options "-msse2 -O2 -ffast-math" } */ > + > +#include <stdlib.h> > +#include <stdint.h> > +#include <string.h> > +#include <math.h> > + > +__bf16 > +foo (float a) > +{ > + return a; > +} > + > +static __bf16 > +CALC (float *a) > +{ > + uint32_t bits; > + memcpy (&bits, a, sizeof (bits)); > + bits >>= 16; > + uint16_t bfloat16_bits = (uint16_t) bits; > + __bf16 bf16; > + memcpy (&bf16, &bfloat16_bits, sizeof (bf16)); > + return bf16; > +} > + > +int > +main (void) > +{ > + float test_values[] = { 0.0f, -0.0f, 1.0f, -1.0f, 0.5f, -0.5f, 1000.0f, > -1000.0f, > + 3.1415926f, -3.1415926f, 1e-8f, -1e-8f, > + 1.0e+38f, -1.0e+38f, 1.0e-38f, -1.0e-38f }; > + size_t num_values = sizeof (test_values) / sizeof (test_values[0]); > + > + for (size_t i = 0; i < num_values; ++i) > + { > + float original = test_values[i]; > + __bf16 hw_bf16 = foo (original); > + __bf16 sw_bf16 = CALC (&original); > + > + /* Verify psrld $16, %0 == %0 >> 16 */ > + if (memcmp (&hw_bf16, &sw_bf16, sizeof (__bf16)) != 0) > + abort (); > + > + /* Reconstruct the float value from the __bf16 bits */ > + uint16_t bf16_bits; > + memcpy (&bf16_bits, &hw_bf16, sizeof (bf16_bits)); > + uint32_t reconstructed_bits = ((uint32_t) bf16_bits) << 16; > + float converted; > + memcpy (&converted, 
&reconstructed_bits, sizeof (converted)); > + > + float diff = fabsf (original - converted); > + > + /* Expected Maximum Precision Loss */ > + uint32_t orig_bits; > + memcpy (&orig_bits, &original, sizeof (orig_bits)); > + int exponent = ((orig_bits >> 23) & 0xFF) - 127; > + float expected_loss = (exponent == -127) > + ? ldexpf (1.0f, -126 - 7) > + : ldexpf (1.0f, exponent - 7); > + if (diff > expected_loss) > + abort (); > + } > + return 0; > +} > -- > 2.31.1 >
--
BR,
Hongtao