Enable V4BFmode and V2BFmode with the same ABI as V4HFmode and V2HFmode. No operations other than moves are supported for them. This should solve PR target/107261.
Also, I noticed that V2HFmode is redundant in VALID_AVX512FP16_REG_MODE and removed it. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR target/107261 * config/i386/i386-modes.def (VECTOR_MODE): Support V2BFmode. * config/i386/i386.cc (classify_argument): Handle V4BFmode and V2BFmode. (ix86_convert_const_vector_to_integer): Ditto. * config/i386/i386.h (VALID_AVX512FP16_REG_MODE): Remove V2HFmode. (VALID_SSE2_REG_MODE): Add V4BFmode and V2BFmode. (VALID_MMX_REG_MODE): Add V4BFmode. * config/i386/i386.md (mode): Add V4BF and V2BF. (MODE_SIZE): Ditto. * config/i386/mmx.md (MMXMODE): Add V4BF. (V_32): Add V2BF. (V_16_32_64): Add V4BF. (mmxinsnmode): Add V4BF and V2BF. (*mov<mode>_internal): Handle V4BFmode and V2BFmode. gcc/testsuite/ChangeLog: * gcc.target/i386/pr107261.c: New test. --- gcc/config/i386/i386-modes.def | 1 + gcc/config/i386/i386.cc | 6 ++++ gcc/config/i386/i386.h | 9 +++--- gcc/config/i386/i386.md | 5 ++-- gcc/config/i386/mmx.md | 26 +++++++++------- gcc/testsuite/gcc.target/i386/pr107261.c | 38 ++++++++++++++++++++++++ 6 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr107261.c diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index b49daaef253..dbc3165c5fc 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -93,6 +93,7 @@ VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */ VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */ VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */ VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ +VECTOR_MODE (FLOAT, BF, 2); /* V2BF */ VECTOR_MODE (FLOAT, HF, 6); /* V6HF */ VECTOR_MODE (INT, TI, 1); /* V1TI */ VECTOR_MODE (INT, DI, 1); /* V1DI */ diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index aeea26ef4be..1aca7d55a09 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -2507,7 +2507,9 @@ classify_argument (machine_mode 
mode, const_tree type, case E_V2SImode: case E_V4HImode: case E_V4HFmode: + case E_V4BFmode: case E_V2HFmode: + case E_V2BFmode: case E_V8QImode: classes[0] = X86_64_SSE_CLASS; return 1; @@ -2991,6 +2993,7 @@ pass_in_reg: case E_V8QImode: case E_V4HImode: case E_V4HFmode: + case E_V4BFmode: case E_V2SImode: case E_V2SFmode: case E_V1TImode: @@ -3240,6 +3243,7 @@ pass_in_reg: case E_V8QImode: case E_V4HImode: case E_V4HFmode: + case E_V4BFmode: case E_V2SImode: case E_V2SFmode: case E_V1TImode: @@ -15810,7 +15814,9 @@ ix86_convert_const_vector_to_integer (rtx op, machine_mode mode) } break; case E_V2HFmode: + case E_V2BFmode: case E_V4HFmode: + case E_V4BFmode: case E_V2SFmode: for (int i = 0; i < nunits; ++i) { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index fd7c9df47e5..16d9c606077 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1033,13 +1033,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V8BFmode || (MODE) == TImode) #define VALID_AVX512FP16_REG_MODE(MODE) \ - ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode \ - || (MODE) == V2HFmode) + ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode) #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode \ - || (MODE) == V8BFmode \ + || (MODE) == V8BFmode || (MODE) == V4BFmode || (MODE) == V2BFmode \ || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \ || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode \ || (MODE) == HFmode || (MODE) == BFmode) @@ -1057,7 +1056,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); ((MODE) == V1DImode || (MODE) == DImode \ || (MODE) == V2SImode || (MODE) == SImode \ || (MODE) == V4HImode || (MODE) == V8QImode \ - || (MODE) == V4HFmode) + || (MODE) == V4HFmode || (MODE) == V4BFmode) #define VALID_MASK_REG_MODE(MODE) 
((MODE) == HImode || (MODE) == QImode) @@ -1074,7 +1073,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == CSImode || (MODE) == CDImode \ || (MODE) == SDmode || (MODE) == DDmode \ || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode \ - || (MODE) == V2HImode || (MODE) == V2HFmode \ + || (MODE) == V2HImode || (MODE) == V2HFmode || (MODE) == V2BFmode \ || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode \ || (TARGET_64BIT \ && ((MODE) == TImode || (MODE) == CTImode \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index baf1f1f8fa2..e46554efea8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -500,7 +500,7 @@ (define_attr "type" ;; Main data type used by the insn (define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF, - V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF" + V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF" (const_string "unknown")) ;; The CPU unit operations uses. @@ -1119,7 +1119,8 @@ (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") (V4SF "16") (V8SF "32") (V16SF "64") (V8HF "16") (V16HF "32") (V32HF "64") (V4HF "8") (V2HF "4") - (V8BF "16") (V16BF "32") (V32BF "64")]) + (V8BF "16") (V16BF "32") (V32BF "64") + (V4BF "8") (V2BF "4")]) ;; Double word integer modes as mode attribute. 
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")]) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index c359e2dd6de..d5134cc351e 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -48,7 +48,7 @@ (define_mode_iterator MMXMODEI [V8QI V4HI V2SI]) (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")]) ;; All 8-byte vector modes handled by MMX -(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF]) +(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF V4BF]) (define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF]) ;; Mix-n-match @@ -58,7 +58,7 @@ (define_mode_iterator MMXMODE24 [V4HI V2SI]) (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI]) ;; All 4-byte integer/float16 vector modes -(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF]) +(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF]) ;; 4-byte integer vector modes (define_mode_iterator VI_32 [V4QI V2HI]) @@ -72,7 +72,8 @@ (define_mode_iterator VI1_16_32 [V4QI V2QI]) ;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element (define_mode_iterator V_16_32_64 [V2QI V4QI V2HI V2HF - (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT") + (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") + (V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT") (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")]) ;; V2S* modes @@ -92,6 +93,7 @@ (define_mode_attr mmxinsnmode (V4HI "DI") (V2HI "SI") (V2SI "DI") (V4HF "DI") (V2HF "SI") + (V4BF "DI") (V2BF "SI") (V2SF "DI")]) (define_mode_attr mmxdoublemode @@ -213,9 +215,9 @@ (define_insn "*mov<mode>_internal" (cond [(eq_attr "alternative" "2") (const_string "SI") (eq_attr "alternative" "11,12") - (cond [(match_test "<MODE>mode == V2SFmode") - (const_string "V4SF") - (match_test "<MODE>mode == V4HFmode") + (cond [(match_test "<MODE>mode == V2SFmode + || <MODE>mode == V4HFmode + || <MODE>mode == V4BFmode") (const_string "V4SF") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p 
(cfun)")) @@ -227,13 +229,15 @@ (define_insn "*mov<mode>_internal" (ior (ior (and (match_test "<MODE>mode == V2SFmode") (not (match_test "TARGET_MMX_WITH_SSE"))) (not (match_test "TARGET_SSE2"))) - (match_test "<MODE>mode == V4HFmode"))) + (match_test "<MODE>mode == V4HFmode + || <MODE>mode == V4BFmode"))) (const_string "V2SF") (and (eq_attr "alternative" "14") (ior (ior (match_test "<MODE>mode == V2SFmode") (not (match_test "TARGET_SSE2"))) - (match_test "<MODE>mode == V4HFmode"))) + (match_test "<MODE>mode == V4HFmode + || <MODE>mode == V4BFmode"))) (const_string "V2SF") ] (const_string "DI"))) @@ -321,7 +325,8 @@ (define_insn "*mov<mode>_internal" (const_string "*"))) (set (attr "mode") (cond [(eq_attr "alternative" "2,3") - (cond [(match_test "<MODE>mode == V2HFmode") + (cond [(match_test "<MODE>mode == V2HFmode + || <MODE>mode == V2BFmode") (const_string "V4SF") (match_test "TARGET_AVX") (const_string "TI") @@ -332,7 +337,8 @@ (define_insn "*mov<mode>_internal" (const_string "TI")) (and (eq_attr "alternative" "4,5") - (ior (match_test "<MODE>mode == V2HFmode") + (ior (match_test "<MODE>mode == V2HFmode + || <MODE>mode == V2BFmode") (not (match_test "TARGET_SSE2")))) (const_string "SF") ] diff --git a/gcc/testsuite/gcc.target/i386/pr107261.c b/gcc/testsuite/gcc.target/i386/pr107261.c new file mode 100644 index 00000000000..eb1d232fbfc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107261.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef __bf16 v4bf __attribute__ ((vector_size (8))); +typedef __bf16 v2bf __attribute__ ((vector_size (4))); + +v4bf +v4bf_abi_1 (v4bf a) +{ + return a; +} + +v4bf +v4bf_abi_3 (v4bf a, v4bf b, v4bf c) +{ + return c; +} + +/* { dg-final { scan-assembler-times "movq\[\\t \]*%mm2, %mm0" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm2, %xmm0" 1 { target { ! 
ia32 } } } } */ + +v4bf +v4bf_abi_4 (v4bf a, v4bf b, v4bf c, v4bf d) +{ + return d; +} + +/* { dg-final { scan-assembler-times "movq\[\\t \]*4\\(%esp\\), %mm0" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm3, %xmm0" 1 { target { ! ia32 } } } } */ + +v2bf +v2bf_test (v2bf a, v2bf b, v2bf c, v2bf d) +{ + return b; +} + +/* { dg-final { scan-assembler-times "movl\[\\t \]*8\\(%esp\\), %eax" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movaps\[\\t \]*%xmm1, %xmm0" 1 { target { ! ia32 } } } } */ -- 2.27.0