Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
On Tue, Aug 11, 2020 at 05:43:48PM +0800, Hongtao Liu via Gcc-patches wrote: > Hi: > The issue is described in the bugzilla. > Bootstrap is ok, regression test for i386/x86-64 backend is ok. > Ok for trunk? > > ChangeLog > gcc/ > PR target/96551 > * config/i386/sse.md (vec_unpacku_float_hi_v16si): For vector > compare to integer mask, don't use gen_rtx_LT , use Please remove the space before comma. > ix86_expand_mask_vec_cmp instead. > (vec_unpacku_float_hi_v16si): Ditto. > > gcc/testsuite > * gcc.target/i386/pr96551-1.c: New test. > * gcc.target/i386/pr96551-2.c: New test. And please rename the testcases to avx512f-pr96551-{1,2}.c. > + for (int i = 0; i != 256; i++) > +if (exp[i] != b[i]) > + __builtin_abort (); You can use just abort (); here, given that avx512-check.h includes stdlib.h. Ok for trunk with those nits changed. Jakub
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
ping ^2 On Wed, Aug 19, 2020 at 7:37 PM Hongtao Liu wrote: > > ping^1 > > On Tue, Aug 11, 2020 at 5:43 PM Hongtao Liu wrote: > > > > Hi: > > The issue is described in the bugzilla. > > Bootstrap is ok, regression test for i386/x86-64 backend is ok. > > Ok for trunk? > > > > ChangeLog > > gcc/ > > PR target/96551 > > * config/i386/sse.md (vec_unpacku_float_hi_v16si): For vector > > compare to integer mask, don't use gen_rtx_LT , use > > ix86_expand_mask_vec_cmp instead. > > (vec_unpacku_float_hi_v16si): Ditto. > > > > gcc/testsuite > > * gcc.target/i386/pr96551-1.c: New test. > > * gcc.target/i386/pr96551-2.c: New test. > > > > -- > > BR, > > Hongtao > > > > -- > BR, > Hongtao -- BR, Hongtao
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
ping^1 On Tue, Aug 11, 2020 at 5:43 PM Hongtao Liu wrote: > > Hi: > The issue is described in the bugzilla. > Bootstrap is ok, regression test for i386/x86-64 backend is ok. > Ok for trunk? > > ChangeLog > gcc/ > PR target/96551 > * config/i386/sse.md (vec_unpacku_float_hi_v16si): For vector > compare to integer mask, don't use gen_rtx_LT , use > ix86_expand_mask_vec_cmp instead. > (vec_unpacku_float_hi_v16si): Ditto. > > gcc/testsuite > * gcc.target/i386/pr96551-1.c: New test. > * gcc.target/i386/pr96551-2.c: New test. > > -- > BR, > Hongtao -- BR, Hongtao
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
Hi: The issue is described in the bugzilla. Bootstrap is ok, regression test for i386/x86-64 backend is ok. Ok for trunk? ChangeLog gcc/ PR target/96551 * config/i386/sse.md (vec_unpacku_float_hi_v16si): For vector compare to integer mask, don't use gen_rtx_LT , use ix86_expand_mask_vec_cmp instead. (vec_unpacku_float_hi_v16si): Ditto. gcc/testsuite * gcc.target/i386/pr96551-1.c: New test. * gcc.target/i386/pr96551-2.c: New test. -- BR, Hongtao From 6e8e1502591d78e14fc9e3c25e7d47c0f2c4559a Mon Sep 17 00:00:00 2001 From: liuhongt Date: Tue, 11 Aug 2020 11:05:40 +0800 Subject: [PATCH] Refine expander vec_unpacku_float_hi_v16si/vec_unpacku_float_lo_v16si gcc/ PR target/96551 * config/i386/sse.md (vec_unpacku_float_hi_v16si): For vector compare to integer mask, don't use gen_rtx_LT , use ix86_expand_mask_vec_cmp instead. (vec_unpacku_float_hi_v16si): Ditto. gcc/testsuite * gcc.target/i386/pr96551-1.c: New test. * gcc.target/i386/pr96551-2.c: New test. --- gcc/config/i386/sse.md| 4 +-- gcc/testsuite/gcc.target/i386/pr96551-1.c | 18 + gcc/testsuite/gcc.target/i386/pr96551-2.c | 33 +++ 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr96551-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr96551-2.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ad8169f6f08..a890f994ab0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -6971,7 +6971,7 @@ emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1])); emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3])); - emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0]))); + ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]); emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)); emit_move_insn (operands[0], tmp[2]); DONE; @@ -7018,7 +7018,7 @@ k = gen_reg_rtx (QImode); emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1])); - emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0]))); + ix86_expand_mask_vec_cmp (k, LT, 
tmp[2], tmp[0]); emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)); emit_move_insn (operands[0], tmp[2]); DONE; diff --git a/gcc/testsuite/gcc.target/i386/pr96551-1.c b/gcc/testsuite/gcc.target/i386/pr96551-1.c new file mode 100644 index 000..598bb6e85f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr96551-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mprefer-vector-width=512" } */ + +unsigned int a[256]; +double b[256]; + +void +__attribute__ ((noipa, optimize ("tree-vectorize"))) +foo(void) +{ + int i; + + for (i=0; i<256; ++i) +b[i] = a[i]; +} + +/* { dg-final { scan-assembler "vcvtdq2pd\[^\n\]*zmm" } } */ + diff --git a/gcc/testsuite/gcc.target/i386/pr96551-2.c b/gcc/testsuite/gcc.target/i386/pr96551-2.c new file mode 100644 index 000..722767aaf2a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr96551-2.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f -mprefer-vector-width=512" } */ +/* { dg-require-effective-target avx512f } */ + +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#ifndef TEST +#define TEST test_512 +#endif + +#include "pr96551-1.c" + +static void +TEST (void) +{ + double exp[256]; + for (int i = 0; i != 256; i++) +{ + a[i] = i * i + 3 * i + 13; + exp[i] = a[i]; + b[i] = 0; +} + + foo (); + + for (int i = 0; i != 256; i++) +if (exp[i] != b[i]) + __builtin_abort (); +} -- 2.18.1
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
On Fri, Aug 7, 2020 at 11:02 PM Kirill Yukhin wrote: > > Hello, > > On 05 авг 09:29, Hongtao Liu wrote: > > On Tue, Aug 4, 2020 at 6:28 PM Kirill Yukhin > > wrote: > > > > > > On 04 авг 13:26, Kirill Yukhin wrote: > > > > Could you please clarify, how your patch is related to [1]? > > > > I see from the bug that it describes a perf issue w.r.t. scalar > > > > operations. > > > > > Sorry for Typo, it's pr96243. > > Please, don't forget to update ChangeLog entry. > Yes. > It's a pity that we don't support vector comparisons in CSE, > I hope that will be fixed in the future. > > Patch LGTM. > > -- > K Thanks. -- BR, Hongtao
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
Hello, On 05 авг 09:29, Hongtao Liu wrote: > On Tue, Aug 4, 2020 at 6:28 PM Kirill Yukhin wrote: > > > > On 04 авг 13:26, Kirill Yukhin wrote: > > > Could you please clarify, how your patch is related to [1]? > > > I see from the bug that it describes a perf issue w.r.t. scalar > > > operations. > > > Sorry for Typo, it's pr96243. Please, don't forget to update ChangeLog entry. It's a pity that we don't support vector comparisons in CSE, I hope that will be fixed in the future. Patch LGTM. -- K
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
On Tue, Aug 4, 2020 at 6:28 PM Kirill Yukhin wrote: > > On 04 авг 13:26, Kirill Yukhin wrote: > > Could you please clarify, how your patch is related to [1]? > > I see from the bug that it describes a perf issue w.r.t. scalar > > operations. > Sorry for Typo, it's pr96243. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96243 > [1] - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96226 > > > > > -- > > Regards, Kirill Yukhin -- BR, Hongtao
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
On 04 авг 13:26, Kirill Yukhin wrote: > Could you please clarify, how your patch is related to [1]? > I see from the bug that it describes a perf issue w.r.t. scalar > operations. [1] - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96226 > > -- > Regards, Kirill Yukhin
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
Hello, On 20 июл 13:46, Hongtao Liu wrote: > Hi: > For rtx like (eq:HI (V8SI 90) (V8SI 91)), cse will take it as a > boolean value and try to do some optimization. But it is not true for > vector compare, also other places in rtl passes hold the same > assumption. > > Bootstrap is ok, regression test is ok for i386 backend. > > 2020-07-20 Hongtao Liu > > gcc/ > PR target/96226 Could you please clarify, how your patch is related to [1]? I see from the bug that it describes a perf issue w.r.t. scalar operations. -- Regards, Kirill Yukhin
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
ping^2 On Mon, Jul 27, 2020 at 5:31 PM Hongtao Liu wrote: > > ping > > On Mon, Jul 20, 2020 at 4:40 PM Hongtao Liu wrote: > > > > Correct PR number in ChangeLog > > it's pr96243. > > > > On Mon, Jul 20, 2020 at 1:46 PM Hongtao Liu wrote: > > > > > > Hi: > > > For rtx like (eq:HI (V8SI 90) (V8SI 91)), cse will take it as a > > > boolean value and try to do some optimization. But it is not true for > > > vector compare, also other places in rtl passes hold the same > > > assumption. > > > > > > Bootstrap is ok, regression test is ok for i386 backend. > > > > > > 2020-07-20 Hongtao Liu > > > > > > gcc/ > > > PR target/96243 > > > * config/i386/i386-expand.c (ix86_expand_sse_cmp): Refine for > > > maskcmp. > > > (ix86_expand_mask_vec_cmp): Change prototype. > > > * config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): Change > > > prototype. > > > * config/i386/i386.c (ix86_print_operand): Remove operand > > > modifier 'I'. > > > * config/i386/sse.md > > > (*_cmp3, > > > *_cmp3, > > > *_ucmp3, > > > *_ucmp3, > > > avx512f_maskcmp3): Deleted. > > > > > > gcc/testsuite > > > * gcc.target/i386/pr92865-1.c: Adjust testcase. > > > > > > > > > -- > > > BR, > > > Hongtao > > > > > > > > -- > > BR, > > Hongtao > > > > -- > BR, > Hongtao -- BR, Hongtao
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
ping On Mon, Jul 20, 2020 at 4:40 PM Hongtao Liu wrote: > > Correct PR number in ChangeLog > it's pr96243. > > On Mon, Jul 20, 2020 at 1:46 PM Hongtao Liu wrote: > > > > Hi: > > For rtx like (eq:HI (V8SI 90) (V8SI 91)), cse will take it as a > > boolean value and try to do some optimization. But it is not true for > > vector compare, also other places in rtl passes hold the same > > assumption. > > > > Bootstrap is ok, regression test is ok for i386 backend. > > > > 2020-07-20 Hongtao Liu > > > > gcc/ > > PR target/96243 > > * config/i386/i386-expand.c (ix86_expand_sse_cmp): Refine for > > maskcmp. > > (ix86_expand_mask_vec_cmp): Change prototype. > > * config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): Change > > prototype. > > * config/i386/i386.c (ix86_print_operand): Remove operand > > modifier 'I'. > > * config/i386/sse.md > > (*_cmp3, > > *_cmp3, > > *_ucmp3, > > *_ucmp3, > > avx512f_maskcmp3): Deleted. > > > > gcc/testsuite > > * gcc.target/i386/pr92865-1.c: Adjust testcase. > > > > > > -- > > BR, > > Hongtao > > > > -- > BR, > Hongtao -- BR, Hongtao
Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
Correct PR number in ChangeLog it's pr96243. On Mon, Jul 20, 2020 at 1:46 PM Hongtao Liu wrote: > > Hi: > For rtx like (eq:HI (V8SI 90) (V8SI 91)), cse will take it as a > boolean value and try to do some optimization. But it is not true for > vector compare, also other places in rtl passes hold the same > assumption. > > Bootstrap is ok, regression test is ok for i386 backend. > > 2020-07-20 Hongtao Liu > > gcc/ > PR target/96243 > * config/i386/i386-expand.c (ix86_expand_sse_cmp): Refine for > maskcmp. > (ix86_expand_mask_vec_cmp): Change prototype. > * config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): Change > prototype. > * config/i386/i386.c (ix86_print_operand): Remove operand > modifier 'I'. > * config/i386/sse.md > (*_cmp3, > *_cmp3, > *_ucmp3, > *_ucmp3, > avx512f_maskcmp3): Deleted. > > gcc/testsuite > * gcc.target/i386/pr92865-1.c: Adjust testcase. > > > -- > BR, > Hongtao -- BR, Hongtao
[PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]
Hi: For rtx like (eq:HI (V8SI 90) (V8SI 91)), cse will take it as a boolean value and try to do some optimization. But it is not true for vector compare, also other places in rtl passes hold the same assumption. Bootstrap is ok, regression test is ok for i386 backend. 2020-07-20 Hongtao Liu gcc/ PR target/96226 * config/i386/i386-expand.c (ix86_expand_sse_cmp): Refine for maskcmp. (ix86_expand_mask_vec_cmp): Change prototype. * config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): Change prototype. * config/i386/i386.c (ix86_print_operand): Remove operand modifier 'I'. * config/i386/sse.md (*_cmp3, *_cmp3, *_ucmp3, *_ucmp3, avx512f_maskcmp3): Deleted. gcc/testsuite * gcc.target/i386/pr92865-1.c: Adjust testcase. -- BR, Hongtao From 69ee4f981e090fb06ff5f27692fa4f79be7d54ea Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 20 Jul 2020 10:13:58 +0800 Subject: [PATCH] Using UNSPEC for vector compare to mask register. For rtx like (eq:HI (V8SI 90) (V8SI 91)), cse will take it as a boolean value and try to do some optimization. But it is not true for vector compare, also other places in rtl passes hold the same assumption. 2020-07-20 Hongtao Liu gcc/ * config/i386/i386-expand.c (ix86_expand_sse_cmp): Refine for maskcmp. (ix86_expand_mask_vec_cmp): Change prototype. * config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): Change prototype. * config/i386/i386.c (ix86_print_operand): Remove operand modifier 'I'. * config/i386/sse.md (*_cmp3, *_cmp3, *_ucmp3, *_ucmp3, avx512f_maskcmp3): Deleted. gcc/testsuite * gcc.target/i386/pr92865-1.c: Adjust testcase. 
--- gcc/config/i386/i386-expand.c | 19 +++--- gcc/config/i386/i386-protos.h | 2 +- gcc/config/i386/i386.c| 35 --- gcc/config/i386/sse.md| 72 +++ gcc/testsuite/gcc.target/i386/pr92865-1.c | 10 ++-- 5 files changed, 26 insertions(+), 112 deletions(-) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index e194214804b..1bd0df4daf4 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3480,6 +3480,13 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, || (op_false && reg_overlap_mentioned_p (dest, op_false))) dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); + if (maskcmp) +{ + bool ok = ix86_expand_mask_vec_cmp (dest, code, cmp_op0, cmp_op1); + gcc_assert (ok); + return dest; +} + x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); if (cmp_mode != mode && !maskcmp) @@ -3915,11 +3922,10 @@ ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) /* Expand AVX-512 vector comparison. */ bool -ix86_expand_mask_vec_cmp (rtx operands[]) +ix86_expand_mask_vec_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1) { - machine_mode mask_mode = GET_MODE (operands[0]); - machine_mode cmp_mode = GET_MODE (operands[2]); - enum rtx_code code = GET_CODE (operands[1]); + machine_mode mask_mode = GET_MODE (dest); + machine_mode cmp_mode = GET_MODE (cmp_op0); rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); int unspec_code; rtx unspec; @@ -3937,10 +3943,9 @@ ix86_expand_mask_vec_cmp (rtx operands[]) unspec_code = UNSPEC_PCMP; } - unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], - operands[3], imm), + unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, cmp_op0, cmp_op1, imm), unspec_code); - emit_insn (gen_rtx_SET (operands[0], unspec)); + emit_insn (gen_rtx_SET (dest, unspec)); return true; } diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 7c2ce618f3f..0f7ce291315 100644 --- a/gcc/config/i386/i386-protos.h +++ 
b/gcc/config/i386/i386-protos.h @@ -143,7 +143,7 @@ extern bool ix86_expand_fp_movcc (rtx[]); extern bool ix86_expand_fp_vcond (rtx[]); extern bool ix86_expand_int_vcond (rtx[]); extern void ix86_expand_vec_perm (rtx[]); -extern bool ix86_expand_mask_vec_cmp (rtx[]); +extern bool ix86_expand_mask_vec_cmp (rtx, enum rtx_code, rtx, rtx); extern bool ix86_expand_int_vec_cmp (rtx[]); extern bool ix86_expand_fp_vec_cmp (rtx[]); extern void ix86_expand_sse_movcc (rtx, rtx, rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 5c373c091ce..73342c479aa 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12544,7 +12544,6 @@ print_reg (rtx x, int code, FILE *file) M -- print addr32 prefix for TARGET_X32 with VSIB address. ! -- print NOTRACK prefix for jxx/call/ret instructions if required. N -- print maskz if it's constant 0 operand. - I -- print comparision predicate operand for sse cmp condition. */ void @@ -12774,40 +12773,6 @@ ix86_print_operand (FILE *file, rtx x, int code) } return; - case 'I': -