Re: [PATCH 1/4][PR target/88808]Enable bitwise operator for AVX512 masks.
On Wed, Aug 19, 2020 at 3:52 AM Hongtao Liu wrote: > > On Mon, Aug 17, 2020 at 5:20 PM Uros Bizjak wrote: > > > > On Fri, Aug 14, 2020 at 10:22 AM Hongtao Liu wrote: > > > > > > Hi: > > > First, since avx512 masks involve both vector isa and general part, > > > so i add both maintainers to the maillist. > > > > > > I'm doing this in 4 steps: > > > 1 - Add cost model for operation of mask registers. > > > 2 - Introduce new cover class INT_MASK_REGS, this will enable direct > > > move between gpr and mask registers in pass_reload by consideration of > > > cost model, this is similar as INT_SSE_REGS. > > > 3 - Tune cost model. > > > 4 - Enable operator or/xor/and/andn/not for mask register. kxnor is > > > not enabled since there's no corresponding instruction for general > > > registers, 64bit mask op is not enabled for 32bit target. > > > kadd/kshift/ktest are not merged into general versionsadd/ashl/test > > > since i think it would be odd to use mask register for those > > > operations. > > > > > > Bootstrap is ok, regression test is ok for i386/x86-64 result. > > > There's some improvement for performance of SPEC2017 tested on SKL, > > > i observe there're many spills from integer to mask registers instead > > > of memory which is the reason for the improvement. > > > > + if (MASK_CLASS_P (regclass)) > > +{ > > + int index; > > + switch (GET_MODE_SIZE (mode)) > > +{ > > +case 1: > > + index = 0; > > + break; > > +case 2: > > + index = 1; > > + break; > > +default: > > + index = 3; > > > > Max index = 2! > > > > Fix typo. > > > + break; > > +} > > + > > + if (in == 2) > > +return MAX (ix86_cost->hard_register.mask_load[index], > > +ix86_cost->hard_register.mask_store[index]); > > + return in ? ix86_cost->hard_register.mask_load[2] > > +: ix86_cost->hard_register.mask_store[2]; > > +} > > > > Are DImode loads and stores assumed to cost the same as SImode? A > > comment would be nice here. > > > > Yes, comment is added. > > > Uros. > > Update patch. Patch is OK (no comment on costs, though). Thanks, Uros.
Re: [PATCH 1/4][PR target/88808]Enable bitwise operator for AVX512 masks.
On Mon, Aug 17, 2020 at 5:20 PM Uros Bizjak wrote: > > On Fri, Aug 14, 2020 at 10:22 AM Hongtao Liu wrote: > > > > Hi: > > First, since avx512 masks involve both vector isa and general part, > > so i add both maintainers to the maillist. > > > > I'm doing this in 4 steps: > > 1 - Add cost model for operation of mask registers. > > 2 - Introduce new cover class INT_MASK_REGS, this will enable direct > > move between gpr and mask registers in pass_reload by consideration of > > cost model, this is similar as INT_SSE_REGS. > > 3 - Tune cost model. > > 4 - Enable operator or/xor/and/andn/not for mask register. kxnor is > > not enabled since there's no corresponding instruction for general > > registers, 64bit mask op is not enabled for 32bit target. > > kadd/kshift/ktest are not merged into general versionsadd/ashl/test > > since i think it would be odd to use mask register for those > > operations. > > > > Bootstrap is ok, regression test is ok for i386/x86-64 result. > > There's some improvement for performance of SPEC2017 tested on SKL, > > i observe there're many spills from integer to mask registers instead > > of memory which is the reason for the improvement. > > + if (MASK_CLASS_P (regclass)) > +{ > + int index; > + switch (GET_MODE_SIZE (mode)) > +{ > +case 1: > + index = 0; > + break; > +case 2: > + index = 1; > + break; > +default: > + index = 3; > > Max index = 2! > Fix typo. > + break; > +} > + > + if (in == 2) > +return MAX (ix86_cost->hard_register.mask_load[index], > +ix86_cost->hard_register.mask_store[index]); > + return in ? ix86_cost->hard_register.mask_load[2] > +: ix86_cost->hard_register.mask_store[2]; > +} > > Are DImode loads and stores assumed to cost the same as SImode? A > comment would be nice here. > Yes, comment is added. > Uros. Update patch. -- BR, Hongtao From 70e9e389d751c79caf957ef336dded34726f0533 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 3 Sep 2019 14:41:02 -0700 Subject: [PATCH 1/4] x86: Add cost model for operation of mask registers. gcc/ PR target/71453 * config/i386/i386.h (struct processor_costs): Add member mask_to_integer, integer_to_mask, mask_load[3], mask_store[3], mask_move. * config/i386/x86-tune-costs.h (ix86_size_cost, i386_cost, i386_cost, pentium_cost, lakemont_cost, pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost, bdver_cost, znver1_cost, znver2_cost, skylake_cost, btver1_cost, btver2_cost, pentium4_cost, nocona_cost, atom_cost, slm_cost, intel_cost, generic_cost, core_cost): Initialize mask_load[3], mask_store[3], mask_move, integer_to_mask, mask_to_integer for all target costs. * config/i386/i386.c (ix86_register_move_cost): Using cost model of mask registers. (inline_memory_move_cost): Ditto. (ix86_register_move_cost): Ditto. --- gcc/config/i386/i386.c | 34 gcc/config/i386/i386.h | 7 ++ gcc/config/i386/x86-tune-costs.h | 144 +++ 3 files changed, 185 insertions(+) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8ea6a4d7ea7..f5e824a16ad 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18769,6 +18769,29 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) return in ? ix86_cost->hard_register.sse_load [index] : ix86_cost->hard_register.sse_store [index]; } + if (MASK_CLASS_P (regclass)) +{ + int index; + switch (GET_MODE_SIZE (mode)) + { + case 1: + index = 0; + break; + case 2: + index = 1; + break; + /* DImode loads and stores assumed to cost the same as SImode. */ + default: + index = 2; + break; + } + + if (in == 2) + return MAX (ix86_cost->hard_register.mask_load[index], + ix86_cost->hard_register.mask_store[index]); + return in ? ix86_cost->hard_register.mask_load[2] + : ix86_cost->hard_register.mask_store[2]; +} if (MMX_CLASS_P (regclass)) { int index; @@ -18894,6 +18917,17 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, ? ix86_cost->hard_register.sse_to_integer : ix86_cost->hard_register.integer_to_sse); + /* Moves between mask register and GPR. */ + if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) +{ + return (MASK_CLASS_P (class1) + ? ix86_cost->hard_register.mask_to_integer + : ix86_cost->hard_register.integer_to_mask); +} + /* Moving between mask registers. */ + if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2)) +return ix86_cost->hard_register.mask_move; + if (MAYBE_FLOAT_CLASS_P (class1)) return ix86_cost->hard_register.fp_move; if (MAYBE_SSE_CLASS_P (class1)) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 114967e49a3..e0af87450b8 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -279,6 +279,13 @@ struct processor_costs { in SImode,
Re: [PATCH 1/4][PR target/88808]Enable bitwise operator for AVX512 masks.
On Fri, Aug 14, 2020 at 10:22 AM Hongtao Liu wrote: > > Hi: > First, since avx512 masks involve both vector isa and general part, > so i add both maintainers to the maillist. > > I'm doing this in 4 steps: > 1 - Add cost model for operation of mask registers. > 2 - Introduce new cover class INT_MASK_REGS, this will enable direct > move between gpr and mask registers in pass_reload by consideration of > cost model, this is similar as INT_SSE_REGS. > 3 - Tune cost model. > 4 - Enable operator or/xor/and/andn/not for mask register. kxnor is > not enabled since there's no corresponding instruction for general > registers, 64bit mask op is not enabled for 32bit target. > kadd/kshift/ktest are not merged into general versionsadd/ashl/test > since i think it would be odd to use mask register for those > operations. > > Bootstrap is ok, regression test is ok for i386/x86-64 result. > There's some improvement for performance of SPEC2017 tested on SKL, > i observe there're many spills from integer to mask registers instead > of memory which is the reason for the improvement. + if (MASK_CLASS_P (regclass)) +{ + int index; + switch (GET_MODE_SIZE (mode)) +{ +case 1: + index = 0; + break; +case 2: + index = 1; + break; +default: + index = 3; Max index = 2! + break; +} + + if (in == 2) +return MAX (ix86_cost->hard_register.mask_load[index], +ix86_cost->hard_register.mask_store[index]); + return in ? ix86_cost->hard_register.mask_load[2] +: ix86_cost->hard_register.mask_store[2]; +} Are DImode loads and stores assumed to cost the same as SImode? A comment would be nice here. Uros.
[PATCH 1/4][PR target/88808]Enable bitwise operator for AVX512 masks.
Hi: First, since avx512 masks involve both vector isa and general part, so i add both maintainers to the maillist. I'm doing this in 4 steps: 1 - Add cost model for operation of mask registers. 2 - Introduce new cover class INT_MASK_REGS, this will enable direct move between gpr and mask registers in pass_reload by consideration of cost model, this is similar as INT_SSE_REGS. 3 - Tune cost model. 4 - Enable operator or/xor/and/andn/not for mask register. kxnor is not enabled since there's no corresponding instruction for general registers, 64bit mask op is not enabled for 32bit target. kadd/kshift/ktest are not merged into general versionsadd/ashl/test since i think it would be odd to use mask register for those operations. Bootstrap is ok, regression test is ok for i386/x86-64 result. There's some improvement for performance of SPEC2017 tested on SKL, i observe there're many spills from integer to mask registers instead of memory which is the reason for the improvement. 500.perlbench_r 0.65% 502.gcc_r 0.32% 505.mcf_r -0.75% 520.omnetpp_r 2.15% 523.xalancbmk_r -0.95% 525.x264_r 1.83% 531.deepsjeng_r 0.00% 541.leela_r 0.66% 548.exchange2_r -0.45% 557.xz_r -0.94% INT geomean 0.25% 503.bwaves_r 0.00% 507.cactuBSSN_r 0.78% 508.namd_r 0.34% 510.parest_r 0.16% 511.povray_r 1.37% 519.lbm_r 1.33% 521.wrf_r 0.04% 526.blender_r 0.15% 527.cam4_r 0.69% 538.imagick_r 0.51% 544.nab_r 0.27% 549.fotonik3d_r 2.00% 554.roms_r 4.35% FP geomean 0.99% -- BR, Hongtao From 7a8393426bbd54b0906846a9f29833cf068732d5 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 3 Sep 2019 14:41:02 -0700 Subject: [PATCH 1/4] x86: Add cost model for operation of mask registers. gcc/ PR target/71453 * config/i386/i386.h (struct processor_costs): Add member mask_to_integer, integer_to_mask, mask_load[3], mask_store[3], mask_move. * config/i386/x86-tune-costs.h (ix86_size_cost, i386_cost, i386_cost, pentium_cost, lakemont_cost, pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost, bdver_cost, znver1_cost, znver2_cost, skylake_cost, btver1_cost, btver2_cost, pentium4_cost, nocona_cost, atom_cost, slm_cost, intel_cost, generic_cost, core_cost): Initialize mask_load[3], mask_store[3], mask_move, integer_to_mask, mask_to_integer for all target costs. * config/i386/i386.c (ix86_register_move_cost): Using cost model of mask registers. (inline_memory_move_cost): Ditto. (ix86_register_move_cost): Ditto. --- gcc/config/i386/i386.c | 33 +++ gcc/config/i386/i386.h | 7 ++ gcc/config/i386/x86-tune-costs.h | 144 +++ 3 files changed, 184 insertions(+) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8ea6a4d7ea7..156df77166b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18769,6 +18769,28 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) return in ? ix86_cost->hard_register.sse_load [index] : ix86_cost->hard_register.sse_store [index]; } + if (MASK_CLASS_P (regclass)) +{ + int index; + switch (GET_MODE_SIZE (mode)) + { + case 1: + index = 0; + break; + case 2: + index = 1; + break; + default: + index = 3; + break; + } + + if (in == 2) + return MAX (ix86_cost->hard_register.mask_load[index], + ix86_cost->hard_register.mask_store[index]); + return in ? ix86_cost->hard_register.mask_load[2] + : ix86_cost->hard_register.mask_store[2]; +} if (MMX_CLASS_P (regclass)) { int index; @@ -18894,6 +18916,17 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, ? ix86_cost->hard_register.sse_to_integer : ix86_cost->hard_register.integer_to_sse); + /* Moves between mask register and GPR. */ + if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) +{ + return (MASK_CLASS_P (class1) + ? ix86_cost->hard_register.mask_to_integer + : ix86_cost->hard_register.integer_to_mask); +} + /* Moving between mask registers. */ + if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2)) +return ix86_cost->hard_register.mask_move; + if (MAYBE_FLOAT_CLASS_P (class1)) return ix86_cost->hard_register.fp_move; if (MAYBE_SSE_CLASS_P (class1)) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 114967e49a3..e0af87450b8 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -279,6 +279,13 @@ struct processor_costs { in SImode, DImode and TImode. */ const int sse_to_integer; /* cost of moving SSE register to integer. */ const int integer_to_sse; /* cost of moving integer register to SSE. */ + const int mask_to_integer; /* cost of moving mask register to integer. */ + const int integer_to_mask; /* cost of moving integer register to mask. */ + const int mask_load[3]; /* cost of loading mask registers + in QImode, HImode and SImode. */ + const int mask_store[3]; /* cost of storing mask register +