Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)
On Wed, 17 Jan 2018, Jakub Jelinek wrote: > On Tue, Jan 16, 2018 at 02:19:16PM +0100, Christophe Lyon wrote: > > I've noticed that this new test fails on arm, eg: > > arm-none-linux-gnueabihf > > --with-mode arm > > --with-cpu cortex-a9 > > --with-fpu neon-fp16 > > FAIL: gcc.dg/store_merging_18.c scan-tree-dump-times store-merging > > "Merging successful" 3 (found 0 times) > > Ugh, the problem that arm announces itself as a store_merge target when > it can't do unaligned stores again, so essentially dup of PR83195. > We really shouldn't lie :(. > > Anyway, for now I've checked in the following which matches what I've done > for PR83195. > > Better would be to have store_merge_unaligned and store_merge, where > the former would be current store_merge except for arm, and latter > would be all targets that can perform store merging (isn't that all except > targets that don't have 8-bit chars or pdp endianity)? Ok. > 2018-01-17 Jakub Jelinek > > PR tree-optimization/83843 > * gcc.dg/store_merging_18.c: Don't expect "Merging successful" on arm. > * gcc.dg/store_merging_19.c: New test. > > --- gcc/testsuite/gcc.dg/store_merging_18.c.jj2018-01-16 > 09:52:26.231235131 +0100 > +++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-17 12:10:07.862957549 > +0100 > @@ -1,7 +1,7 @@ > /* PR tree-optimization/83843 */ > /* { dg-do run } */ > /* { dg-options "-O2 -fdump-tree-store-merging" } */ > -/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" > { target store_merge } } } */ > +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" > { target { store_merge && { ! arm*-*-* } } } } } */ > > __attribute__((noipa)) void > foo (unsigned char *buf, unsigned char *tab) > --- gcc/testsuite/gcc.dg/store_merging_19.c.jj2018-01-17 > 12:10:34.819962003 +0100 > +++ gcc/testsuite/gcc.dg/store_merging_19.c 2018-01-17 12:13:08.425987375 > +0100 > @@ -0,0 +1,57 @@ > +/* PR tree-optimization/83843 */ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-store-merging" } */ > +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" > { target store_merge } } } */ > + > +__attribute__((noipa)) void > +foo (unsigned char *buf, unsigned char *tab) > +{ > + tab = __builtin_assume_aligned (tab, 2); > + buf = __builtin_assume_aligned (buf, 2); > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = ~(v >> 8); > + buf[1] = ~v; > +} > + > +__attribute__((noipa)) void > +bar (unsigned char *buf, unsigned char *tab) > +{ > + tab = __builtin_assume_aligned (tab, 2); > + buf = __builtin_assume_aligned (buf, 2); > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = (v >> 8); > + buf[1] = ~v; > +} > + > +__attribute__((noipa)) void > +baz (unsigned char *buf, unsigned char *tab) > +{ > + tab = __builtin_assume_aligned (tab, 2); > + buf = __builtin_assume_aligned (buf, 2); > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = ~(v >> 8); > + buf[1] = v; > +} > + > +int > +main () > +{ > + volatile unsigned char l1 = 0; > + volatile unsigned char l2 = 1; > + unsigned char buf[2] __attribute__((aligned (2))); > + unsigned char tab[2] __attribute__((aligned (2))) = { l1 + 1, l2 * 2 }; > + foo (buf, tab); > + if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2) > +__builtin_abort (); > + buf[0] = l1 + 7; > + buf[1] = l2 * 8; > + bar (buf, tab); > + if (buf[0] != 1 || buf[1] != (unsigned char) ~2) > +__builtin_abort (); > + buf[0] = l1 + 9; > + buf[1] = l2 * 10; > + baz (buf, tab); > + if (buf[0] != (unsigned char) ~1 || buf[1] != 2) > +__builtin_abort (); > + return 0; > +} > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)
On Tue, Jan 16, 2018 at 02:19:16PM +0100, Christophe Lyon wrote: > I've noticed that this new test fails on arm, eg: > arm-none-linux-gnueabihf > --with-mode arm > --with-cpu cortex-a9 > --with-fpu neon-fp16 > FAIL: gcc.dg/store_merging_18.c scan-tree-dump-times store-merging > "Merging successful" 3 (found 0 times) Ugh, the problem that arm announces itself as a store_merge target when it can't do unaligned stores again, so essentially dup of PR83195. We really shouldn't lie :(. Anyway, for now I've checked in the following which matches what I've done for PR83195. Better would be to have store_merge_unaligned and store_merge, where the former would be current store_merge except for arm, and latter would be all targets that can perform store merging (isn't that all except targets that don't have 8-bit chars or pdp endianity)? 2018-01-17 Jakub Jelinek PR tree-optimization/83843 * gcc.dg/store_merging_18.c: Don't expect "Merging successful" on arm. * gcc.dg/store_merging_19.c: New test. --- gcc/testsuite/gcc.dg/store_merging_18.c.jj 2018-01-16 09:52:26.231235131 +0100 +++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-17 12:10:07.862957549 +0100 @@ -1,7 +1,7 @@ /* PR tree-optimization/83843 */ /* { dg-do run } */ /* { dg-options "-O2 -fdump-tree-store-merging" } */ -/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { target store_merge } } } */ +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { target { store_merge && { ! arm*-*-* } } } } } */ __attribute__((noipa)) void foo (unsigned char *buf, unsigned char *tab) --- gcc/testsuite/gcc.dg/store_merging_19.c.jj 2018-01-17 12:10:34.819962003 +0100 +++ gcc/testsuite/gcc.dg/store_merging_19.c 2018-01-17 12:13:08.425987375 +0100 @@ -0,0 +1,57 @@ +/* PR tree-optimization/83843 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-store-merging" } */ +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { target store_merge } } } */ + +__attribute__((noipa)) void +foo (unsigned char *buf, unsigned char *tab) +{ + tab = __builtin_assume_aligned (tab, 2); + buf = __builtin_assume_aligned (buf, 2); + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = ~(v >> 8); + buf[1] = ~v; +} + +__attribute__((noipa)) void +bar (unsigned char *buf, unsigned char *tab) +{ + tab = __builtin_assume_aligned (tab, 2); + buf = __builtin_assume_aligned (buf, 2); + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = (v >> 8); + buf[1] = ~v; +} + +__attribute__((noipa)) void +baz (unsigned char *buf, unsigned char *tab) +{ + tab = __builtin_assume_aligned (tab, 2); + buf = __builtin_assume_aligned (buf, 2); + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = ~(v >> 8); + buf[1] = v; +} + +int +main () +{ + volatile unsigned char l1 = 0; + volatile unsigned char l2 = 1; + unsigned char buf[2] __attribute__((aligned (2))); + unsigned char tab[2] __attribute__((aligned (2))) = { l1 + 1, l2 * 2 }; + foo (buf, tab); + if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2) +__builtin_abort (); + buf[0] = l1 + 7; + buf[1] = l2 * 8; + bar (buf, tab); + if (buf[0] != 1 || buf[1] != (unsigned char) ~2) +__builtin_abort (); + buf[0] = l1 + 9; + buf[1] = l2 * 10; + baz (buf, tab); + if (buf[0] != (unsigned char) ~1 || buf[1] != 2) +__builtin_abort (); + return 0; +} Jakub
Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)
On 15 January 2018 at 22:44, Jakub Jelinek wrote: > Hi! > > When using the bswap pass infrastructure, BIT_NOT_EXPRs aren't allowed in > the middle, but due to the way process_store handles those it can appear > around the value, which is something output_merged_store didn't handle. > > Fixed thusly, where we handle not just the case when the bswap (or nop) > value needs inversion as whole, but also cases where only a few portions of > it need xoring with some mask. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2018-01-15 Jakub Jelinek > > PR tree-optimization/83843 > * gimple-ssa-store-merging.c > (imm_store_chain_info::output_merged_store): Handle bit_not_p on > store_immediate_info for bswap/nop orig_stores. > > * gcc.dg/store_merging_18.c: New test. > Hi Jakub, I've noticed that this new test fails on arm, eg: arm-none-linux-gnueabihf --with-mode arm --with-cpu cortex-a9 --with-fpu neon-fp16 FAIL: gcc.dg/store_merging_18.c scan-tree-dump-times store-merging "Merging successful" 3 (found 0 times) Do you want me to file a PR? Christophe > --- gcc/gimple-ssa-store-merging.c.jj 2018-01-04 00:43:17.629703230 +0100 > +++ gcc/gimple-ssa-store-merging.c 2018-01-15 12:29:14.105789381 +0100 > @@ -3619,6 +3619,15 @@ imm_store_chain_info::output_merged_stor > gimple_seq_add_stmt_without_update (&seq, stmt); > src = gimple_assign_lhs (stmt); > } > + inv_op = invert_op (split_store, 2, int_type, xor_mask); > + if (inv_op != NOP_EXPR) > + { > + stmt = gimple_build_assign (make_ssa_name (int_type), > + inv_op, src, xor_mask); > + gimple_set_location (stmt, loc); > + gimple_seq_add_stmt_without_update (&seq, stmt); > + src = gimple_assign_lhs (stmt); > + } > break; > default: > src = ops[0]; > --- gcc/testsuite/gcc.dg/store_merging_18.c.jj 2018-01-15 12:43:49.607227365 > +0100 > +++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-15 12:43:24.882245004 > +0100 > @@ -0,0 +1,51 @@ > +/* PR tree-optimization/83843 */ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-store-merging" } */ > +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" > { target store_merge } } } */ > + > +__attribute__((noipa)) void > +foo (unsigned char *buf, unsigned char *tab) > +{ > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = ~(v >> 8); > + buf[1] = ~v; > +} > + > +__attribute__((noipa)) void > +bar (unsigned char *buf, unsigned char *tab) > +{ > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = (v >> 8); > + buf[1] = ~v; > +} > + > +__attribute__((noipa)) void > +baz (unsigned char *buf, unsigned char *tab) > +{ > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = ~(v >> 8); > + buf[1] = v; > +} > + > +int > +main () > +{ > + volatile unsigned char l1 = 0; > + volatile unsigned char l2 = 1; > + unsigned char buf[2]; > + unsigned char tab[2] = { l1 + 1, l2 * 2 }; > + foo (buf, tab); > + if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2) > +__builtin_abort (); > + buf[0] = l1 + 7; > + buf[1] = l2 * 8; > + bar (buf, tab); > + if (buf[0] != 1 || buf[1] != (unsigned char) ~2) > +__builtin_abort (); > + buf[0] = l1 + 9; > + buf[1] = l2 * 10; > + baz (buf, tab); > + if (buf[0] != (unsigned char) ~1 || buf[1] != 2) > +__builtin_abort (); > + return 0; > +} > > Jakub
Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)
On Mon, 15 Jan 2018, Jakub Jelinek wrote: > Hi! > > When using the bswap pass infrastructure, BIT_NOT_EXPRs aren't allowed in > the middle, but due to the way process_store handles those it can appear > around the value, which is something output_merged_store didn't handle. > > Fixed thusly, where we handle not just the case when the bswap (or nop) > value needs inversion as whole, but also cases where only a few portions of > it need xoring with some mask. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok. Richard. > 2018-01-15 Jakub Jelinek > > PR tree-optimization/83843 > * gimple-ssa-store-merging.c > (imm_store_chain_info::output_merged_store): Handle bit_not_p on > store_immediate_info for bswap/nop orig_stores. > > * gcc.dg/store_merging_18.c: New test. > > --- gcc/gimple-ssa-store-merging.c.jj 2018-01-04 00:43:17.629703230 +0100 > +++ gcc/gimple-ssa-store-merging.c2018-01-15 12:29:14.105789381 +0100 > @@ -3619,6 +3619,15 @@ imm_store_chain_info::output_merged_stor > gimple_seq_add_stmt_without_update (&seq, stmt); > src = gimple_assign_lhs (stmt); > } > + inv_op = invert_op (split_store, 2, int_type, xor_mask); > + if (inv_op != NOP_EXPR) > + { > + stmt = gimple_build_assign (make_ssa_name (int_type), > + inv_op, src, xor_mask); > + gimple_set_location (stmt, loc); > + gimple_seq_add_stmt_without_update (&seq, stmt); > + src = gimple_assign_lhs (stmt); > + } > break; > default: > src = ops[0]; > --- gcc/testsuite/gcc.dg/store_merging_18.c.jj2018-01-15 > 12:43:49.607227365 +0100 > +++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-15 12:43:24.882245004 > +0100 > @@ -0,0 +1,51 @@ > +/* PR tree-optimization/83843 */ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-store-merging" } */ > +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" > { target store_merge } } } */ > + > +__attribute__((noipa)) void > +foo (unsigned char *buf, unsigned char *tab) > +{ > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = ~(v >> 8); > + buf[1] = ~v; > +} > + > +__attribute__((noipa)) void > +bar (unsigned char *buf, unsigned char *tab) > +{ > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = (v >> 8); > + buf[1] = ~v; > +} > + > +__attribute__((noipa)) void > +baz (unsigned char *buf, unsigned char *tab) > +{ > + unsigned v = tab[1] ^ (tab[0] << 8); > + buf[0] = ~(v >> 8); > + buf[1] = v; > +} > + > +int > +main () > +{ > + volatile unsigned char l1 = 0; > + volatile unsigned char l2 = 1; > + unsigned char buf[2]; > + unsigned char tab[2] = { l1 + 1, l2 * 2 }; > + foo (buf, tab); > + if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2) > +__builtin_abort (); > + buf[0] = l1 + 7; > + buf[1] = l2 * 8; > + bar (buf, tab); > + if (buf[0] != 1 || buf[1] != (unsigned char) ~2) > +__builtin_abort (); > + buf[0] = l1 + 9; > + buf[1] = l2 * 10; > + baz (buf, tab); > + if (buf[0] != (unsigned char) ~1 || buf[1] != 2) > +__builtin_abort (); > + return 0; > +} > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
[PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)
Hi! When using the bswap pass infrastructure, BIT_NOT_EXPRs aren't allowed in the middle, but due to the way process_store handles those it can appear around the value, which is something output_merged_store didn't handle. Fixed thusly, where we handle not just the case when the bswap (or nop) value needs inversion as whole, but also cases where only a few portions of it need xoring with some mask. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-01-15 Jakub Jelinek PR tree-optimization/83843 * gimple-ssa-store-merging.c (imm_store_chain_info::output_merged_store): Handle bit_not_p on store_immediate_info for bswap/nop orig_stores. * gcc.dg/store_merging_18.c: New test. --- gcc/gimple-ssa-store-merging.c.jj 2018-01-04 00:43:17.629703230 +0100 +++ gcc/gimple-ssa-store-merging.c 2018-01-15 12:29:14.105789381 +0100 @@ -3619,6 +3619,15 @@ imm_store_chain_info::output_merged_stor gimple_seq_add_stmt_without_update (&seq, stmt); src = gimple_assign_lhs (stmt); } + inv_op = invert_op (split_store, 2, int_type, xor_mask); + if (inv_op != NOP_EXPR) + { + stmt = gimple_build_assign (make_ssa_name (int_type), + inv_op, src, xor_mask); + gimple_set_location (stmt, loc); + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + } break; default: src = ops[0]; --- gcc/testsuite/gcc.dg/store_merging_18.c.jj 2018-01-15 12:43:49.607227365 +0100 +++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-15 12:43:24.882245004 +0100 @@ -0,0 +1,51 @@ +/* PR tree-optimization/83843 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-store-merging" } */ +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { target store_merge } } } */ + +__attribute__((noipa)) void +foo (unsigned char *buf, unsigned char *tab) +{ + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = ~(v >> 8); + buf[1] = ~v; +} + +__attribute__((noipa)) void +bar (unsigned char *buf, unsigned char *tab) +{ + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = (v >> 8); + buf[1] = ~v; +} + +__attribute__((noipa)) void +baz (unsigned char *buf, unsigned char *tab) +{ + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = ~(v >> 8); + buf[1] = v; +} + +int +main () +{ + volatile unsigned char l1 = 0; + volatile unsigned char l2 = 1; + unsigned char buf[2]; + unsigned char tab[2] = { l1 + 1, l2 * 2 }; + foo (buf, tab); + if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2) +__builtin_abort (); + buf[0] = l1 + 7; + buf[1] = l2 * 8; + bar (buf, tab); + if (buf[0] != 1 || buf[1] != (unsigned char) ~2) +__builtin_abort (); + buf[0] = l1 + 9; + buf[1] = l2 * 10; + baz (buf, tab); + if (buf[0] != (unsigned char) ~1 || buf[1] != 2) +__builtin_abort (); + return 0; +} Jakub