Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)

2018-01-17 Thread Richard Biener
On Wed, 17 Jan 2018, Jakub Jelinek wrote:

> On Tue, Jan 16, 2018 at 02:19:16PM +0100, Christophe Lyon wrote:
> > I've noticed that this new test fails on arm, eg:
> > arm-none-linux-gnueabihf
> > --with-mode arm
> > --with-cpu cortex-a9
> > --with-fpu neon-fp16
> > FAIL: gcc.dg/store_merging_18.c scan-tree-dump-times store-merging
> > "Merging successful" 3 (found 0 times)
> 
> Ugh, the problem that arm announces itself as a store_merge target when
> it can't do unaligned stores again, so essentially dup of PR83195.
> We really shouldn't lie :(.
> 
> Anyway, for now I've checked in the following which matches what I've done
> for PR83195.
> 
> Better would be to have store_merge_unaligned and store_merge, where
> the former would be current store_merge except for arm, and latter
> would be all targets that can perform store merging (isn't that all except
> targets that don't have 8-bit chars or pdp endianity)?

Ok.

> 2018-01-17  Jakub Jelinek  
> 
>   PR tree-optimization/83843
>   * gcc.dg/store_merging_18.c: Don't expect "Merging successful" on arm.
>   * gcc.dg/store_merging_19.c: New test.
> 
> --- gcc/testsuite/gcc.dg/store_merging_18.c.jj2018-01-16 
> 09:52:26.231235131 +0100
> +++ gcc/testsuite/gcc.dg/store_merging_18.c   2018-01-17 12:10:07.862957549 
> +0100
> @@ -1,7 +1,7 @@
>  /* PR tree-optimization/83843 */
>  /* { dg-do run } */
>  /* { dg-options "-O2 -fdump-tree-store-merging" } */
> -/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" 
> { target store_merge } } } */
> +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" 
> { target { store_merge && { ! arm*-*-* } } } } } */
>  
>  __attribute__((noipa)) void
>  foo (unsigned char *buf, unsigned char *tab)
> --- gcc/testsuite/gcc.dg/store_merging_19.c.jj2018-01-17 
> 12:10:34.819962003 +0100
> +++ gcc/testsuite/gcc.dg/store_merging_19.c   2018-01-17 12:13:08.425987375 
> +0100
> @@ -0,0 +1,57 @@
> +/* PR tree-optimization/83843 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fdump-tree-store-merging" } */
> +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" 
> { target store_merge } } } */
> +
> +__attribute__((noipa)) void
> +foo (unsigned char *buf, unsigned char *tab)
> +{
> +  tab = __builtin_assume_aligned (tab, 2);
> +  buf = __builtin_assume_aligned (buf, 2);
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = ~(v >> 8);
> +  buf[1] = ~v;
> +}
> +
> +__attribute__((noipa)) void
> +bar (unsigned char *buf, unsigned char *tab)
> +{
> +  tab = __builtin_assume_aligned (tab, 2);
> +  buf = __builtin_assume_aligned (buf, 2);
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = (v >> 8);
> +  buf[1] = ~v;
> +}
> +
> +__attribute__((noipa)) void
> +baz (unsigned char *buf, unsigned char *tab)
> +{
> +  tab = __builtin_assume_aligned (tab, 2);
> +  buf = __builtin_assume_aligned (buf, 2);
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = ~(v >> 8);
> +  buf[1] = v;
> +}
> +
> +int
> +main ()
> +{
> +  volatile unsigned char l1 = 0;
> +  volatile unsigned char l2 = 1;
> +  unsigned char buf[2] __attribute__((aligned (2)));
> +  unsigned char tab[2] __attribute__((aligned (2))) = { l1 + 1, l2 * 2 };
> +  foo (buf, tab);
> +  if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2)
> +__builtin_abort ();
> +  buf[0] = l1 + 7;
> +  buf[1] = l2 * 8;
> +  bar (buf, tab);
> +  if (buf[0] != 1 || buf[1] != (unsigned char) ~2)
> +__builtin_abort ();
> +  buf[0] = l1 + 9;
> +  buf[1] = l2 * 10;
> +  baz (buf, tab);
> +  if (buf[0] != (unsigned char) ~1 || buf[1] != 2)
> +__builtin_abort ();
> +  return 0;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)

2018-01-17 Thread Jakub Jelinek
On Tue, Jan 16, 2018 at 02:19:16PM +0100, Christophe Lyon wrote:
> I've noticed that this new test fails on arm, eg:
> arm-none-linux-gnueabihf
> --with-mode arm
> --with-cpu cortex-a9
> --with-fpu neon-fp16
> FAIL: gcc.dg/store_merging_18.c scan-tree-dump-times store-merging
> "Merging successful" 3 (found 0 times)

Ugh, the problem that arm announces itself as a store_merge target when
it can't do unaligned stores again, so essentially dup of PR83195.
We really shouldn't lie :(.

Anyway, for now I've checked in the following which matches what I've done
for PR83195.

Better would be to have store_merge_unaligned and store_merge, where
the former would be current store_merge except for arm, and latter
would be all targets that can perform store merging (isn't that all except
targets that don't have 8-bit chars or pdp endianity)?

2018-01-17  Jakub Jelinek  

PR tree-optimization/83843
* gcc.dg/store_merging_18.c: Don't expect "Merging successful" on arm.
* gcc.dg/store_merging_19.c: New test.

--- gcc/testsuite/gcc.dg/store_merging_18.c.jj  2018-01-16 09:52:26.231235131 
+0100
+++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-17 12:10:07.862957549 
+0100
@@ -1,7 +1,7 @@
 /* PR tree-optimization/83843 */
 /* { dg-do run } */
 /* { dg-options "-O2 -fdump-tree-store-merging" } */
-/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { 
target store_merge } } } */
+/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { 
target { store_merge && { ! arm*-*-* } } } } } */
 
 __attribute__((noipa)) void
 foo (unsigned char *buf, unsigned char *tab)
--- gcc/testsuite/gcc.dg/store_merging_19.c.jj  2018-01-17 12:10:34.819962003 
+0100
+++ gcc/testsuite/gcc.dg/store_merging_19.c 2018-01-17 12:13:08.425987375 
+0100
@@ -0,0 +1,57 @@
+/* PR tree-optimization/83843 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { 
target store_merge } } } */
+
+__attribute__((noipa)) void
+foo (unsigned char *buf, unsigned char *tab)
+{
+  tab = __builtin_assume_aligned (tab, 2);
+  buf = __builtin_assume_aligned (buf, 2);
+  unsigned v = tab[1] ^ (tab[0] << 8);
+  buf[0] = ~(v >> 8);
+  buf[1] = ~v;
+}
+
+__attribute__((noipa)) void
+bar (unsigned char *buf, unsigned char *tab)
+{
+  tab = __builtin_assume_aligned (tab, 2);
+  buf = __builtin_assume_aligned (buf, 2);
+  unsigned v = tab[1] ^ (tab[0] << 8);
+  buf[0] = (v >> 8);
+  buf[1] = ~v;
+}
+
+__attribute__((noipa)) void
+baz (unsigned char *buf, unsigned char *tab)
+{
+  tab = __builtin_assume_aligned (tab, 2);
+  buf = __builtin_assume_aligned (buf, 2);
+  unsigned v = tab[1] ^ (tab[0] << 8);
+  buf[0] = ~(v >> 8);
+  buf[1] = v;
+}
+
+int
+main ()
+{
+  volatile unsigned char l1 = 0;
+  volatile unsigned char l2 = 1;
+  unsigned char buf[2] __attribute__((aligned (2)));
+  unsigned char tab[2] __attribute__((aligned (2))) = { l1 + 1, l2 * 2 };
+  foo (buf, tab);
+  if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2)
+__builtin_abort ();
+  buf[0] = l1 + 7;
+  buf[1] = l2 * 8;
+  bar (buf, tab);
+  if (buf[0] != 1 || buf[1] != (unsigned char) ~2)
+__builtin_abort ();
+  buf[0] = l1 + 9;
+  buf[1] = l2 * 10;
+  baz (buf, tab);
+  if (buf[0] != (unsigned char) ~1 || buf[1] != 2)
+__builtin_abort ();
+  return 0;
+}

Jakub


Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)

2018-01-16 Thread Christophe Lyon
On 15 January 2018 at 22:44, Jakub Jelinek  wrote:
> Hi!
>
> When using the bswap pass infrastructure, BIT_NOT_EXPRs aren't allowed in
> the middle, but due to the way process_store handles those it can appear
> around the value, which is something output_merged_store didn't handle.
>
> Fixed thusly, where we handle not just the case when the bswap (or nop)
> value needs inversion as whole, but also cases where only a few portions of
> it need xoring with some mask.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2018-01-15  Jakub Jelinek  
>
> PR tree-optimization/83843
> * gimple-ssa-store-merging.c
> (imm_store_chain_info::output_merged_store): Handle bit_not_p on
> store_immediate_info for bswap/nop orig_stores.
>
> * gcc.dg/store_merging_18.c: New test.
>
Hi Jakub,

I've noticed that this new test fails on arm, eg:
arm-none-linux-gnueabihf
--with-mode arm
--with-cpu cortex-a9
--with-fpu neon-fp16
FAIL: gcc.dg/store_merging_18.c scan-tree-dump-times store-merging
"Merging successful" 3 (found 0 times)

Do you want me to file a PR?

Christophe



> --- gcc/gimple-ssa-store-merging.c.jj   2018-01-04 00:43:17.629703230 +0100
> +++ gcc/gimple-ssa-store-merging.c  2018-01-15 12:29:14.105789381 +0100
> @@ -3619,6 +3619,15 @@ imm_store_chain_info::output_merged_stor
>   gimple_seq_add_stmt_without_update (&seq, stmt);
>   src = gimple_assign_lhs (stmt);
> }
> + inv_op = invert_op (split_store, 2, int_type, xor_mask);
> + if (inv_op != NOP_EXPR)
> +   {
> + stmt = gimple_build_assign (make_ssa_name (int_type),
> + inv_op, src, xor_mask);
> + gimple_set_location (stmt, loc);
> + gimple_seq_add_stmt_without_update (&seq, stmt);
> + src = gimple_assign_lhs (stmt);
> +   }
>   break;
> default:
>   src = ops[0];
> --- gcc/testsuite/gcc.dg/store_merging_18.c.jj  2018-01-15 12:43:49.607227365 
> +0100
> +++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-15 12:43:24.882245004 
> +0100
> @@ -0,0 +1,51 @@
> +/* PR tree-optimization/83843 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fdump-tree-store-merging" } */
> +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" 
> { target store_merge } } } */
> +
> +__attribute__((noipa)) void
> +foo (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = ~(v >> 8);
> +  buf[1] = ~v;
> +}
> +
> +__attribute__((noipa)) void
> +bar (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = (v >> 8);
> +  buf[1] = ~v;
> +}
> +
> +__attribute__((noipa)) void
> +baz (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = ~(v >> 8);
> +  buf[1] = v;
> +}
> +
> +int
> +main ()
> +{
> +  volatile unsigned char l1 = 0;
> +  volatile unsigned char l2 = 1;
> +  unsigned char buf[2];
> +  unsigned char tab[2] = { l1 + 1, l2 * 2 };
> +  foo (buf, tab);
> +  if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2)
> +__builtin_abort ();
> +  buf[0] = l1 + 7;
> +  buf[1] = l2 * 8;
> +  bar (buf, tab);
> +  if (buf[0] != 1 || buf[1] != (unsigned char) ~2)
> +__builtin_abort ();
> +  buf[0] = l1 + 9;
> +  buf[1] = l2 * 10;
> +  baz (buf, tab);
> +  if (buf[0] != (unsigned char) ~1 || buf[1] != 2)
> +__builtin_abort ();
> +  return 0;
> +}
>
> Jakub


Re: [PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)

2018-01-15 Thread Richard Biener
On Mon, 15 Jan 2018, Jakub Jelinek wrote:

> Hi!
> 
> When using the bswap pass infrastructure, BIT_NOT_EXPRs aren't allowed in
> the middle, but due to the way process_store handles those it can appear
> around the value, which is something output_merged_store didn't handle.
> 
> Fixed thusly, where we handle not just the case when the bswap (or nop)
> value needs inversion as whole, but also cases where only a few portions of
> it need xoring with some mask.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Richard.

> 2018-01-15  Jakub Jelinek  
> 
>   PR tree-optimization/83843
>   * gimple-ssa-store-merging.c
>   (imm_store_chain_info::output_merged_store): Handle bit_not_p on
>   store_immediate_info for bswap/nop orig_stores.
> 
>   * gcc.dg/store_merging_18.c: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2018-01-04 00:43:17.629703230 +0100
> +++ gcc/gimple-ssa-store-merging.c2018-01-15 12:29:14.105789381 +0100
> @@ -3619,6 +3619,15 @@ imm_store_chain_info::output_merged_stor
> gimple_seq_add_stmt_without_update (&seq, stmt);
> src = gimple_assign_lhs (stmt);
>   }
> +   inv_op = invert_op (split_store, 2, int_type, xor_mask);
> +   if (inv_op != NOP_EXPR)
> + {
> +   stmt = gimple_build_assign (make_ssa_name (int_type),
> +   inv_op, src, xor_mask);
> +   gimple_set_location (stmt, loc);
> +   gimple_seq_add_stmt_without_update (&seq, stmt);
> +   src = gimple_assign_lhs (stmt);
> + }
> break;
>   default:
> src = ops[0];
> --- gcc/testsuite/gcc.dg/store_merging_18.c.jj2018-01-15 
> 12:43:49.607227365 +0100
> +++ gcc/testsuite/gcc.dg/store_merging_18.c   2018-01-15 12:43:24.882245004 
> +0100
> @@ -0,0 +1,51 @@
> +/* PR tree-optimization/83843 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fdump-tree-store-merging" } */
> +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" 
> { target store_merge } } } */
> +
> +__attribute__((noipa)) void
> +foo (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = ~(v >> 8);
> +  buf[1] = ~v;
> +}
> +
> +__attribute__((noipa)) void
> +bar (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = (v >> 8);
> +  buf[1] = ~v;
> +}
> +
> +__attribute__((noipa)) void
> +baz (unsigned char *buf, unsigned char *tab)
> +{
> +  unsigned v = tab[1] ^ (tab[0] << 8);
> +  buf[0] = ~(v >> 8);
> +  buf[1] = v;
> +}
> +
> +int
> +main ()
> +{
> +  volatile unsigned char l1 = 0;
> +  volatile unsigned char l2 = 1;
> +  unsigned char buf[2];
> +  unsigned char tab[2] = { l1 + 1, l2 * 2 };
> +  foo (buf, tab);
> +  if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2)
> +__builtin_abort ();
> +  buf[0] = l1 + 7;
> +  buf[1] = l2 * 8;
> +  bar (buf, tab);
> +  if (buf[0] != 1 || buf[1] != (unsigned char) ~2)
> +__builtin_abort ();
> +  buf[0] = l1 + 9;
> +  buf[1] = l2 * 10;
> +  baz (buf, tab);
> +  if (buf[0] != (unsigned char) ~1 || buf[1] != 2)
> +__builtin_abort ();
> +  return 0;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


[PATCH] Fix store-merging for ~ of bswap (PR tree-optimization/83843)

2018-01-15 Thread Jakub Jelinek
Hi!

When using the bswap pass infrastructure, BIT_NOT_EXPRs aren't allowed in
the middle, but due to the way process_store handles those it can appear
around the value, which is something output_merged_store didn't handle.

Fixed thusly, where we handle not just the case when the bswap (or nop)
value needs inversion as whole, but also cases where only a few portions of
it need xoring with some mask.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-01-15  Jakub Jelinek  

PR tree-optimization/83843
* gimple-ssa-store-merging.c
(imm_store_chain_info::output_merged_store): Handle bit_not_p on
store_immediate_info for bswap/nop orig_stores.

* gcc.dg/store_merging_18.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2018-01-04 00:43:17.629703230 +0100
+++ gcc/gimple-ssa-store-merging.c  2018-01-15 12:29:14.105789381 +0100
@@ -3619,6 +3619,15 @@ imm_store_chain_info::output_merged_stor
  gimple_seq_add_stmt_without_update (&seq, stmt);
  src = gimple_assign_lhs (stmt);
}
+ inv_op = invert_op (split_store, 2, int_type, xor_mask);
+ if (inv_op != NOP_EXPR)
+   {
+ stmt = gimple_build_assign (make_ssa_name (int_type),
+ inv_op, src, xor_mask);
+ gimple_set_location (stmt, loc);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+   }
  break;
default:
  src = ops[0];
--- gcc/testsuite/gcc.dg/store_merging_18.c.jj  2018-01-15 12:43:49.607227365 
+0100
+++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-15 12:43:24.882245004 
+0100
@@ -0,0 +1,51 @@
+/* PR tree-optimization/83843 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { 
target store_merge } } } */
+
+__attribute__((noipa)) void
+foo (unsigned char *buf, unsigned char *tab)
+{
+  unsigned v = tab[1] ^ (tab[0] << 8);
+  buf[0] = ~(v >> 8);
+  buf[1] = ~v;
+}
+
+__attribute__((noipa)) void
+bar (unsigned char *buf, unsigned char *tab)
+{
+  unsigned v = tab[1] ^ (tab[0] << 8);
+  buf[0] = (v >> 8);
+  buf[1] = ~v;
+}
+
+__attribute__((noipa)) void
+baz (unsigned char *buf, unsigned char *tab)
+{
+  unsigned v = tab[1] ^ (tab[0] << 8);
+  buf[0] = ~(v >> 8);
+  buf[1] = v;
+}
+
+int
+main ()
+{
+  volatile unsigned char l1 = 0;
+  volatile unsigned char l2 = 1;
+  unsigned char buf[2];
+  unsigned char tab[2] = { l1 + 1, l2 * 2 };
+  foo (buf, tab);
+  if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2)
+__builtin_abort ();
+  buf[0] = l1 + 7;
+  buf[1] = l2 * 8;
+  bar (buf, tab);
+  if (buf[0] != 1 || buf[1] != (unsigned char) ~2)
+__builtin_abort ();
+  buf[0] = l1 + 9;
+  buf[1] = l2 * 10;
+  baz (buf, tab);
+  if (buf[0] != (unsigned char) ~1 || buf[1] != 2)
+__builtin_abort ();
+  return 0;
+}

Jakub