r12-5102-gfb161782545224f5 improves integer bit test on __atomic_fetch_[or|and]_* returns only for nop_convert, i.e.
transform mask_5 = 1 << bit_4(D); mask.0_1 = (unsigned int) mask_5; _2 = __atomic_fetch_or_4 (a_7(D), mask.0_1, 0); t1_9 = (int) _2; t2_10 = mask_5 & t1_9; to mask_5 = 1 << n_4(D); mask.1_1 = (unsigned int) mask_5; _11 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_1_4, n_4(D), 0); _8 = (int) _11; And this patch extends the original patch to handle truncation, i.e. transform long int mask; mask_8 = 1 << n_7(D); mask.0_1 = (long unsigned int) mask_8; _2 = __sync_fetch_and_or_8 (&pscc_a_2_3, mask.0_1); _3 = (unsigned int) _2; _4 = (unsigned int) mask_8; _5 = _3 & _4; _6 = (int) _5; to long int mask; mask_8 = 1 << n_7(D); mask.0_1 = (long unsigned int) mask_8; _14 = .ATOMIC_BIT_TEST_AND_SET (&pscc_a_2_3, n_7(D), 0); _5 = (unsigned int) _14; _6 = (int) _5; Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} Ok for trunk? 2021-11-17 Hongtao Liu <hongtao....@intel.com> H.J. Lu <hongjiu...@intel.com> gcc/ChangeLog: PR tree-optimization/103194 * match.pd (gimple_nop_atomic_bit_test_and_p): Extended to match truncation. * tree-ssa-ccp.c (gimple_nop_convert): Declare. (optimize_atomic_bit_test_and): Enhance optimize_atomic_bit_test_and to handle truncation. gcc/testsuite/ChangeLog: * gcc.target/i386/pr103194-2.c: New test. * gcc.target/i386/pr103194-3.c: New test. * gcc.target/i386/pr103194-4.c: New test. * gcc.target/i386/pr103194-5.c: New test. * gcc.target/i386/pr103194.c: New test. 
--- gcc/match.pd | 48 ++++++----- gcc/testsuite/gcc.target/i386/pr103194-2.c | 64 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr103194-3.c | 64 ++++++++++++++ gcc/testsuite/gcc.target/i386/pr103194-4.c | 61 +++++++++++++ gcc/testsuite/gcc.target/i386/pr103194-5.c | 61 +++++++++++++ gcc/testsuite/gcc.target/i386/pr103194.c | 16 ++++ gcc/tree-ssa-ccp.c | 99 +++++++++++----------- 7 files changed, 345 insertions(+), 68 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr103194-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103194-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103194-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103194-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103194.c diff --git a/gcc/match.pd b/gcc/match.pd index 7f76925b6c6..6c68534fff5 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4021,39 +4021,43 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) #if GIMPLE (match (nop_atomic_bit_test_and_p @0 @1 @4) - (bit_and (nop_convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3)) + (bit_and (convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3)) INTEGER_CST@1) (with { int ibit = tree_log2 (@0); int ibit2 = tree_log2 (@1); } (if (ibit == ibit2 - && ibit >= 0)))) + && ibit >= 0 + && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))) (match (nop_atomic_bit_test_and_p @0 @1 @3) - (bit_and (nop_convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0)) + (bit_and (convert?@3 (SYNC_FETCH_OR_XOR_N @2 INTEGER_CST@0)) INTEGER_CST@1) (with { int ibit = tree_log2 (@0); int ibit2 = tree_log2 (@1); } (if (ibit == ibit2 - && ibit >= 0)))) + && ibit >= 0 + && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))) (match (nop_atomic_bit_test_and_p @0 @0 @4) (bit_and:c - (nop_convert?@4 + (convert1?@4 (ATOMIC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@5 @6)) @3)) - @0)) + (convert2? 
@0)) + (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))) (match (nop_atomic_bit_test_and_p @0 @0 @4) (bit_and:c - (nop_convert?@4 + (convert1?@4 (SYNC_FETCH_OR_XOR_N @2 (nop_convert? (lshift@0 integer_onep@3 @5)))) - @0)) + (convert2? @0)) + (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))) (match (nop_atomic_bit_test_and_p @0 @1 @3) - (bit_and@4 (nop_convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5)) + (bit_and@4 (convert?@3 (ATOMIC_FETCH_AND_N @2 INTEGER_CST@0 @5)) INTEGER_CST@1) (with { int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)), @@ -4061,11 +4065,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) int ibit2 = tree_log2 (@1); } (if (ibit == ibit2 - && ibit >= 0)))) + && ibit >= 0 + && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))) (match (nop_atomic_bit_test_and_p @0 @1 @3) (bit_and@4 - (nop_convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0)) + (convert?@3 (SYNC_FETCH_AND_AND_N @2 INTEGER_CST@0)) INTEGER_CST@1) (with { int ibit = wi::exact_log2 (wi::zext (wi::bit_not (wi::to_wide (@0)), @@ -4073,19 +4078,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) int ibit2 = tree_log2 (@1); } (if (ibit == ibit2 - && ibit >= 0)))) + && ibit >= 0 + && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2)))))) -(match (nop_atomic_bit_test_and_p @0 @0 @3) +(match (nop_atomic_bit_test_and_p @4 @0 @3) (bit_and:c - (nop_convert?@3 - (ATOMIC_FETCH_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))) @5)) - @0)) + (convert1?@3 + (ATOMIC_FETCH_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 @7))) @5)) + (convert2? @0)) + (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))) -(match (nop_atomic_bit_test_and_p @0 @0 @3) +(match (nop_atomic_bit_test_and_p @4 @0 @3) (bit_and:c - (nop_convert?@3 - (SYNC_FETCH_AND_AND_N @2 (nop_convert? (bit_not (lshift@0 integer_onep@6 @7))))) - @0)) + (convert1?@3 + (SYNC_FETCH_AND_AND_N @2 (nop_convert?@4 (bit_not (lshift@0 integer_onep@6 @7))))) + (convert2? 
@0)) + (if (TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@2))))) #endif diff --git a/gcc/testsuite/gcc.target/i386/pr103194-2.c b/gcc/testsuite/gcc.target/i386/pr103194-2.c new file mode 100644 index 00000000000..1a991fe0199 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103194-2.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +#include <stdatomic.h> +#include <stdbool.h> + +#define FOO(RTYPE,TYPE,MASK) \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1 << MASK; \ + return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1 << MASK; \ + return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1 << MASK; \ + return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1 << MASK; \ + return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1 << MASK; \ + return __sync_fetch_and_or (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1 << MASK; \ + return __sync_fetch_and_xor (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1 << MASK; \ + return __sync_xor_and_fetch (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1 << MASK; \ + return __sync_fetch_and_and (a, 
~mask) & mask; \ + } \ + +FOO(char, short, 0); +FOO(char, short, 7); +FOO(short, int, 0); +FOO(short, int, 15); + +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 8 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 16 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 8 } } */ +/* { dg-final { scan-assembler-not "cmpxchg" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr103194-3.c b/gcc/testsuite/gcc.target/i386/pr103194-3.c new file mode 100644 index 00000000000..4907598bbd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103194-3.c @@ -0,0 +1,64 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ +#include <stdatomic.h> +#include <stdbool.h> +typedef long long int64; + +#define FOO(RTYPE, TYPE,MASK) \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1ll << MASK; \ + return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1ll << MASK; \ + return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1ll << MASK; \ + return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1ll << MASK; \ + return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1ll << MASK; \ + return __sync_fetch_and_or (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1ll << MASK; \ + return __sync_fetch_and_xor (a, mask) & mask; \ + } \ + 
__attribute__((noinline,noclone)) RTYPE \ + sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1ll << MASK; \ + return __sync_xor_and_fetch (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a) \ + { \ + TYPE mask = 1ll << MASK; \ + return __sync_fetch_and_and (a, ~mask) & mask; \ + } \ + + +FOO(int, int64, 1); +FOO(int, int64, 31); + +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 4 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 8 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 4 } } */ +/* { dg-final { scan-assembler-not "cmpxchg" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr103194-4.c b/gcc/testsuite/gcc.target/i386/pr103194-4.c new file mode 100644 index 00000000000..8573016c5d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103194-4.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +#include <stdatomic.h> +#include <stdbool.h> + +#define FOO(RTYPE,TYPE) \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1 << MASK; \ + return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1 << MASK; \ + return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1 << MASK; \ + return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1 << MASK; \ + return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_or_##TYPE##_##MASK (_Atomic 
TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1 << MASK; \ + return __sync_fetch_and_or (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1 << MASK; \ + return __sync_fetch_and_xor (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1 << MASK; \ + return __sync_xor_and_fetch (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1 << MASK; \ + return __sync_fetch_and_and (a, ~mask) & mask; \ + } \ + +FOO(short, int); + +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 2 } } */ +/* { dg-final { scan-assembler-not "cmpxchg" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr103194-5.c b/gcc/testsuite/gcc.target/i386/pr103194-5.c new file mode 100644 index 00000000000..dfaddf0aa6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103194-5.c @@ -0,0 +1,61 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2" } */ +#include <stdatomic.h> +#include <stdbool.h> + +#define FOO(RTYPE,TYPE) \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1ll << MASK; \ + return __atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1ll << MASK; \ + return __atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_xor_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1ll << MASK; \ + return __atomic_xor_fetch (a, mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + atomic_fetch_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1ll << MASK; \ + return __atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_or_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1ll << MASK; \ + return __sync_fetch_and_or (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_xor_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1ll << MASK; \ + return __sync_fetch_and_xor (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_xor_and_fetch_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1ll << MASK; \ + return __sync_xor_and_fetch (a, mask) & mask; \ + } \ + __attribute__((noinline,noclone)) RTYPE \ + sync_fetch_and_and_##TYPE##_##MASK (_Atomic TYPE* a, TYPE MASK) \ + { \ + TYPE mask = 1ll << MASK; \ + return __sync_fetch_and_and (a, ~mask) & mask; \ + } \ + +FOO(int, long); + +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 2 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 4 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ 
\t\]*btr" 2 } } */ +/* { dg-final { scan-assembler-not "cmpxchg" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr103194.c b/gcc/testsuite/gcc.target/i386/pr103194.c new file mode 100644 index 00000000000..a6d84332e4d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103194.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +long pscc_a_2_3; +int pscc_a_1_4; +void pscc() +{ + pscc_a_1_4 = __sync_fetch_and_and(&pscc_a_2_3, 1); +} + +static int si; +long +test_types (long n) +{ + unsigned int u2 = __atomic_fetch_xor (&si, 0, 5); + return u2; +} diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c index 18d57729d8a..9e12da8f011 100644 --- a/gcc/tree-ssa-ccp.c +++ b/gcc/tree-ssa-ccp.c @@ -3326,6 +3326,7 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt, */ extern bool gimple_nop_atomic_bit_test_and_p (tree, tree *, tree (*) (tree)); +extern bool gimple_nop_convert (tree, tree*, tree (*) (tree)); /* Optimize mask_2 = 1 << cnt_1; @@ -3462,16 +3463,16 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, ibit = 0; } else if (TYPE_PRECISION (TREE_TYPE (use_lhs)) - == TYPE_PRECISION (TREE_TYPE (use_rhs))) + <= TYPE_PRECISION (TREE_TYPE (use_rhs))) { gimple *use_nop_stmt; if (!single_imm_use (use_lhs, &use_p, &use_nop_stmt) || !is_gimple_assign (use_nop_stmt)) return; + tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt); rhs_code = gimple_assign_rhs_code (use_nop_stmt); if (rhs_code != BIT_AND_EXPR) { - tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt); if (TREE_CODE (use_nop_lhs) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_nop_lhs)) return; @@ -3584,24 +3585,23 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, } else { - tree and_expr = gimple_assign_lhs (use_nop_stmt); tree match_op[3]; gimple *g; - if (!gimple_nop_atomic_bit_test_and_p (and_expr, + if (!gimple_nop_atomic_bit_test_and_p (use_nop_lhs, &match_op[0], NULL) || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (match_op[2]) || !single_imm_use (match_op[2], 
&use_p, &g) || !is_gimple_assign (g)) return; - mask = match_op[1]; - if (TREE_CODE (mask) == INTEGER_CST) + mask = match_op[0]; + if (TREE_CODE (match_op[1]) == INTEGER_CST) { - ibit = tree_log2 (mask); + ibit = tree_log2 (match_op[1]); gcc_assert (ibit >= 0); } else { - g = SSA_NAME_DEF_STMT (mask); + g = SSA_NAME_DEF_STMT (match_op[1]); gcc_assert (is_gimple_assign (g)); bit = gimple_assign_rhs2 (g); } @@ -3623,19 +3623,30 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, _1 = __atomic_fetch_and_* (ptr_6, ~mask_7, _3); _12 = _3 & mask_7; _5 = (int) _12; - */ - replace_uses_by (use_lhs, lhs); - tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt); - var = make_ssa_name (TREE_TYPE (use_nop_lhs)); - gimple_assign_set_lhs (use_nop_stmt, var); + + and Convert + _1 = __atomic_fetch_and_4 (ptr_6, ~mask, _3); + _2 = (short int) _1; + _5 = _2 & mask; + to + _1 = __atomic_fetch_and_4 (ptr_6, ~mask, _3); + _8 = _1 & mask; + _5 = (short int) _8; + */ + gimple_seq stmts = NULL; + match_op[1] = gimple_convert (&stmts, + TREE_TYPE (use_rhs), + match_op[1]); + var = gimple_build (&stmts, BIT_AND_EXPR, + TREE_TYPE (use_rhs), use_rhs, match_op[1]); gsi = gsi_for_stmt (use_stmt); gsi_remove (&gsi, true); release_defs (use_stmt); - gsi_remove (gsip, true); - g = gimple_build_assign (use_nop_lhs, NOP_EXPR, var); + use_stmt = gimple_seq_last_stmt (stmts); gsi = gsi_for_stmt (use_nop_stmt); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); - use_stmt = use_nop_stmt; + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + gimple_assign_set_rhs_with_ops (&gsi, CONVERT_EXPR, var); + update_stmt (use_nop_stmt); } } else @@ -3671,55 +3682,47 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, else if (TREE_CODE (mask) == SSA_NAME) { gimple *g = SSA_NAME_DEF_STMT (mask); - if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET) + tree match_op; + if (gimple_nop_convert (mask, &match_op, NULL)) { - if (!is_gimple_assign (g) - || gimple_assign_rhs_code (g) != BIT_NOT_EXPR) - return; - mask = 
gimple_assign_rhs1 (g); + mask = match_op; if (TREE_CODE (mask) != SSA_NAME) return; g = SSA_NAME_DEF_STMT (mask); } if (!is_gimple_assign (g)) return; - rhs_code = gimple_assign_rhs_code (g); - if (rhs_code != LSHIFT_EXPR) - { - if (rhs_code != NOP_EXPR) - return; - /* Handle - _1 = 1 << bit_4(D); - mask_5 = (unsigned int) _1; - _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0); - _3 = _2 & mask_5; - */ - tree nop_lhs = gimple_assign_lhs (g); - tree nop_rhs = gimple_assign_rhs1 (g); - if (TYPE_PRECISION (TREE_TYPE (nop_lhs)) - != TYPE_PRECISION (TREE_TYPE (nop_rhs))) + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET) + { + if (gimple_assign_rhs_code (g) != BIT_NOT_EXPR) return; - g = SSA_NAME_DEF_STMT (nop_rhs); - if (!is_gimple_assign (g) - || gimple_assign_rhs_code (g) != LSHIFT_EXPR) + mask = gimple_assign_rhs1 (g); + if (TREE_CODE (mask) != SSA_NAME) return; + g = SSA_NAME_DEF_STMT (mask); } - if (!integer_onep (gimple_assign_rhs1 (g))) + + rhs_code = gimple_assign_rhs_code (g); + if (rhs_code != LSHIFT_EXPR + || !integer_onep (gimple_assign_rhs1 (g))) return; bit = gimple_assign_rhs2 (g); } else return; + tree cmp_mask; if (gimple_assign_rhs1 (use_stmt) == lhs) - { - if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0)) - return; - } - else if (gimple_assign_rhs2 (use_stmt) != lhs - || !operand_equal_p (gimple_assign_rhs1 (use_stmt), - mask, 0)) + cmp_mask = gimple_assign_rhs2 (use_stmt); + else + cmp_mask = gimple_assign_rhs1 (use_stmt); + + tree match_op; + if (gimple_nop_convert (cmp_mask, &match_op, NULL)) + cmp_mask = match_op; + + if (!operand_equal_p (cmp_mask, mask, 0)) return; } -- 2.18.1