Hi all,
This is a patch to demonstrate some unusual behavior I have encountered in
combine.
A summary of the behaviour is:
When combining A -> B, the register equivalence notes of A are checked, but the
register equivalence notes of B are not.
Is this expected behaviour?
from combine.c:1484 in combine_instructions
/* Try this insn with each REG_EQUAL note it links back to. */
FOR_EACH_LOG_LINK (links, insn)
{
rtx set, note;
rtx_insn *temp = links->insn;
if ((set = single_set (temp)) != 0
&& (note = find_reg_equal_equiv_note (temp)) != 0
&& (note = XEXP (note, 0), GET_CODE (note)) != EXPR_LIST
The register equivalence notes of temp are checked, but the register
equivalence notes of insn are not checked.
To reproduce:
With the patch applied:
Compile the following function
/* Reproducer: for each of the 1024 elements, convert b[i] to float, divide it
   by 4.0f and store the result in a[i].  The source is kept exactly as posted
   so that it corresponds to the combine dump quoted after it.  */
void
bar (float *a, int *b)
{
int i;
for (i = 0; i < 1024; i++)
a[i] = (((float)b[i])/ 4.0f);
}
Combine does not check the REG_EQUAL note on insn 12, and so does not try the
equivalent pattern that uses a const_vector instead of register 99.
Trying 10 -> 12:
10: r97:V4SF=float(r96:V4SI)
REG_DEAD r96:V4SI
12: r98:V4SF=r97:V4SF*r99:V4SF
REG_DEAD r97:V4SF
REG_EQUAL r97:V4SF*const_vector
Failed to match this instruction:
(set (reg:V4SF 98 [ D.3422 ])
(mult:V4SF (float:V4SF (reg:V4SI 96 [ D.3420 ]))
(reg:V4SF 99)))
For comparison, in a similar pattern where the REG_EQUAL note is attached to
the first insn, the REG_EQUAL note is checked and the equivalent constant is
used:
/* Comparison reproducer: for each of the 1024 elements, multiply a[i] by 4.0f
   and store the product, converted to int, in b[i].

   The function returns nothing, so it is declared void; the definition as
   originally posted had no return type (implicit int, which C99 removed).
   The loop itself is unchanged so that it still corresponds to the combine
   dump quoted after it.  */
void
foo (float *a, int *b)
{
  int i;
  for (i = 0; i < 1024; i++)
    b[i] = a[i] * 4.0f;
}
Trying 11 -> 12:
11: r97:V4SF=r96:V4SF*r98:V4SF
REG_DEAD r96:V4SF
REG_EQUAL r96:V4SF*const_vector
12: r99:V4SI=fix(unspec[r97:V4SF] 23)
REG_DEAD r97:V4SF
Failed to match this instruction:
(set (reg:V4SI 99 [ D.3432 ])
(fix:V4SI (unspec:V4SI [
(mult:V4SF (reg:V4SF 96 [ D.3430 ])
(reg:V4SF 98))
] UNSPEC_FRINTZ)))
Trying 11 -> 12:
11: r97:V4SF=r96:V4SF*const_vector
REG_DEAD r96:V4SF
REG_EQUAL r96:V4SF*const_vector
12: r99:V4SI=fix(unspec[r97:V4SF] 23)
REG_DEAD r97:V4SF
Successfully matched this instruction:
(set (reg:V4SI 99 [ D.3432 ])
(fix:V4SI (unspec:V4SI [
(mult:V4SF (reg:V4SF 96 [ D.3430 ])
(const_vector:V4SF [
(const_double:SF 4.0e+0 [0x0.8p+3]) repeated x4
]))
] UNSPEC_FRINTZ)))
Built from current trunk
$gcc -v
COLLECT_GCC=$BUILD/install/bin/aarch64-none-elf-gcc
COLLECT_LTO_WRAPPER=$BUILD/install/libexec/gcc/aarch64-none-elf/10.0.0/lto-wrapper
Target: aarch64-none-elf
Configured with: $SRC/gcc/configure --target=aarch64-none-elf
--prefix=$BUILD/install/ --with-gmp=$BUILD/host-tools
--with-mpfr=$BUILD/host-tools --with-mpc=$BUILD/host-tools
--with-isl=$BUILD/host-tools --disable-shared --disable-nls --disable-threads
--disable-tls --enable-checking=yes --enable-languages=c,c++,fortran
--with-newlib --with-pkgversion=unknown
Thread model: single
gcc version 10.0.0 20190524 (experimental) (unknown)
Test cases compiled with:
aarch64-none-elf-gcc -S -mcpu=cortex-a53 -O2 tmp.c -ftree-vectorize -fno-inline
-fdump-rtl-all -fno-vect-cost-model -dp -fdump-rtl-combine-all
-fdump-tree-optimized -o -
From 7e744509575030ca5b3fa6042d02d27171fbfbfd Mon Sep 17 00:00:00 2001
From: Joel Hutton <[email protected]>
Date: Tue, 11 Jun 2019 10:10:07 +0100
Subject: [PATCH] Minimal pattern to demonstrate combine behaviour
---
gcc/config/aarch64/aarch64-protos.h | 1 +
gcc/config/aarch64/aarch64-simd.md | 13 +++++++++++++
gcc/config/aarch64/aarch64.c | 6 ++++++
gcc/config/aarch64/predicates.md | 3 +++
4 files changed, 23 insertions(+)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index a0723266f22..ff1787c37ed 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -483,6 +483,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
enum reg_class aarch64_regno_regclass (unsigned);
int aarch64_asm_preferred_eh_data_format (int, int);
int aarch64_fpconst_pow_of_2 (rtx);
+int aarch64_fp_const_vec (rtx);
machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
machine_mode);
int aarch64_uxt_size (int, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d4c48d2aa61..698b49c006f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2133,6 +2133,19 @@
"TARGET_SIMD"
{})
+(define_insn "*aarch64_combine_scvtf"
+ [(set (match_operand 0 "register_operand" "=w")
+ (mult
+ (float
+ (match_operand 1 "" "w"))
+ (match_operand 2 "aarch64_fp_const_vec" ""))
+ )]
+ ""
+ {
+ return "test_match";
+ }
+)
+
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(FLOATUORS:VHSDF
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 83453d03095..f836246e184 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18327,6 +18327,12 @@ aarch64_fpconst_pow_of_2 (rtx x)
return exact_log2 (real_to_integer (r));
}
+int
+aarch64_fp_const_vec (rtx x)
+{
+ return GET_CODE (x) == CONST_VECTOR;
+}
+
/* If X is a vector of equal CONST_DOUBLE values and that value is
Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 10100ca830a..8fece3811b9 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -101,6 +101,9 @@
(define_predicate "aarch64_fp_vec_pow2"
(match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
+(define_predicate "aarch64_fp_const_vec"
+ (match_test "aarch64_fp_const_vec (op)"))
+
(define_predicate "aarch64_sve_cnt_immediate"
(and (match_code "const_poly_int")
(match_test "aarch64_sve_cnt_immediate_p (op)")))
--
2.17.1