Hi,
Since r16-4391-g85ab3a22ed11c9 we can use a punned type/mode for grouped
loads and stores. Vineet reported an x264 wrong-code bug since that
commit. The crux of the issue is that in avlprop we back-propagate
the AVL from consumers (like stores) to producers.
When e.g. a V4QI vector is type-punned by a V1SI vector
(subreg:V1SI (reg:V4QI ...)
the AVL of that instruction refers to the outer subreg mode, i.e. for an
AVL of 1 in a store we store one SImode element. The producer of the
store data is not type punned and still uses V4QI and we produce 4
QImode elements. Due to this mismatch we back-propagate the consumer
AVL of 1 to the producers, causing wrong code.
This patch looks if the use is inside a subreg and scales the immediate
AVL by the ratio of inner and outer mode.
Regtested on rv64gcv_zvl512b.
Regards
Robin
PR target/122445
gcc/ChangeLog:
* config/riscv/riscv-avlprop.cc
(pass_avlprop::get_vlmax_ta_preferred_avl):
Scale AVL of subreg uses.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr122445.c: New test.
---
gcc/config/riscv/riscv-avlprop.cc | 41 +++++++++++++++++++
.../gcc.target/riscv/rvv/autovec/pr122445.c | 26 ++++++++++++
2 files changed, 67 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c
diff --git a/gcc/config/riscv/riscv-avlprop.cc
b/gcc/config/riscv/riscv-avlprop.cc
index b8547a722c5..ee92f55ea81 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -77,6 +77,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "df.h"
#include "rtl-ssa.h"
+#include "rtl-iter.h"
#include "cfgcleanup.h"
#include "insn-attr.h"
#include "tm-constrs.h"
@@ -412,6 +413,46 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn)
const
&& def1->insn ()->compare_with (insn) >= 0)
return NULL_RTX;
}
+ else
+ {
+ /* If the use is in a subreg e.g. in a store it is possible that
+ we punned the vector mode with a larger mode like
+ (subreg:V1SI (reg:V4QI 123)).
+ For an AVL of 1 that means we actually store one SImode
+ element and not 1 QImode elements. But the latter is what we
+ would propagate if we took the AVL operand literally.
+ Instead we scale it by the ratio of inner and outer mode
+ (4 in the example above). */
+ int factor = 1;
+ if (use->includes_subregs ())
+ {
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, use_insn->rtl (), NONCONST)
+ {
+ const_rtx x = *iter;
+ if (!x)
+ break;
+ if (SUBREG_P (x)
+ && REGNO (SUBREG_REG (x)) == use->regno ()
+ && known_eq (GET_MODE_SIZE (use->mode ()),
+ GET_MODE_SIZE (GET_MODE (x))))
+ {
+ if (can_div_trunc_p (GET_MODE_NUNITS (use->mode ()),
+ GET_MODE_NUNITS (GET_MODE (x)),
+ &factor))
+ {
+ gcc_assert (factor > 0);
+ break;
+ }
+ else
+ return NULL_RTX;
+ }
+ }
+ }
+
+ if (factor > 1)
+ new_use_avl = GEN_INT (INTVAL (new_use_avl) * factor);
+ }
if (!use_avl)
use_avl = new_use_avl;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c
new file mode 100644
index 00000000000..47368684faa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcbv_zvl256b -mabi=lp64d -O3 -mrvv-vector-bits=zvl
--param=riscv-autovec-mode=V4QI -mtune=generic-ooo -fdump-rtl-avlprop-all" } */
+
+typedef unsigned char uint8_t;
+typedef short int16_t;
+
+#define FDEC_STRIDE 32
+
+static inline uint8_t x264_clip_uint8( int x )
+{
+ return x;
+}
+
+void
+x264_add4x4_idct (uint8_t *p_dst, int16_t d[16])
+{
+ for( int y = 0; y < 4; y++ )
+ {
+ for( int x = 0; x < 4; x++ )
+ p_dst[x] = x264_clip_uint8( p_dst[x] + d[y*4+x] );
+ p_dst += FDEC_STRIDE;
+ }
+}
+
+/* { dg-final { scan-rtl-dump "Propagating AVL: \\(const_int 4" "avlprop" } }
*/
+/* { dg-final { scan-rtl-dump-not "Propagating AVL: \\(const_int 1" "avlprop"
} } */
--
2.51.0