This patch adds support for matching a saturating truncate of a signed
input variable in which negative values are clipped to 0 instead of
NT_MAX (the maximum value of the narrow type).  This pattern is seen in
x264.  With this patch, the loop codegen changes from

        vsetvli a5,a2,e32,m1,ta,mu
        vle32.v v1,0(a1)
        slli    a4,a5,2
        sub     a2,a2,a5
        add     a1,a1,a4
        vmsgtu.vx       v0,v1,a3
        vrsub.vi        v2,v1,0
        vsra.vi v1,v2,31,v0.t
        vsetvli zero,zero,e16,mf2,ta,ma
        vnsrl.wi        v1,v1,0
        vsetvli zero,zero,e8,mf4,ta,ma
        vnsrl.wi        v1,v1,0
        vse8.v  v1,0(a0)

to

        vsetvli a5,a2,e32,m1,ta,ma
        vle32.v v1,0(a1)
        slli    a4,a5,2
        sub     a2,a2,a5
        add     a1,a1,a4
        vmax.vv v1,v1,v2 <-- v2 defined by `vmv.v.i     v2,0` outside the loop
        vsetvli zero,zero,e16,mf2,ta,ma
        vnclipu.wi      v1,v1,0
        vsetvli zero,zero,e8,mf4,ta,ma
        vnclipu.wi      v1,v1,0
        vse8.v  v1,0(a0)

which is closer to how clang vectorizes the pattern.

        PR target/120378

gcc/ChangeLog:

        * match.pd: Add narrow clip pattern.
        * tree-vect-patterns.cc (gimple_unsigned_integer_narrow_clip):
        Add function declaration.
        (vect_recog_sat_trunc_pattern): Perform narrow clip check before
        regular sat trunc check.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/pr120378.c: New test.

Signed-off-by: Edwin Lu <e...@rivosinc.com>
---
 gcc/match.pd                                  | 23 +++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr120378.c   | 21 ++++++++++++
 gcc/tree-vect-patterns.cc                     | 32 +++++++++++++++++++
 3 files changed, 76 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr120378.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 82e6e291ae1..546e8fb3685 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3360,6 +3360,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     }
     (if (wi::eq_p (sum, wi::uhwi (0, precision))))))))

+(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))
+ (match (unsigned_integer_narrow_clip @0)
+  /* SAT_U_TRUNC = (UT)X & ~(NT)(-1) ? (-X) >> 31 : X
+
+     The gimple representation uses X > (NT)(-1) instead of
+     using & so match on gt instead of bit_and.  */
+  (convert (cond^ (gt (nop_convert? @0) INTEGER_CST@1)
+        (rshift:s (nop_convert? (negate (nop_convert? @0))) INTEGER_CST@2)
+        @0))
+  (if (! TYPE_UNSIGNED (TREE_TYPE (@0)))
+   (with
+    {
+     unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
+     unsigned otype_precision = TYPE_PRECISION (type);
+     wide_int trunc_max = wi::mask (otype_precision, false, itype_precision);
+     wide_int int_cst_1 = wi::to_wide (@1, itype_precision);
+     wide_int int_cst_2 = wi::to_wide (@2, itype_precision);
+     wide_int shift_amount = wi::uhwi ((HOST_WIDE_INT_1U << 5) - 1,
+                                 itype_precision); // Aka 31
+    }
+    (if (otype_precision < itype_precision && wi::eq_p (trunc_max,
+    int_cst_1) && wi::eq_p(int_cst_2, shift_amount)))))))
+
 /* Saturation truncate for unsigned integer.  */
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))
  (match (unsigned_integer_sat_trunc @0)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr120378.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr120378.c
new file mode 100644
index 00000000000..500028e7a15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr120378.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include <stdint.h>
+
+inline uint8_t
+clip_uint8 (int x)
+{
+  return x & (~255) ? (-x) >> 31 : x;
+}
+
+void __attribute__ ((noipa))
+clip_loop (uint8_t *res, int *x, int w)
+{
+  for (int i = 0; i < w; i++)
+    res[i] = clip_uint8 (x[i]);
+}
+
+/* { dg-final { scan-tree-dump-times ".SAT_TRUNC " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "MAX_EXPR " 1 "optimized" } } */
+/* { dg-final { scan-assembler-times {vnclipu\.wi} 2 } } */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index ffb320fbf23..a6588950be4 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4505,6 +4505,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));

+extern bool gimple_unsigned_integer_narrow_clip (tree, tree*, tree (*)(tree));
+
 extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
@@ -4739,6 +4741,36 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
   tree lhs = gimple_assign_lhs (last_stmt);
   tree otype = TREE_TYPE (lhs);

+  if ((gimple_unsigned_integer_narrow_clip (lhs, ops, NULL))
+       && type_has_mode_precision_p (otype))
+    {
+      tree itype = TREE_TYPE (ops[0]);
+      tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
+      tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
+      internal_fn fn = IFN_SAT_TRUNC;
+
+      if (v_itype != NULL_TREE && v_otype != NULL_TREE
+       && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
+                                          OPTIMIZE_FOR_BOTH))
+       {
+         tree temp = vect_recog_temp_ssa_var (itype, NULL);
+         gimple * max_stmt = gimple_build_assign (temp, build2 (MAX_EXPR, itype, build_zero_cst(itype), ops[0]));
+         append_pattern_def_seq (vinfo, stmt_vinfo, max_stmt, v_itype);
+
+         gcall *call = gimple_build_call_internal (fn, 1, temp);
+         tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
+
+         gimple_call_set_lhs (call, out_ssa);
+         gimple_call_set_nothrow (call, /* nothrow_p */ false);
+         gimple_set_location (call, gimple_location (last_stmt));
+
+         *type_out = v_otype;
+
+         return call;
+       }
+
+    }
+
   if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
        || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
       && type_has_mode_precision_p (otype))
--
2.43.0

Reply via email to