This is a patch from late 2024 (just before stage1 freeze), but I never pushed hard on the change, and thus never integrated it.

It's mostly unchanged except for updating insn in the hash table after finding an optimizable case. We were holding the deleted insn in the hash table rather than the new insn. Just something I noticed recently.

Bootstrapped and regression tested on my BPI and regression tested riscv32-elf and riscv64-elf configurations. We've used this since November internally, so it's well exercised on spec as well.

Waiting pre-commit testing's verdict...

jeff

gcc/
        * config.gcc (riscv*): Add riscv-vect-permconst.o to extra_objs.
        * config/riscv/riscv-passes.def (pass_vector_permconst): Add new pass.
        * config/riscv/riscv-protos.h (make_pass_vector_permconst): Declare.
        * config/riscv/riscv-vect-permconst.cc: New file.
        * config/riscv/t-riscv: Add build rule for riscv-vect-permconst.o.


diff --git a/gcc/config.gcc b/gcc/config.gcc
index d98df883fce..2d4155b14e4 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -550,7 +550,7 @@ pru-*-*)
 riscv*)
        cpu_type=riscv
        extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o 
riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o"
-       extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o 
riscv-avlprop.o"
+       extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o 
riscv-avlprop.o riscv-vect-permconst.o"
        extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o 
sifive-vector-builtins-bases.o"
        extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o"
        d_target_objs="riscv-d.o"
diff --git a/gcc/config/riscv/riscv-passes.def 
b/gcc/config/riscv/riscv-passes.def
index 7e6a2a0e53d..bc803c4678e 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -21,3 +21,5 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, 
pass_shorten_memrefs);
 INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop);
 INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl);
 INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad);
+INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst);
+
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2bedd878a04..2e889903eb3 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -201,6 +201,8 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context 
*ctxt);
 rtl_opt_pass * make_pass_avlprop (gcc::context *ctxt);
 rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
+rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
+
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx);
diff --git a/gcc/config/riscv/riscv-vect-permconst.cc 
b/gcc/config/riscv/riscv-vect-permconst.cc
new file mode 100644
index 00000000000..feecc7ed6da
--- /dev/null
+++ b/gcc/config/riscv/riscv-vect-permconst.cc
@@ -0,0 +1,300 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#define IN_TARGET_CODE 1
+#define INCLUDE_ALGORITHM
+#define INCLUDE_FUNCTIONAL
+#define INCLUDE_MEMORY
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
+#include "target.h"
+#include "tree-pass.h"
+#include "df.h"
+#include "rtl-ssa.h"
+#include "cfgcleanup.h"
+#include "insn-attr.h"
+#include "tm-constrs.h"
+#include "insn-opinit.h"
+#include "cfgrtl.h"
+
+/* So the basic idea of this pass is to identify loads of permutation
+   constants from the constant pool which could instead be trivially
+   derived from some earlier vector permutation constant.  This will
+   replace a memory load from the constant pool with a vadd.vi
+   instruction.
+
+   Conceptually this is much like the related_values optimization in
+   CSE, reload_cse_move2add or using SLSR to optimize constant synthesis.
+   If we wanted to make this generic I would suggest putting it into CSE
+   and providing target hooks to determine if particular permutation
+   constants could be derived from earlier permutation constants.  */
+
+/* Static metadata describing this pass to the pass manager.  No
+   properties are required or provided and no TODOs are requested;
+   the pass times under TV_NONE.  */
+const pass_data pass_data_vect_permconst = {
+  RTL_PASS,     /* type */
+  "vect_permconst",     /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE,      /* tv_id */
+  0,            /* properties_required */
+  0,            /* properties_provided */
+  0,            /* properties_destroyed */
+  0,            /* todo_flags_start */
+  0,            /* todo_flags_finish */
+};
+
+/* Entry in the hash table.  We "normalize" the permutation constant
+   by adjusting all entries by the value in the first element.  This
+   allows simple hashing to discover permutation constants that differ
+   by a single constant across all their elements and may be derived
+   from each other with a vadd.vi.
+
+   Entries are allocated with XNEW by the pass; the hash table's
+   nofree_ptr_hash traits do not free them.  */
+
+struct vector_permconst_entry
+{
+  /* The CONST_VECTOR in normalized form (first entry is zero).  */
+  /* We could avoid copying the vector with a more customized hash
+     routine which took care of normalization.  */
+  rtx normalized_vec;
+
+  /* The destination register holding the CONST_VECTOR.  When the optimization
+     applies this will be used as a source operand in the vadd.vi.  */
+  rtx dest;
+
+  /* The insn generating DEST, the only reason we need this is because we
+     do not invalidate entries which implies we have to verify that DEST
+     is unchanged between INSN and the point where we want to use DEST
+     to derive a new permutation constant.  */
+  rtx_insn *insn;
+
+  /* The bias of this entry used for normalization.  If this value is added
+     to each element in NORMALIZED_VEC we would have the original permutation
+     constant.  */
+  HOST_WIDE_INT bias;
+};
+
+/* Hash traits for entering vector_permconst_entry objects into a
+   hash_table, keyed by the contents of their normalized vector.
+   nofree_ptr_hash means the table never frees the entries itself.  */
+struct const_vector_hasher : nofree_ptr_hash <vector_permconst_entry>
+{
+  static inline hashval_t hash (const vector_permconst_entry *);
+  static inline bool equal (const vector_permconst_entry *,
+                           const vector_permconst_entry *);
+};
+
+/* Return true if the normalized vectors of VPE1 and VPE2 have the same
+   mode, the same number of elements and element-wise equal values.  */
+inline bool
+const_vector_hasher::equal (const vector_permconst_entry *vpe1,
+                           const vector_permconst_entry *vpe2)
+{
+  /* Do the cheap tests first, namely that the mode and number of entries
+     match between the two entries.  */
+  if (GET_MODE (vpe1->normalized_vec) != GET_MODE (vpe2->normalized_vec))
+    return false;
+
+  if (CONST_VECTOR_NUNITS (vpe1->normalized_vec).to_constant ()
+      != CONST_VECTOR_NUNITS (vpe2->normalized_vec).to_constant ())
+    return false;
+
+  /* Check the value of each entry in the vector.  We violate structure
+     sharing rules inside this pass, so while pointer equality would normally
+     be OK, it isn't here.  */
+  for (int i = 0;
+       i < CONST_VECTOR_NUNITS (vpe1->normalized_vec).to_constant ();
+       i++)
+    if (!rtx_equal_p (CONST_VECTOR_ELT (vpe1->normalized_vec, i),
+                     CONST_VECTOR_ELT (vpe2->normalized_vec, i)))
+      return false;
+
+  return true;
+}
+
+/* Hash VPE by the contents of its normalized vector so entries that
+   normalize to the same constant land in the same bucket.  */
+inline hashval_t
+const_vector_hasher::hash (const vector_permconst_entry *vpe)
+{
+  /* hash_rtx only ever sets this flag and we do not inspect it, but
+     initialize it rather than pass a pointer to an uninitialized
+     object (this also matches the convention of hash_rtx's callers
+     in cse.cc).  */
+  int do_not_record = 0;
+  /* NOTE(review): the mode argument to hash_rtx looks relevant only
+     for VOIDmode constants; a CONST_VECTOR carries its own mode, so
+     Pmode should be effectively unused here — confirm against
+     hash_rtx's documentation.  */
+  return hash_rtx (vpe->normalized_vec, Pmode, &do_not_record, NULL, false);
+}
+
+
+/* The pass object.  Only runs when the target has vector support and
+   some optimization is enabled.  */
+class vector_permconst : public rtl_opt_pass
+{
+public:
+  vector_permconst (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_vect_permconst, ctxt) {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) final override
+  {
+    return TARGET_VECTOR && optimize > 0;
+  }
+  virtual unsigned int execute (function *) final override;
+
+private:
+  /* Optimize permutation constant loads within a single basic block.  */
+  void process_bb (basic_block);
+
+  /* Hash table of vector_permconst_entry objects, allocated afresh by
+     process_bb for each basic block it processes.  */
+  hash_table<const_vector_hasher> *vector_permconst_table;
+}; // class pass_vector_permconst
+
+/* Try to optimize vector permutation constants in BB.
+
+   Scan BB for loads of integer vector constants from the constant pool.
+   When a constant differs from an earlier one in the block only by a
+   uniform bias that fits vadd.vi's immediate range, replace the load
+   with a vadd.vi off the earlier load's destination.  */
+void
+vector_permconst::process_bb (basic_block bb)
+{
+  vector_permconst_table = new hash_table<const_vector_hasher> (11);
+
+  /* Walk the insns in BB searching for vector loads from the constant pool
+     which can be satisfied by adjusting an earlier load with trivial
+     arithmetic.  */
+  rtx_insn *insn;
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (!INSN_P (insn))
+       continue;
+
+      rtx set = single_set (insn);
+      if (!set)
+       continue;
+
+      rtx dest = SET_DEST (set);
+      if (GET_MODE_CLASS (GET_MODE (dest)) != MODE_VECTOR_INT)
+       continue;
+
+      rtx src = SET_SRC (set);
+      if (!MEM_P (src))
+       continue;
+
+      /* A load from the constant pool should have a REG_EQUAL
+        note with the vector constant in the note.  */
+      rtx note = find_reg_equal_equiv_note (insn);
+      if (!note
+         || REG_NOTE_KIND (note) != REG_EQUAL
+         || GET_CODE (XEXP (note, 0)) != CONST_VECTOR)
+       continue;
+
+      rtx cvec = XEXP (note, 0);
+      if (!CONST_VECTOR_NUNITS (cvec).is_constant ())
+       continue;
+
+      int nunits = CONST_VECTOR_NUNITS (cvec).to_constant ();
+
+      /* Every element must be a simple CONST_INT for the INTVAL
+        accesses below to be valid; punt on anything else such as
+        CONST_WIDE_INT elements.  */
+      int i;
+      for (i = 0; i < nunits; i++)
+       if (!CONST_INT_P (CONST_VECTOR_ELT (cvec, i)))
+         break;
+      if (i != nunits)
+       continue;
+
+      /* XXX Do we need to consider other forms of constants?  */
+
+      /* We want to be selective about what gets past this point since
+        we make a copy of the vector and possibly enter it into the
+        hash table.  So reject cases that are not likely a permutation
+        constant.  ie, negative bias and large biases.  We arbitrarily
+        use 16k as the largest vector size in bits we try to optimize.
+
+        It may seem like a bias outside the range of vadd.vi should
+        be rejected, but what really matters is the difference of
+        biases across the two permutation constants.  */
+      HOST_WIDE_INT bias = INTVAL (CONST_VECTOR_ELT (cvec, 0));
+      if (bias < 0 || bias > 16384 / 8)
+       continue;
+
+      /* At this point we have a load of a constant integer vector from the
+        constant pool.  That constant integer vector is hopefully a
+        permutation constant.  We need to make a copy of the vector and
+        normalize it to zero.
+
+        XXX This violates structure sharing conventions.  */
+      rtvec_def *nvec = gen_rtvec (nunits);
+
+      for (i = 0; i < nunits; i++)
+       nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias);
+
+      rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec);
+
+      /* Now that we have a normalized vector, look it up in the hash table,
+        inserting it if it wasn't already in the table.  */
+      struct vector_permconst_entry tmp;
+      tmp.normalized_vec = copy;
+      struct vector_permconst_entry **slot
+       = vector_permconst_table->find_slot (&tmp, INSERT);
+      if (*slot == NULL)
+       {
+         /* This constant was not in the table, so initialize the hash table
+            entry.  */
+         *slot = XNEW (vector_permconst_entry);
+         (*slot)->normalized_vec = copy;
+         (*slot)->dest = dest;
+         (*slot)->bias = bias;
+         (*slot)->insn = insn;
+       }
+      else
+       {
+         /* A hit in the hash table.  We may be able to optimize this case.
+
+            If the difference in biases fits in the immediate range for
+            vadd.vi, then we may optimize.  */
+         HOST_WIDE_INT adjustment = bias - (*slot)->bias;
+         if (IN_RANGE (adjustment, -16, 15))
+           {
+             /* We also need to make sure the destination register was not
+                modified.  I've chosen to test for that at optimization time
+                rather than invalidate entries in the table.  This could be
+                changed to use REG_TICK like schemes or true invalidation if
+                this proves too compile-time costly.  */
+             if (!reg_set_between_p ((*slot)->dest, (*slot)->insn, insn))
+               {
+                 /* Instead of loading from the constant pool, adjust the
+                    output of the earlier insn into our destination.  */
+                 rtx x = gen_const_vec_duplicate (GET_MODE (copy),
+                                                  GEN_INT (adjustment));
+                 rtx plus = gen_rtx_PLUS (GET_MODE (copy), (*slot)->dest, x);
+                 /* Use a distinct name to avoid shadowing SET above.  */
+                 rtx newset = gen_rtx_SET (dest, plus);
+                 rtx_insn *new_insn = emit_insn_after (newset, insn);
+                 /* XXX Should we copy over the REG_EQUAL note first?  */
+                 delete_insn (insn);
+                 insn = new_insn;
+               }
+           }
+
+         /* We always keep the hash table entry pointing to the most recent
+            INSN that could generate the normalized entry.  We can adjust
+            in the future if data says it's useful to do so.  This just
+            keeps things simple for now.
+
+            For example, we might want to keep multiple entries if they
+            have a different biases.  */
+         (*slot)->dest = dest;
+         (*slot)->bias = bias;
+         (*slot)->insn = insn;
+       }
+    }
+
+  /* We construct and tear down the table for each block.  This may
+     be overly expensive.
+
+     The entries were allocated with XNEW and the nofree_ptr_hash
+     traits will not free them, so release each entry and then the
+     table itself; empty () alone would leak both.  */
+  for (hash_table<const_vector_hasher>::iterator it
+        = vector_permconst_table->begin ();
+       it != vector_permconst_table->end (); ++it)
+    XDELETE (*it);
+  delete vector_permconst_table;
+  vector_permconst_table = NULL;
+}
+
+/* Main entry point for this pass.  FN is the function being compiled.
+   Always returns 0 (no extra TODO flags).  */
+unsigned int
+vector_permconst::execute (function *fn)
+{
+  /* Handle each block independently.  While this should work nicely on EBBs,
+     let's wait for real world cases where it matters before adding that
+     complexity.  */
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fn)
+    process_bb (bb);
+
+  return 0;
+}
+
+/* Pass factory, referenced from riscv-passes.def via the declaration
+   in riscv-protos.h.  */
+rtl_opt_pass *
+make_pass_vector_permconst (gcc::context *ctxt)
+{
+  return new vector_permconst (ctxt);
+}
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 6493087fe51..12e2b6e999a 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -104,6 +104,13 @@ riscv-avlprop.o: $(srcdir)/config/riscv/riscv-avlprop.cc \
        $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
                $(srcdir)/config/riscv/riscv-avlprop.cc
 
+riscv-vect-permconst.o: $(srcdir)/config/riscv/riscv-vect-permconst.cc \
+  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \
+  $(TARGET_H) tree-pass.h df.h rtl-ssa.h cfgcleanup.h insn-attr.h \
+  tm-constrs.h insn-opinit.h cfgrtl.h
+       $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+               $(srcdir)/config/riscv/riscv-vect-permconst.cc
+
 riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc \
   $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H)
        $(COMPILE) $<

Reply via email to