On 28/06/11 15:14, Andrew Stubbs wrote:
On 28/06/11 13:33, Andrew Stubbs wrote:
On 23/06/11 15:41, Andrew Stubbs wrote:
If one or both of the inputs to a widening multiply are of unsigned type
then the compiler will attempt to use usmul_widen_optab or
umul_widen_optab, respectively.

That works fine, but only if the target supports those operations
directly. Otherwise, it just bombs out and reverts to the normal
inefficient non-widening multiply.

This patch attempts to catch these cases and use an alternative signed
widening multiply instruction, if one of those is available.

I believe this should be legal as long as the top bit of both inputs is
guaranteed to be zero. The code achieves this guarantee by
zero-extending the inputs to a wider mode (which must still be narrower
than the output mode).

OK?

This update fixes the testsuite issue Janis pointed out.

And this one fixes up the wmul-5.c testcase also. The patch has changed
the correct result.

Here's an update for the context changed by the update to patch 3.

The content of the patch has not changed.

Andrew
2011-07-04  Andrew Stubbs  <a...@codesourcery.com>

	gcc/
	* Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency.
	* optabs.c (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this, and add new
	argument 'found_mode'.
	* optabs.h (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this.
	(find_widening_optab_handler): New macro.
	* tree-ssa-math-opts.c: Include langhooks.h
	(build_and_insert_cast): New function.
	(convert_mult_to_widen): Add new argument 'gsi'.
	Convert unsupported unsigned multiplies to signed.
	(convert_plusminus_to_widen): Likewise.
	(execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen.

	gcc/testsuite/
	* gcc.target/arm/wmul-5.c: Update expected result.
	* gcc.target/arm/wmul-6.c: New file.

--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2672,7 +2672,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
 tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \
    $(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \
-   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h
+   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \
+   langhooks.h
 tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
    $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \
    $(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -232,9 +232,10 @@ add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1)
    non-widening optabs also.  */
 
 enum insn_code
-find_widening_optab_handler (optab op, enum machine_mode to_mode,
-			     enum machine_mode from_mode,
-			     int permit_non_widening)
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+				      enum machine_mode from_mode,
+				      int permit_non_widening,
+				      enum machine_mode *found_mode)
 {
   for (; (permit_non_widening || from_mode != to_mode)
 	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
@@ -245,7 +246,11 @@ find_widening_optab_handler (optab op, enum machine_mode to_mode,
 						       from_mode);
 
       if (handler != CODE_FOR_nothing)
-	return handler;
+	{
+	  if (found_mode)
+	    *found_mode = from_mode;
+	  return handler;
+	}
     }
 
   return CODE_FOR_nothing;
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -808,8 +808,13 @@ extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
 /* Find a widening optab even if it doesn't widen as much as we want.  */
-extern enum insn_code find_widening_optab_handler (optab, enum machine_mode,
-						   enum machine_mode, int);
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+							    enum machine_mode,
+							    enum machine_mode,
+							    int,
+							    enum machine_mode *);
 
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
    distinguish between machines with a vector shift that takes a scalar for the
--- a/gcc/testsuite/gcc.target/arm/wmul-5.c
+++ b/gcc/testsuite/gcc.target/arm/wmul-5.c
@@ -7,4 +7,4 @@ foo (long long a, char *b, char *c)
   return a + *b * *c;
 }
 
-/* { dg-final { scan-assembler "umlal" } } */
+/* { dg-final { scan-assembler "smlalbb" } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "basic-block.h"
 #include "target.h"
 #include "gimple-pretty-print.h"
+#include "langhooks.h"
 
 /* FIXME: RTL headers have to be included here for optabs.  */
 #include "rtl.h"		/* Because optabs.h wants enum rtx_code.  */
@@ -1086,6 +1087,21 @@ build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
   return result;
 }
 
+/* Build a gimple assignment to cast VAL to TYPE, and put the result in
+   TARGET.  Insert the statement prior to GSI's current position, and
+   return the from SSA name.  */
+
+static tree
+build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
+		       tree target, tree val, tree type)
+{
+  tree result = make_ssa_name (target, NULL);
+  gimple stmt = gimple_build_assign (result, fold_convert (type, val));
+  gimple_set_location (stmt, loc);
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+  return result;
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -2047,7 +2063,7 @@ is_widening_mult_p (gimple stmt,
    value is true iff we converted the statement.  */
 
 static bool
-convert_mult_to_widen (gimple stmt)
+convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 {
   tree lhs, rhs1, rhs2, type, type1, type2;
   enum insn_code handler;
@@ -2075,7 +2091,31 @@ convert_mult_to_widen (gimple stmt)
   handler = find_widening_optab_handler (op, to_mode, from_mode, 0);
 
   if (handler == CODE_FOR_nothing)
-    return false;
+    {
+      if (op != smul_widen_optab)
+	{
+	  from_mode = GET_MODE_WIDER_MODE (from_mode);
+	  if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
+	    return false;
+
+	  op = smul_widen_optab;
+	  handler = find_widening_optab_handler_and_mode (op, to_mode,
+							  from_mode, 0,
+							  &from_mode);
+
+	  if (handler == CODE_FOR_nothing)
+	    return false;
+
+	  type1 = type2 = lang_hooks.types.type_for_mode (from_mode, 0);
+
+	  rhs1 = build_and_insert_cast (gsi, gimple_location (stmt),
+					create_tmp_var (type1, NULL), rhs1, type1);
+	  rhs2 = build_and_insert_cast (gsi, gimple_location (stmt),
+					create_tmp_var (type2, NULL), rhs2, type2);
+	}
+      else
+	return false;
+    }
 
   gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
   gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
@@ -2256,7 +2296,22 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
     return false;
 
   if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
-    return false;
+    {
+      enum machine_mode mode = TYPE_MODE (type1);
+      mode = GET_MODE_WIDER_MODE (mode);
+      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (TYPE_MODE (type)))
+	{
+	  type1 = type2 = lang_hooks.types.type_for_mode (mode, 0);
+	  mult_rhs1 = build_and_insert_cast (gsi, gimple_location (stmt),
+					     create_tmp_var (type1, NULL),
+					     mult_rhs1, type1);
+	  mult_rhs2 = build_and_insert_cast (gsi, gimple_location (stmt),
+					     create_tmp_var (type2, NULL),
+					     mult_rhs2, type2);
+	}
+      else
+	return false;
+    }
 
   /* Verify that the convertions between the mult and the add doesn't do
      anything unexpected.  */
@@ -2489,7 +2544,7 @@ execute_optimize_widening_mul (void)
 	      switch (code)
 		{
 		case MULT_EXPR:
-		  if (!convert_mult_to_widen (stmt)
+		  if (!convert_mult_to_widen (stmt, &gsi)
 		      && convert_mult_to_fma (stmt,
 					      gimple_assign_rhs1 (stmt),
 					      gimple_assign_rhs2 (stmt)))

Reply via email to