Based on patch [3/4], we can further optimize the vaarg gimplification
logic, this time not by removing redundant checks, but by removing
redundant basic blocks. This simplifies the control flow graph and
eventually generates fewer branch instructions.

The current gimplification logic requires three basic blocks:

 // check if we already stepped into stack area
 if (vaarg_offset >= 0)
   fetch from stack
 else
   {
     // we are still in the register area, but a composite type is
     // never passed partly in registers and partly on the stack, so
     // check whether the remaining register area can hold the whole
     // argument. if it cannot, skip it and fetch from the stack.
     if (vaarg_offset + arg_size > 0)
       fetch from stack
     else
       fetch from register
   }

However, we can simplify the logic into the following, reducing the
number of basic blocks to two:

 if (vaarg_offset >= 0 || (vaarg_offset + arg_size > 0))
   fetch from stack
 else
   fetch from register

OK for trunk?

2016-05-06 Alan Lawrence  <alan.lawre...@arm.com>
           Jiong Wang  <jiong.w...@arm.com>

gcc/
  * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Use
  TRUTH_ORIF_EXPR.

gcc/testsuite/
  * gcc.target/aarch64/va_arg_5.c: New test.

>From d742eaa3469f28e4207034f3fe4ebd4d54b3dd42 Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" <jiong.w...@arm.com>
Date: Fri, 6 May 2016 14:38:00 +0100
Subject: [PATCH 4/4] 4

---
 gcc/config/aarch64/aarch64.c                | 53 +++++++++++++++++++++--------
 gcc/testsuite/gcc.target/aarch64/va_arg_5.c | 20 +++++++++++
 2 files changed, 58 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_5.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 06904d5..bd4a9fe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9577,7 +9577,32 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
 }
 
-/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
+/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
+   The VA_ARG gimplify logic was:
+
+     // check if we already stepped into stack area
+     if (vaarg_offset >= 0)
+       fetch from stack
+     else
+       {
+	 // we are still in the register area, but a composite type is
+	 // never passed partly in registers and partly on the stack, so
+	 // check whether the remaining register area can hold the whole
+	 // argument.  If it cannot, skip it and fetch from the stack.
+	 if (vaarg_offset + arg_size > 0)
+	   fetch from stack
+	 else
+	   fetch from register
+       }
+
+   we can further optimize the logic into the following to reduce BB.
+
+     if (vaarg_offset >= 0 || (vaarg_offset + arg_size > 0))
+       fetch from stack
+     else
+       fetch from register
+
+   the tree node TRUTH_ORIF_EXPR can express the condition we want.  */
 
 static tree
 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
@@ -9595,7 +9620,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
   tree stack, f_top, f_off, off, arg, roundup, on_stack;
   HOST_WIDE_INT size, rsize, adjust, align;
-  tree t, t1, u, cond1, cond2;
+  tree t, t1, u, cond1, pred1, pred2;
 
   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
   if (indirect_p)
@@ -9669,9 +9694,8 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   off = get_initialized_tmp_var (f_off, pre_p, NULL);
 
   /* Emit code to branch if off >= 0.  */
-  t = build2 (GE_EXPR, boolean_type_node, off,
-	      build_int_cst (TREE_TYPE (off), 0));
-  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
+  pred1 = build2 (GE_EXPR, boolean_type_node, off,
+		  build_int_cst (TREE_TYPE (off), 0));
 
   if (composite_type_p)
     {
@@ -9696,16 +9720,16 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
       if (roundup)
 	t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
 
-      /* [cond2] if (ap.__[g|v]r_offs > 0)  */
-      u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
-		  build_int_cst (TREE_TYPE (f_off), 0));
-      cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
+      /* [pred2] if (ap.__[g|v]r_offs > 0)  */
+      pred2 = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
+		      build_int_cst (TREE_TYPE (f_off), 0));
+      pred2 = build2 (COMPOUND_EXPR, TREE_TYPE (pred2), t, pred2);
 
-      /* String up: make sure the assignment happens before the use.  */
-      t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
-      COND_EXPR_ELSE (cond1) = t;
+      pred1 = build2 (TRUTH_ORIF_EXPR, boolean_type_node, pred1, pred2);
     }
 
+  cond1 = build3 (COND_EXPR, ptr_type_node, pred1, NULL_TREE, NULL_TREE);
+
   /* Prepare the trees handling the argument that is passed on the stack;
      the top level node will store in ON_STACK.  */
   arg = get_initialized_tmp_var (stack, pre_p, NULL);
@@ -9746,8 +9770,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
 
   if (composite_type_p)
     {
-      COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
-      COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
+      COND_EXPR_THEN (cond1) = on_stack;
 
       t = off;
     }
@@ -9854,7 +9877,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
     }
 
   if (composite_type_p)
-    COND_EXPR_ELSE (cond2) = t;
+    COND_EXPR_ELSE (cond1) = t;
   else
     {
       t1 = build2 (PLUS_EXPR, TREE_TYPE (off), roundup,
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_5.c b/gcc/testsuite/gcc.target/aarch64/va_arg_5.c
new file mode 100644
index 0000000..0d6daef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_5.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -fdump-tree-lower_vaarg" } */
+
+typedef struct A {
+    float a;
+} T;
+
+T
+foo (char *fmt, ...)
+{
+  T a;
+  __builtin_va_list ap;
+
+  __builtin_va_start (ap, fmt);
+  a = __builtin_va_arg (ap, T);
+  __builtin_va_end (ap);
+
+  /* { dg-final { scan-tree-dump-times "ap.__stack =" 1 "lower_vaarg"} } */
+  return a;
+}
-- 
1.9.1

Reply via email to