Hello Everyone,
    This patch is for the Cilk Plus branch and mainly affects the C and C++
compilers.  It clones a function that is marked as an elemental function and
passes the parameters and return values in the appropriate vector registers.

Thanking You,

Yours Sincerely,

Balaji V. Iyer. 
diff --git a/gcc/ChangeLog.cilk b/gcc/ChangeLog.cilk
index 8bfb689..6b7c176 100644
--- a/gcc/ChangeLog.cilk
+++ b/gcc/ChangeLog.cilk
@@ -1,3 +1,17 @@
+2012-04-05  Balaji V. Iyer  <balaji.v.i...@intel.com>
+
+       * config/i386/i386.c (type_natural_mode): Added a flag_enable_cilk
+       check.
+       (ix86_function_arg_boundary): Likewise.
+       * expr.c (expand_expr_real_1): Likewise.
+       * elem-function.c (create_processor_attribute): Added avx to target
+       string.
+       (create_elem_fn_nodes): Called copy_node on new_decl's type.  Replaced
+       tree_function_versioning call with tree_elem_fn_versioning.
+       * tree-inline.c (elem_fn_add_local_variables): New function.
+       (elem_fn_copy_arguments_for_versioning): Likewise.
+       (tree_elem_fn_versioning): Likewise.
+
 2012-03-20  Balaji V. Iyer  <balaji.v.i...@intel.com>
 
        * elem-function.c (extract_elem_fn_values): Initialized proc_type field
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 7262859..66fd3c6 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -584,6 +584,8 @@ struct cgraph_node *cgraph_function_versioning (struct cgraph_node *,
                                                basic_block, const char *);
 void tree_function_versioning (tree, tree, VEC (ipa_replace_map_p,gc)*,
                               bool, bitmap, bool, bitmap, basic_block);
+void tree_elem_fn_versioning (tree, tree, VEC (ipa_replace_map_p,gc)*,
+                             bool, bitmap, bool, bitmap, basic_block, int);
 void record_references_in_initializer (tree, bool);
 bool cgraph_process_new_functions (void);
 void cgraph_process_same_body_aliases (void);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 92604ae..754d53d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5816,9 +5816,12 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
                        && !warnedavx
                        && cum->warn_avx)
                      {
-                       warnedavx = true;
-                       warning (0, "AVX vector argument without AVX "
-                                "enabled changes the ABI");
+                       if (!flag_enable_cilk)
+                         {
+                           warnedavx = true;
+                           warning (0, "AVX vector argument without AVX "
+                                    "enabled changes the ABI");
+                         }
                      }
                    return TYPE_MODE (type);
                  }
@@ -7203,11 +7206,14 @@ ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
          && align != ix86_compat_function_arg_boundary (mode, type,
                                                         saved_align))
        {
-         warned = true;
-         inform (input_location,
-                 "The ABI for passing parameters with %d-byte"
-                 " alignment has changed in GCC 4.6",
-                 align / BITS_PER_UNIT);
+         if (!flag_enable_cilk)
+           {
+             warned = true;
+             inform (input_location,
+                     "The ABI for passing parameters with %d-byte"
+                     " alignment has changed in GCC 4.6",
+                     align / BITS_PER_UNIT);
+           }
        }
     }
 
diff --git a/gcc/elem-function.c b/gcc/elem-function.c
index a6bc4e3..dd3b75b 100644
--- a/gcc/elem-function.c
+++ b/gcc/elem-function.c
@@ -265,6 +265,8 @@ create_processor_attribute (elem_fn_info *elem_fn_values, tree *opposite_attr)
                     build_string (strlen ("arch=corei7"), "arch=corei7"));
       VEC_safe_push (tree, gc, proc_vec_list,
                     build_string (strlen ("sse4.2"), "sse4.2"));
+      VEC_safe_push (tree, gc, proc_vec_list,
+                    build_string (strlen ("avx"), "avx"));
       if (opposite_attr)
        {
          VEC_safe_push (tree, gc, opp_proc_vec_list,
@@ -307,7 +309,7 @@ create_optimize_attribute (int option)
   opt_attr = build_tree_list (get_identifier ("optimize"), opt_attr);
   return opt_attr;
 }
-  
+
 /* this function will find the appropriate mangling suffix for the vector
  * function */
 static char *
@@ -362,6 +364,7 @@ create_elem_fn_nodes (struct cgraph_node *node)
   
   old_decl = node->decl;
   new_decl = copy_node (old_decl);
+  TREE_TYPE (new_decl) = copy_node (TREE_TYPE (old_decl));
   elem_fn_values = extract_elem_fn_values (old_decl);
 
   if (elem_fn_values)
@@ -381,8 +384,8 @@ create_elem_fn_nodes (struct cgraph_node *node)
   new_node->local.externally_visible = node->local.externally_visible;
   new_node->lowered = true;
 
-  tree_function_versioning (old_decl, new_decl, NULL, false, NULL, false, NULL,
-                           NULL);
+  tree_elem_fn_versioning (old_decl, new_decl, NULL, false, NULL, false, NULL,
+                          NULL, elem_fn_values->vectorlength[0]);
   cgraph_call_function_insertion_hooks (new_node);
   DECL_STRUCT_FUNCTION (new_decl)->elem_fn_already_cloned = true;
   DECL_STRUCT_FUNCTION (new_decl)->curr_properties = cfun->curr_properties;
diff --git a/gcc/expr.c b/gcc/expr.c
index eaf67a1..920f9b4 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9137,7 +9137,9 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
            }
          else
            pmode = promote_decl_mode (exp, &unsignedp);
-         gcc_assert (GET_MODE (decl_rtl) == pmode);
+
+         if (!flag_enable_cilk)
+           gcc_assert (GET_MODE (decl_rtl) == pmode);
 
          temp = gen_lowpart_SUBREG (mode, decl_rtl);
          SUBREG_PROMOTED_VAR_P (temp) = 1;
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index b060ae2..c2d8c70 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3777,6 +3777,48 @@ add_local_variables (struct function *callee, struct function *caller,
       }
 }
 
+/* Add CALLEE's locals to CALLER, retyping remapped ones as VLENGTH vectors.  */
+
+static inline void
+elem_fn_add_local_variables (struct function *callee, struct function *caller,
+                            copy_body_data *id, bool check_var_ann,
+                            int vlength)
+{
+  tree var;
+  unsigned ix;
+
+  FOR_EACH_LOCAL_DECL (callee, ix, var)
+    if (TREE_STATIC (var) && !TREE_ASM_WRITTEN (var))
+      {
+       if (!check_var_ann
+           || (var_ann (var) && add_referenced_var (var)))
+         add_local_decl (caller, var);
+      }
+    else if (!can_be_nonlocal (var, id))
+      {
+        tree new_var = remap_decl (var, id);
+
+        /* Remap debug-expressions.  */
+       if (TREE_CODE (new_var) == VAR_DECL
+           && DECL_DEBUG_EXPR_IS_FROM (new_var)
+           && new_var != var)
+         {
+           tree tem = DECL_DEBUG_EXPR (var);
+           bool old_regimplify = id->regimplify;
+           id->remapping_type_depth++;
+           walk_tree (&tem, copy_tree_body_r, id, NULL);
+           id->remapping_type_depth--;
+           id->regimplify = old_regimplify;
+           SET_DECL_DEBUG_EXPR (new_var, tem);
+         }
+       TREE_TYPE (new_var) = copy_node (TREE_TYPE (new_var));
+       TREE_TYPE (new_var) =
+         build_vector_type (copy_node (TREE_TYPE (new_var)), vlength);
+       DECL_GIMPLE_REG_P (new_var) = 1;
+       add_local_decl (caller, new_var);
+      }
+}
+
 /* If STMT is a GIMPLE_CALL, replace it with its inline expansion.  */
 
 static bool
@@ -4925,6 +4967,52 @@ copy_arguments_for_versioning (tree orig_parm, copy_body_data * id,
   return new_parm;
 }
 
+/* Copy the argument chain ORIG_PARM; kept arguments become VLENGTH vectors.  */
+static tree
+elem_fn_copy_arguments_for_versioning (tree orig_parm, copy_body_data * id,
+                                      bitmap args_to_skip, tree *vars,
+                                      int vlength)
+{
+  tree arg, *parg;
+  tree new_parm = NULL;
+  int i = 0;
+
+  parg = &new_parm;
+
+  for (arg = orig_parm; arg; arg = DECL_CHAIN (arg), i++)
+    if (!args_to_skip || !bitmap_bit_p (args_to_skip, i))
+      {
+        tree new_tree = remap_decl (arg, id);
+       if (TREE_CODE (new_tree) != PARM_DECL)
+         new_tree = id->copy_decl (arg, id);
+       /* Retype the parameter as a vector of VLENGTH elements.  */
+       TREE_TYPE (new_tree) = copy_node (TREE_TYPE (new_tree));
+       TREE_TYPE (new_tree) =
+         build_vector_type (TREE_TYPE (new_tree), vlength);
+       DECL_ARG_TYPE (new_tree) =
+         build_vector_type (DECL_ARG_TYPE (new_tree), vlength);
+       DECL_GIMPLE_REG_P (new_tree) = 1;
+        lang_hooks.dup_lang_specific_decl (new_tree);
+        *parg = new_tree;
+       parg = &DECL_CHAIN (new_tree);
+      }
+    else if (!pointer_map_contains (id->decl_map, arg))
+      {
+       /* Make an equivalent VAR_DECL.  If the argument was used
+          as temporary variable later in function, the uses will be
+          replaced by local variable.  */
+       tree var = copy_decl_to_var (arg, id);
+       add_referenced_var (var);
+       insert_decl_map (id, arg, var);
+        /* Declare this new variable.  */
+        DECL_CHAIN (var) = *vars;
+        *vars = var;
+      }
+  return new_parm;
+}
+
+
+
 /* Return a copy of the function's static chain.  */
 static tree
 copy_static_chain (tree static_chain, copy_body_data * id)
@@ -5333,6 +5421,290 @@ tree_function_versioning (tree old_decl, tree new_decl,
   return;
 }
 
+void
+tree_elem_fn_versioning (tree old_decl, tree new_decl,
+                        VEC(ipa_replace_map_p,gc)* tree_map,
+                        bool update_clones, bitmap args_to_skip,
+                        bool skip_return, bitmap blocks_to_copy,
+                        basic_block new_entry, int vlength)
+{
+  struct cgraph_node *old_version_node;
+  struct cgraph_node *new_version_node;
+  copy_body_data id;
+  tree p;
+  unsigned i;
+  struct ipa_replace_map *replace_info;
+  basic_block old_entry_block, bb;
+  VEC (gimple, heap) *init_stmts = VEC_alloc (gimple, heap, 10);
+
+  tree old_current_function_decl = current_function_decl;
+  tree vars = NULL_TREE;
+
+  gcc_assert (TREE_CODE (old_decl) == FUNCTION_DECL
+             && TREE_CODE (new_decl) == FUNCTION_DECL);
+  DECL_POSSIBLY_INLINED (old_decl) = 1;
+
+  old_version_node = cgraph_get_node (old_decl);
+  gcc_checking_assert (old_version_node);
+  new_version_node = cgraph_get_node (new_decl);
+  gcc_checking_assert (new_version_node);
+
+  if (TREE_TYPE (TREE_TYPE (old_decl)) != void_type_node)
+    {
+      TREE_TYPE (TREE_TYPE (new_decl)) =
+       copy_node (TREE_TYPE (TREE_TYPE (old_decl)));
+      TREE_TYPE (TREE_TYPE (new_decl)) =
+       build_vector_type (TREE_TYPE (TREE_TYPE (new_decl)), vlength);
+    }
+  
+  
+  /* Copy over debug args.  */
+  if (DECL_HAS_DEBUG_ARGS_P (old_decl))
+    {
+      VEC(tree, gc) **new_debug_args, **old_debug_args;
+      gcc_checking_assert (decl_debug_args_lookup (new_decl) == NULL);
+      DECL_HAS_DEBUG_ARGS_P (new_decl) = 0;
+      old_debug_args = decl_debug_args_lookup (old_decl);
+      if (old_debug_args)
+       {
+         new_debug_args = decl_debug_args_insert (new_decl);
+         *new_debug_args = VEC_copy (tree, gc, *old_debug_args);
+       }
+    }
+
+  /* Output the inlining info for this abstract function, since it has been
+     inlined.  If we don't do this now, we can lose the information about the
+     variables in the function when the blocks get blown away as soon as we
+     remove the cgraph node.  */
+  (*debug_hooks->outlining_inline_function) (old_decl);
+
+  DECL_ARTIFICIAL (new_decl) = 1;
+  DECL_ABSTRACT_ORIGIN (new_decl) = DECL_ORIGIN (old_decl);
+  DECL_FUNCTION_PERSONALITY (new_decl) = DECL_FUNCTION_PERSONALITY (old_decl);
+
+  /* Prepare the data structures for the tree copy.  */
+  memset (&id, 0, sizeof (id));
+
+  /* Set passed to fold_marked_statements once the body has been copied.  */
+  id.statements_to_fold = pointer_set_create ();
+
+  id.decl_map = pointer_map_create ();
+  id.debug_map = NULL;
+  id.src_fn = old_decl;
+  id.dst_fn = new_decl;
+  id.src_node = old_version_node;
+  id.dst_node = new_version_node;
+  id.src_cfun = DECL_STRUCT_FUNCTION (old_decl);
+  if (id.src_node->ipa_transforms_to_apply)
+    {
+      VEC(ipa_opt_pass,heap) * old_transforms_to_apply =
+       id.dst_node->ipa_transforms_to_apply;
+      unsigned int i;
+
+      id.dst_node->ipa_transforms_to_apply =
+       VEC_copy (ipa_opt_pass, heap, id.src_node->ipa_transforms_to_apply);
+      for (i = 0; i < VEC_length (ipa_opt_pass, old_transforms_to_apply); i++)
+        VEC_safe_push (ipa_opt_pass, heap, id.dst_node->ipa_transforms_to_apply,
+                      VEC_index (ipa_opt_pass,
+                                 old_transforms_to_apply,
+                                 i));
+    }
+
+  id.copy_decl = copy_decl_no_change;
+  id.transform_call_graph_edges
+    = update_clones ? CB_CGE_MOVE_CLONES : CB_CGE_MOVE;
+  id.transform_new_cfg = true;
+  id.transform_return_to_modify = false;
+  id.transform_lang_insert_block = NULL;
+
+  current_function_decl = new_decl;
+  old_entry_block = ENTRY_BLOCK_PTR_FOR_FUNCTION
+    (DECL_STRUCT_FUNCTION (old_decl));
+  initialize_cfun (new_decl, old_decl,
+                  old_entry_block->count);
+  DECL_STRUCT_FUNCTION (new_decl)->gimple_df->ipa_pta
+    = id.src_cfun->gimple_df->ipa_pta;
+  push_cfun (DECL_STRUCT_FUNCTION (new_decl));
+
+  /* Copy the function's static chain.  */
+  p = DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl;
+  if (p)
+    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl =
+      copy_static_chain (DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl,
+                        &id);
+
+  /* If there's a tree_map, prepare for substitution.  */
+  if (tree_map)
+    for (i = 0; i < VEC_length (ipa_replace_map_p, tree_map); i++)
+      {
+       gimple init;
+       replace_info = VEC_index (ipa_replace_map_p, tree_map, i);
+       if (replace_info->replace_p)
+         {
+           tree op = replace_info->new_tree;
+           if (!replace_info->old_tree)
+             {
+               int i = replace_info->parm_num;
+               tree parm;
+               for (parm = DECL_ARGUMENTS (old_decl); i;
+                    parm = DECL_CHAIN (parm))
+                 i --;
+               replace_info->old_tree = parm;
+             }
+               
+
+           STRIP_NOPS (op);
+
+           if (TREE_CODE (op) == VIEW_CONVERT_EXPR)
+             op = TREE_OPERAND (op, 0);
+
+           if (TREE_CODE (op) == ADDR_EXPR)
+             {
+               op = TREE_OPERAND (op, 0);
+               while (handled_component_p (op))
+                 op = TREE_OPERAND (op, 0);
+               if (TREE_CODE (op) == VAR_DECL)
+                 add_referenced_var (op);
+             }
+           gcc_assert (TREE_CODE (replace_info->old_tree) == PARM_DECL);
+           init = setup_one_parameter (&id, replace_info->old_tree,
+                                       replace_info->new_tree, id.src_fn,
+                                       NULL,
+                                       &vars);
+           if (init)
+             VEC_safe_push (gimple, heap, init_stmts, init);
+         }
+      }
+  /* Copy the function's arguments.  */
+  if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
+    DECL_ARGUMENTS (new_decl) =
+      elem_fn_copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
+                                            args_to_skip, &vars, vlength);
+
+  DECL_INITIAL (new_decl) = remap_blocks (DECL_INITIAL (id.src_fn), &id);
+  BLOCK_SUPERCONTEXT (DECL_INITIAL (new_decl)) = new_decl;
+
+  declare_inline_vars (DECL_INITIAL (new_decl), vars);
+
+  if (!VEC_empty (tree, DECL_STRUCT_FUNCTION (old_decl)->local_decls))
+    /* Add local vars.  */
+    elem_fn_add_local_variables (DECL_STRUCT_FUNCTION (old_decl), cfun, &id,
+                                false, vlength);
+
+  if (DECL_RESULT (old_decl) == NULL_TREE)
+    ;
+  else if (skip_return && !VOID_TYPE_P (TREE_TYPE (DECL_RESULT (old_decl))))
+    {
+      DECL_RESULT (new_decl)
+       = build_decl (DECL_SOURCE_LOCATION (DECL_RESULT (old_decl)),
+                     RESULT_DECL, NULL_TREE, void_type_node);
+      DECL_CONTEXT (DECL_RESULT (new_decl)) = new_decl;
+      cfun->returns_struct = 0;
+      cfun->returns_pcc_struct = 0;
+    }
+  else
+    {
+      tree old_name;
+      DECL_RESULT (new_decl) = remap_decl (DECL_RESULT (old_decl), &id);
+      /* Give a non-void result a vector type of VLENGTH elements.  */
+      if (TREE_TYPE (DECL_RESULT (new_decl)) != void_type_node)
+       {
+         TREE_TYPE (DECL_RESULT (new_decl)) =
+           build_vector_type (copy_node (TREE_TYPE (DECL_RESULT (new_decl))),
+                              vlength);
+         DECL_MODE (DECL_RESULT (new_decl)) =
+           TYPE_MODE (TREE_TYPE (DECL_RESULT (new_decl)));
+       }
+      lang_hooks.dup_lang_specific_decl (DECL_RESULT (new_decl));
+      if (gimple_in_ssa_p (id.src_cfun)
+         && DECL_BY_REFERENCE (DECL_RESULT (old_decl))
+         && (old_name
+             = gimple_default_def (id.src_cfun, DECL_RESULT (old_decl))))
+       {
+         tree new_name = make_ssa_name (DECL_RESULT (new_decl), NULL);
+         insert_decl_map (&id, old_name, new_name);
+         SSA_NAME_DEF_STMT (new_name) = gimple_build_nop ();
+         set_default_def (DECL_RESULT (new_decl), new_name);
+       }
+    }
+
+  /* Copy the Function's body.  */
+  copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE,
+            ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, blocks_to_copy, new_entry);
+
+  /* Renumber the lexical scoping (non-code) blocks consecutively.  */
+  number_blocks (new_decl);
+
+  /* We want to create the BB unconditionally, so that the addition of
+     debug stmts doesn't affect BB count, which may in the end cause
+     codegen differences.  */
+  bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR));
+  while (VEC_length (gimple, init_stmts))
+    insert_init_stmt (&id, bb, VEC_pop (gimple, init_stmts));
+  update_clone_info (&id);
+
+  /* Remap the nonlocal_goto_save_area, if any.  */
+  if (cfun->nonlocal_goto_save_area)
+    {
+      struct walk_stmt_info wi;
+
+      memset (&wi, 0, sizeof (wi));
+      wi.info = &id;
+      walk_tree (&cfun->nonlocal_goto_save_area, remap_gimple_op_r, &wi, NULL);
+    }
+
+  /* Clean up.  */
+  pointer_map_destroy (id.decl_map);
+  if (id.debug_map)
+    pointer_map_destroy (id.debug_map);
+  free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
+
+  fold_marked_statements (0, id.statements_to_fold);
+  pointer_set_destroy (id.statements_to_fold);
+  fold_cond_expr_cond ();
+  delete_unreachable_blocks_update_callgraph (&id);
+  if (id.dst_node->analyzed)
+    cgraph_rebuild_references ();
+  update_ssa (TODO_update_ssa);
+
+  /* After partial cloning we need to rescale frequencies, so they are
+     within proper range in the cloned function.  */
+  if (new_entry)
+    {
+      struct cgraph_edge *e;
+      rebuild_frequencies ();
+
+      new_version_node->count = ENTRY_BLOCK_PTR->count;
+      for (e = new_version_node->callees; e; e = e->next_callee)
+       {
+         basic_block bb = gimple_bb (e->call_stmt);
+         e->frequency = compute_call_stmt_bb_frequency (current_function_decl,
+                                                        bb);
+         e->count = bb->count;
+       }
+      for (e = new_version_node->indirect_calls; e; e = e->next_callee)
+       {
+         basic_block bb = gimple_bb (e->call_stmt);
+         e->frequency = compute_call_stmt_bb_frequency (current_function_decl,
+                                                        bb);
+         e->count = bb->count;
+       }
+    }
+
+  free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
+
+  gcc_assert (!id.debug_stmts);
+  VEC_free (gimple, heap, init_stmts);
+  pop_cfun ();
+  current_function_decl = old_current_function_decl;
+  gcc_assert (!current_function_decl
+             || DECL_STRUCT_FUNCTION (current_function_decl) == cfun);
+  return;
+}
+
+
 /* EXP is CALL_EXPR present in a GENERIC expression tree.  Try to integrate
    the callee and return the inlined body on success.  */
 

Reply via email to