Hello Everyone,
This patch is for the Cilkplus branch mainly affecting the C compiler.
This patch will do the vector function mangling correctly for elemental
functions.
Thanking You,
Yours Sincerely,
Balaji V. Iyer.
diff --git a/gcc/ChangeLog.cilk b/gcc/ChangeLog.cilk
index 1398870..c1b1d71 100644
--- a/gcc/ChangeLog.cilk
+++ b/gcc/ChangeLog.cilk
@@ -1,3 +1,20 @@
+2012-03-11 Balaji V. Iyer <balaji.v.i...@intel.com>
+
+ * attribs.c (decl_attributes): Concatenated existing attributes with
+ vector attributes.
+ * c-decl.c (bind): Added a check if scope is not null.
+ * elem-function.c (rename_elem_fn): New function.
+ (is_elem_fn): Likewise.
+ (find_processor_code): Likewise.
+ (find_vlength_code): Likewise.
+ (create_processor_attribute): Likewise.
+ (create_optimize_attribute): Likewise.
+ (find_suffix): Likewise.
+ (create_elem_fn_nodes): Likewise.
+ (extract_elem_fn_values): Likewise.
+ (create_elem_vec_fn): Likewise.
+ * passes.c (init_optimization_passes): Added elemental function pass.
+
2012-03-09 Balaji V. Iyer <balaji.v.i...@intel.com>
* attribs.c (decl_attributes): Added a check for elemental function
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 0da06b3..ada4090 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1453,6 +1453,7 @@ OBJS = \
cilk.o \
cilk-low.o \
array-notation-common.o \
+ elem-function.o \
$(out_object_file) \
$(EXTRA_OBJS) \
$(host_hook_obj)
@@ -3436,7 +3437,8 @@ lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H)
coretypes.h \
insn-config.h $(BASIC_BLOCK_H) $(RECOG_H) $(OBSTACK_H) $(BITMAP_H) \
$(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H) dce.h
-
+elem-function.o: elem-function.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(GIMPLE_H) \
+ $(OPTABS_H) $(RECOG_H)
array-notation-common.o: array-notation-common.c $(CONFIG_H) $(SYSTEM_H) \
$(TREE_H) $(RTL_H) $(OPTABS_H) $(GIMPLE_H) $(RECOG_H)
diff --git a/gcc/attribs.c b/gcc/attribs.c
index 2ececc4..13c1417 100644
--- a/gcc/attribs.c
+++ b/gcc/attribs.c
@@ -327,6 +327,12 @@ decl_attributes (tree *node, tree attributes, int flags)
if (!is_elem_fn_attribute_p (name))
warning (OPT_Wattributes, "%qE attribute directive ignored",
name);
+ else
+ {
+ returned_attrs = tree_cons (name, args, returned_attrs);
+ DECL_ATTRIBUTES (*anode) = tree_cons (name, args,
+ DECL_ATTRIBUTES (*anode));
+ }
continue;
}
else if (list_length (args) < spec->min_length
diff --git a/gcc/c-decl.c b/gcc/c-decl.c
index 4abf738..6251bee 100644
--- a/gcc/c-decl.c
+++ b/gcc/c-decl.c
@@ -620,7 +620,8 @@ bind (tree name, tree decl, struct c_scope *scope, bool
invisible,
b->shadowed = 0;
b->decl = decl;
b->id = name;
- b->depth = scope->depth;
+ if (scope)
+ b->depth = scope->depth;
b->invisible = invisible;
b->nested = nested;
b->inner_comp = 0;
@@ -629,8 +630,11 @@ bind (tree name, tree decl, struct c_scope *scope, bool
invisible,
b->u.type = NULL;
- b->prev = scope->bindings;
- scope->bindings = b;
+ if (scope)
+ {
+ b->prev = scope->bindings;
+ scope->bindings = b;
+ }
if (decl_jump_unsafe (decl))
scope->has_jump_unsafe_decl = 1;
@@ -658,9 +662,11 @@ bind (tree name, tree decl, struct c_scope *scope, bool
invisible,
/* Locate the appropriate place in the chain of shadowed decls
to insert this binding. Normally, scope == current_scope and
this does nothing. */
- while (*here && (*here)->depth > scope->depth)
- here = &(*here)->shadowed;
-
+ if (scope)
+ {
+ while (*here && (*here)->depth > scope->depth)
+ here = &(*here)->shadowed;
+ }
b->shadowed = *here;
*here = b;
}
diff --git a/gcc/c-family/ChangeLog.cilk b/gcc/c-family/ChangeLog.cilk
index a4049d0..08d4c2d 100644
--- a/gcc/c-family/ChangeLog.cilk
+++ b/gcc/c-family/ChangeLog.cilk
@@ -1,3 +1,7 @@
+2012-03-11 Balaji V. Iyer <balaji.v.i...@intel.com>
+
+ * c-common.c (handle_vector_attribute): New function.
+
2012-01-20 Balaji V. Iyer <balaji.v.i...@intel.com>
* c-common.c (c_define_builtins): Added a call to
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index f84ccb9..47b5c54 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -381,6 +381,7 @@ static tree handle_type_generic_attribute (tree *, tree,
tree, int, bool *);
static tree handle_alloc_size_attribute (tree *, tree, tree, int, bool *);
static tree handle_target_attribute (tree *, tree, tree, int, bool *);
static tree handle_optimize_attribute (tree *, tree, tree, int, bool *);
+static tree handle_vector_attribute (tree *, tree, tree, int, bool *);
static tree ignore_attribute (tree *, tree, tree, int, bool *);
static tree handle_no_split_stack_attribute (tree *, tree, tree, int, bool *);
static tree handle_fnspec_attribute (tree *, tree, tree, int, bool *);
@@ -741,6 +742,8 @@ const struct attribute_spec c_common_attribute_table[] =
handle_target_attribute, false },
{ "optimize", 1, -1, true, false, false,
handle_optimize_attribute, false },
+ { "vector", 1, -1, true, false, false,
+ handle_vector_attribute, false },
/* For internal use only. The leading '*' both prevents its usage in
source code and signals that it may be overridden by machine tables. */
{ "*tm regparm", 0, 0, false, true, true,
@@ -8268,6 +8271,22 @@ parse_optimize_options (tree args, bool attr_p)
return ret;
}
+/* Handle a "vector" attribute; arguments as in
+   struct attribute_spec.handler.
+
+   The clauses of the attribute (NAME and ARGS) are not interpreted here.
+   The handler only builds a one-element argument list holding the string
+   "O3" and forwards it, together with NODE, FLAGS and NO_ADD_ATTRS, to
+   handle_optimize_attribute.  The result of that call is discarded and
+   NULL_TREE is always returned.  */
+
+static tree
+handle_vector_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
+                         tree args ATTRIBUTE_UNUSED, int flags,
+                         bool *no_add_attrs)
+{
+  tree opt_list;
+  VEC(tree,gc) *opt_vec = make_tree_vector ();
+
+  /* FLAGS is really used below, so it must not be tagged as unused.  */
+  VEC_safe_push (tree, gc, opt_vec, build_string (2, "O3"));
+  opt_list = build_tree_list_vec (opt_vec);
+  release_tree_vector (opt_vec);
+
+  handle_optimize_attribute (node, get_identifier ("optimize"), opt_list,
+                             flags, no_add_attrs);
+  return NULL_TREE;
+}
+
/* For handling "optimize" attribute. arguments as in
struct attribute_spec.handler. */
diff --git a/gcc/elem-function.c b/gcc/elem-function.c
new file mode 100755
index 0000000..a5a7b61
--- /dev/null
+++ b/gcc/elem-function.c
@@ -0,0 +1,594 @@
+/* This file is part of the Intel(R) Cilk(TM) Plus support
+ This file contains the functions for Elemental functions.
+
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ Written by Balaji V. Iyer <balaji.v.i...@intel.com>,
+ Intel Corporation
+
+ Many Thanks to Karthik Kumar for advice on the basic technique
+ about cloning functions.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "output.h"
+#include "c-family/c-common.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "tree-pass.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "flags.h"
+#include "tree-inline.h"
+#include "cgraph.h"
+#include "ipa-prop.h"
+#include "opts.h"
+#include "tree-iterator.h"
+#include "toplev.h"
+#include "options.h"
+#include "intl.h"
+#include "vec.h"
+
+/* Capacity of every fixed-size array inside elem_fn_info.  */
+#define MAX_VARS 50
+
+/* Which variants of the vector function were requested.
+   extract_elem_fn_values starts from USE_BOTH; a "mask" clause selects
+   USE_MASK and a "nomask" clause selects USE_NOMASK.  */
+enum mask_options {
+ USE_MASK = 12345,
+ USE_NOMASK,
+ USE_BOTH
+};
+
+/* Everything extract_elem_fn_values collects from a "vector" attribute and
+   from the parameter list of the function it decorates.  Parameter
+   locations are 1-based: the first parameter has location 1.  */
+typedef struct
+{
+ /* Copy of the string given in the "processor" clause.  */
+ char *proc_type;
+ /* See enum mask_options.  */
+ enum mask_options mask;
+ /* Integer constants of the "vectorlength" clause; no_vlengths is the
+    number of entries in use.  */
+ int vectorlength[MAX_VARS];
+ int no_vlengths;
+ /* Names listed in the "uniform" clause; no_uvars entries are in use.  */
+ char *uniform_vars[MAX_VARS];
+ int no_uvars;
+ int uniform_location[MAX_VARS]; /* their location in parm list */
+ /* Names listed in the "linear" clause with the step that follows each
+    name; no_lvars entries are in use.  */
+ char *linear_vars[MAX_VARS];
+ int linear_steps[MAX_VARS];
+ int linear_location[MAX_VARS]; /* their location in parm list */
+ int no_lvars;
+ /* Locations of the parameters named in neither clause; no_pvars entries
+    are in use.  */
+ int private_location[MAX_VARS]; /* parm not in uniform or linear list */
+ int no_pvars;
+ /* NOTE(review): nothing visible in this file assigns or reads
+    func_prefix -- confirm whether it is still needed.  */
+ char *func_prefix;
+ /* Number of parameters of the function.  */
+ int total_no_args;
+} elem_fn_info;
+
+static elem_fn_info *extract_elem_fn_values (tree);
+static tree create_optimize_attribute (int);
+static tree create_processor_attribute (elem_fn_info *, tree *);
+
+/* Append SUFFIX to the name of DECL.
+
+   DECL is modified in place: its DECL_NAME becomes the identifier for
+   "<old name><suffix>".  The (same) DECL is returned.  When SUFFIX is
+   NULL, DECL is returned untouched.  */
+static tree
+rename_elem_fn (tree decl, const char *suffix)
+{
+  const char *fn_name = IDENTIFIER_POINTER (DECL_NAME (decl));
+  char *new_fn_name;
+  size_t length;
+
+  if (!suffix || !fn_name)
+    return decl;
+
+  length = strlen (fn_name) + strlen (suffix) + 1;
+  new_fn_name = (char *) xmalloc (length);
+  strcpy (new_fn_name, fn_name);
+  strcat (new_fn_name, suffix);
+
+  DECL_NAME (decl) = get_identifier (new_fn_name);
+
+  /* The identifier table keeps its own copy of the spelling, so the
+     scratch buffer must be released here or it is leaked.  */
+  free (new_fn_name);
+  return decl;
+}
+
+/* Return true when the declaration behind NODE carries an attribute whose
+   name is the identifier "vector", i.e. when the function has to be cloned
+   into its vector equivalent.  A NULL NODE yields false.  */
+static bool
+is_elem_fn (struct cgraph_node *node)
+{
+  tree attr;
+
+  if (node == NULL)
+    return false;
+
+  attr = DECL_ATTRIBUTES (node->decl);
+  while (attr != NULL_TREE)
+    {
+      tree attr_name = TREE_PURPOSE (attr);
+
+      if (TREE_CODE (attr_name) == IDENTIFIER_NODE
+          && strcmp (IDENTIFIER_POINTER (attr_name), "vector") == 0)
+        return true;
+
+      attr = TREE_CHAIN (attr);
+    }
+
+  /* Walked the whole attribute list without meeting "vector".  */
+  return false;
+}
+
+/* Return the one-letter processor code used when mangling the name of a
+   vector function, as a freshly xstrdup'ed string.
+
+   NULL is returned when ELEM_FN_VALUES is NULL or carries no processor
+   type.  A processor type that is not in the table below is treated as
+   unreachable.  */
+static char *
+find_processor_code (elem_fn_info *elem_fn_values)
+{
+  /* { processor clause value, mangling letter } */
+  static const char *const proc_codes[][2] = {
+    { "pentium_4",         "B" },
+    { "pentium4_sse3",     "D" },
+    { "core2_duo_ssse3",   "E" },
+    { "core2_duo_sse_4_1", "F" },
+    { "core_i7_sse4_2",    "H" }
+  };
+  size_t idx;
+
+  if (!elem_fn_values || !elem_fn_values->proc_type)
+    return NULL;
+
+  for (idx = 0; idx < sizeof (proc_codes) / sizeof (proc_codes[0]); idx++)
+    if (strcmp (elem_fn_values->proc_type, proc_codes[idx][0]) == 0)
+      return xstrdup (proc_codes[idx][1]);
+
+  gcc_unreachable ();
+
+  return NULL; /* should never get here */
+}
+
+/* Return the vector length to use in the mangled name, as a freshly
+   allocated decimal string.
+
+   When a "vectorlength" clause was recorded, its first value is used.
+   Otherwise the default for the recorded processor type is used, which is
+   "4" for every processor known here.  An unknown processor type is
+   treated as unreachable.
+
+   NULL is returned when ELEM_FN_VALUES is NULL, or when neither a vector
+   length nor a processor type was recorded.  */
+static char *
+find_vlength_code (elem_fn_info *elem_fn_values)
+{
+  /* Enough for any int in decimal: sign, digits and the trailing NUL.  */
+  const size_t code_size = 3 * sizeof (int) + 2;
+  const char *proc;
+  char *vlength_code;
+
+  /* Test before allocating so that nothing is leaked on this path.  */
+  if (!elem_fn_values)
+    return NULL;
+
+  if (elem_fn_values->no_vlengths != 0)
+    {
+      vlength_code = (char *) xmalloc (code_size);
+      snprintf (vlength_code, code_size, "%d",
+                elem_fn_values->vectorlength[0]);
+      return vlength_code;
+    }
+
+  proc = elem_fn_values->proc_type;
+  if (!proc)
+    return NULL;
+
+  if (!strcmp (proc, "pentium_4")
+      || !strcmp (proc, "pentium4_sse3")
+      || !strcmp (proc, "core2_duo_ssse3")
+      || !strcmp (proc, "core2_duo_sse_4_1")
+      || !strcmp (proc, "core_i7_sse4_2"))
+    return xstrdup ("4");
+
+  gcc_unreachable ();
+
+  return NULL; /* should never get here */
+}
+
+/* Target options tied to one value of the "processor" clause.  VEC_OPTS go
+   on the vector clone, SCALAR_OPTS on the scalar function; both are listed
+   in the order in which they are placed in the attribute.  */
+struct elem_fn_target_opts
+{
+  const char *proc_type;
+  const char *vec_opts[2];
+  const char *scalar_opts[2];
+};
+
+static const struct elem_fn_target_opts elem_fn_target_table[] = {
+  { "pentium_4",
+    { "arch=pentium4", "mmx" },    { "no-mmx", "arch=pentium4" } },
+  { "pentium4_sse3",
+    { "arch=pentium4", "sse3" },   { "arch=pentium4", "no-sse3" } },
+  { "core2_duo_ssse3",
+    { "arch=core2", "ssse3" },     { "arch=core2", "no-ssse3" } },
+  { "core2_duo_sse_4_1",
+    { "arch=core2", "sse4.1" },    { "arch=core2", "no-sse4.1" } },
+  { "core_i7_sse4_2",
+    { "arch=corei7", "sse4.2" },   { "arch=corei7", "no-sse4.2" } }
+};
+
+/* Build a "__target__" attribute node whose arguments are the two strings
+   in OPTS, in that order.  */
+static tree
+build_elem_fn_target_attribute (const char *const *opts)
+{
+  tree args;
+  VEC(tree,gc) *opt_vec = VEC_alloc (tree, gc, 4);
+
+  VEC_safe_push (tree, gc, opt_vec, build_string (strlen (opts[0]), opts[0]));
+  VEC_safe_push (tree, gc, opt_vec, build_string (strlen (opts[1]), opts[1]));
+  args = build_tree_list_vec (opt_vec);
+  VEC_truncate (tree, opt_vec, 0);
+
+  return build_tree_list (get_identifier ("__target__"), args);
+}
+
+/* Create the __target__ attribute matching the processor recorded in
+   ELEM_FN_VALUES and return it.
+
+   When OPPOSITE_ATTR is not NULL, *OPPOSITE_ATTR receives the attribute for
+   the scalar version of the function, which disables the vector extension
+   that the returned attribute enables.
+
+   NULL_TREE is returned, and *OPPOSITE_ATTR is left untouched, when no
+   processor was recorded or when the processor is not supported.  The
+   latter case is also reported with sorry ().  */
+static tree
+create_processor_attribute (elem_fn_info *elem_fn_values, tree *opposite_attr)
+{
+  const struct elem_fn_target_opts *row = NULL;
+  size_t ii;
+
+  if (!elem_fn_values || !elem_fn_values->proc_type)
+    return NULL_TREE;
+
+  for (ii = 0;
+       ii < sizeof (elem_fn_target_table) / sizeof (elem_fn_target_table[0]);
+       ii++)
+    if (!strcmp (elem_fn_values->proc_type,
+                 elem_fn_target_table[ii].proc_type))
+      {
+        row = &elem_fn_target_table[ii];
+        break;
+      }
+
+  if (!row)
+    {
+      /* Do not hand back a __target__ attribute without any argument.  */
+      sorry ("Processor type not supported.");
+      return NULL_TREE;
+    }
+
+  if (opposite_attr)
+    *opposite_attr = build_elem_fn_target_attribute (row->scalar_opts);
+
+  return build_elem_fn_target_attribute (row->vec_opts);
+}
+
+/* Create an "optimize" attribute requesting optimization level OPTION
+   (its single argument is the string "O<OPTION>") and return it.  It is
+   put on the vector function to make sure the vectorizer is turned on and
+   has its full capabilities.  */
+static tree
+create_optimize_attribute (int option)
+{
+  tree opt_attr;
+  VEC(tree,gc) *opt_vec = VEC_alloc (tree, gc, 4);
+  /* 'O', then any int in decimal (sign and digits), then the NUL.  A
+     two-byte buffer has no room for the terminator.  */
+  char optimization[3 * sizeof (int) + 3];
+
+  snprintf (optimization, sizeof (optimization), "O%d", option);
+  VEC_safe_push (tree, gc, opt_vec,
+                 build_string (strlen (optimization), optimization));
+  opt_attr = build_tree_list_vec (opt_vec);
+  VEC_truncate (tree, opt_vec, 0);
+  opt_attr = build_tree_list (get_identifier ("optimize"), opt_attr);
+  return opt_attr;
+}
+
+/* Append the NUL-terminated PIECE to the heap string *BUF, whose current
+   length is *LEN and whose allocated size is *CAP.  The buffer is grown as
+   needed, so the result is never truncated.  */
+static void
+elem_fn_suffix_append (char **buf, size_t *len, size_t *cap,
+                       const char *piece)
+{
+  size_t piece_len = strlen (piece);
+
+  if (*len + piece_len + 1 > *cap)
+    {
+      *cap = 2 * (*len + piece_len + 1);
+      *buf = (char *) xrealloc (*buf, *cap);
+    }
+  memcpy (*buf + *len, piece, piece_len + 1);
+  *len += piece_len;
+}
+
+/* Return the mangling suffix of the vector function described by
+   ELEM_FN_VALUES, as a freshly allocated string that the caller owns.
+
+   The suffix is "._simdsimd_", the processor code, the vector length,
+   'm' when MASKED or 'n' otherwise, and then one piece per parameter in
+   parameter order: "_l<step>" for a linear parameter, "_s1" for a uniform
+   one and "_v1" for a private one.
+
+   NULL is returned when ELEM_FN_VALUES is NULL or when the processor code
+   or the vector length cannot be determined.  */
+static char *
+find_suffix (elem_fn_info *elem_fn_values, bool masked)
+{
+  char *proc_code, *vlength_code;
+  char *suffix = NULL;
+  size_t len = 0, cap = 0;
+  char step_str[3 * sizeof (int) + 2];
+  int arg_number, ii;
+
+  if (!elem_fn_values)
+    return NULL;
+
+  proc_code = find_processor_code (elem_fn_values);
+  vlength_code = find_vlength_code (elem_fn_values);
+  if (!proc_code || !vlength_code)
+    {
+      free (proc_code);
+      free (vlength_code);
+      return NULL;
+    }
+
+  elem_fn_suffix_append (&suffix, &len, &cap, "._simdsimd_");
+  elem_fn_suffix_append (&suffix, &len, &cap, proc_code);
+  elem_fn_suffix_append (&suffix, &len, &cap, vlength_code);
+  /* The mask letter is appended; copying it over the buffer would throw
+     away everything built so far.  */
+  elem_fn_suffix_append (&suffix, &len, &cap, masked ? "m" : "n");
+
+  /* Both helper strings are heap copies that are owned here.  */
+  free (proc_code);
+  free (vlength_code);
+
+  for (arg_number = 1; arg_number <= elem_fn_values->total_no_args;
+       arg_number++)
+    {
+      for (ii = 0; ii < elem_fn_values->no_lvars; ii++)
+        if (elem_fn_values->linear_location[ii] == arg_number)
+          {
+            snprintf (step_str, sizeof (step_str), "%d",
+                      elem_fn_values->linear_steps[ii]);
+            elem_fn_suffix_append (&suffix, &len, &cap, "_l");
+            elem_fn_suffix_append (&suffix, &len, &cap, step_str);
+          }
+
+      for (ii = 0; ii < elem_fn_values->no_uvars; ii++)
+        if (elem_fn_values->uniform_location[ii] == arg_number)
+          elem_fn_suffix_append (&suffix, &len, &cap, "_s1");
+
+      for (ii = 0; ii < elem_fn_values->no_pvars; ii++)
+        if (elem_fn_values->private_location[ii] == arg_number)
+          elem_fn_suffix_append (&suffix, &len, &cap, "_v1");
+    }
+
+  return suffix;
+}
+
+/* Create the call-graph node of the vector version of the elemental
+   function behind NODE and return it.
+
+   The declaration of NODE is copied and renamed with the mangling suffix;
+   the function is versioned into the copy; the "vector" attribute is
+   removed from both cfun->decl and the clone; the clone receives the
+   __target__ attribute of the requested processor and an optimize
+   attribute for level 3, while cfun->decl receives the opposite __target__
+   attribute.
+
+   NULL is returned, and nothing is created, when no elemental-function
+   information can be extracted from the declaration or when no mangling
+   suffix can be computed.
+
+   NOTE(review): the scalar side is updated through cfun->decl and not
+   through NODE->decl -- confirm that both always name the same function
+   here.  */
+static struct cgraph_node *
+create_elem_fn_nodes (struct cgraph_node *node)
+{
+  tree new_decl, old_decl, new_decl_name, opt_attr;
+  tree proc_attr, opp_proc_attr = NULL_TREE;
+  struct cgraph_node *new_node;
+  elem_fn_info *elem_fn_values;
+  char *suffix;
+
+  old_decl = node->decl;
+
+  /* Bail out before anything is copied when there is nothing to clone.  */
+  elem_fn_values = extract_elem_fn_values (old_decl);
+  if (!elem_fn_values)
+    return NULL;
+
+  /* Without a suffix the clone would keep the name of the original.
+     NOTE(review): elem_fn_values and the strings it owns are never
+     released, on this path or on the normal one.  */
+  suffix = find_suffix (elem_fn_values, false);
+  if (!suffix)
+    return NULL;
+
+  new_decl = copy_node (old_decl);
+  new_decl_name = rename_elem_fn (new_decl, suffix);
+  /* rename_elem_fn only reads the suffix, which is a heap string owned
+     here.  */
+  free (suffix);
+
+  SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl_name));
+  SET_DECL_RTL (new_decl, NULL);
+  TREE_SYMBOL_REFERENCED (DECL_NAME (new_decl_name)) = 1;
+
+  new_node = cgraph_copy_node_for_versioning (node, new_decl, NULL, NULL);
+  new_node->local.externally_visible = node->local.externally_visible;
+  new_node->lowered = true;
+
+  tree_function_versioning (old_decl, new_decl, NULL, false, NULL, false, NULL,
+                            NULL);
+  cgraph_call_function_insertion_hooks (new_node);
+  /* Flag the clone so that create_elem_vec_fn does not clone it again.  */
+  DECL_STRUCT_FUNCTION (new_decl)->elem_fn_already_cloned = true;
+  DECL_STRUCT_FUNCTION (new_decl)->curr_properties = cfun->curr_properties;
+  DECL_ATTRIBUTES (cfun->decl) =
+    remove_attribute ("vector", DECL_ATTRIBUTES (cfun->decl));
+  DECL_ATTRIBUTES (new_node->decl) =
+    remove_attribute ("vector", DECL_ATTRIBUTES (new_node->decl));
+
+  proc_attr = create_processor_attribute (elem_fn_values, &opp_proc_attr);
+
+  if (proc_attr)
+    decl_attributes (&new_node->decl, proc_attr, 0);
+  if (opp_proc_attr)
+    decl_attributes (&cfun->decl, opp_proc_attr, 0);
+
+  opt_attr = create_optimize_attribute (3); /* this will turn vectorizer on */
+  if (opt_attr)
+    decl_attributes (&new_node->decl, opt_attr, 0);
+
+  return new_node;
+}
+
+/* Return true if PURPOSE is an IDENTIFIER_NODE spelled exactly NAME.  */
+static bool
+elem_fn_clause_p (tree purpose, const char *name)
+{
+  return (purpose != NULL_TREE
+          && TREE_CODE (purpose) == IDENTIFIER_NODE
+          && !strcmp (IDENTIFIER_POINTER (purpose), name));
+}
+
+/* Stop the compilation when COUNT entries already fill one of the
+   MAX_VARS-sized arrays of elem_fn_info.  WHAT names the kind of entry for
+   the message.  */
+static void
+check_elem_fn_capacity (int count, const char *what)
+{
+  if (count >= MAX_VARS)
+    fatal_error ("too many %s entries in vector attribute (maximum is %d)",
+                 what, MAX_VARS);
+}
+
+/* Extract the contents of the "vector" attribute(s) of DECL into a freshly
+   allocated elem_fn_info and return it; the caller owns the result.  NULL
+   is returned when DECL has no attribute at all.
+
+   The clauses "processor", "vectorlength", "uniform", "linear", "mask" and
+   "nomask" are recognized.  The argument list of a "linear" clause is read
+   as pairs: a name followed by its step.  Afterwards the parameters of
+   DECL are walked and the 1-based position of each one is stored in the
+   uniform, linear or private location array, depending on the clause that
+   names it.  A parameter named in both the uniform and the linear clause
+   is a fatal error.
+
+   The whole structure is zeroed first, so every counter starts at 0, every
+   pointer that is never assigned stays NULL, and the location of a clause
+   name that matches no parameter stays 0.
+
+   NOTE(review): the names in the "uniform" and "linear" clauses are read
+   with TREE_STRING_POINTER and the steps with TREE_INT_CST_LOW without
+   checking the tree code, as before -- confirm what the parser builds.  */
+static elem_fn_info *
+extract_elem_fn_values (tree decl)
+{
+  elem_fn_info *elem_fn_values;
+  int arg_number = 0, ii;
+  tree ii_tree, jj_tree, kk_tree;
+  tree decl_attr = DECL_ATTRIBUTES (decl);
+
+  if (!decl_attr)
+    return NULL;
+
+  elem_fn_values = (elem_fn_info *) xmalloc (sizeof (elem_fn_info));
+  memset (elem_fn_values, 0, sizeof (elem_fn_info));
+  elem_fn_values->mask = USE_BOTH;
+
+  for (ii_tree = decl_attr; ii_tree; ii_tree = TREE_CHAIN (ii_tree))
+    {
+      if (!elem_fn_clause_p (TREE_PURPOSE (ii_tree), "vector"))
+        continue;
+
+      for (jj_tree = TREE_VALUE (ii_tree); jj_tree;
+           jj_tree = TREE_CHAIN (jj_tree))
+        {
+          tree jj_value = TREE_VALUE (jj_tree);
+          tree jj_purpose = TREE_PURPOSE (jj_value);
+
+          if (elem_fn_clause_p (jj_purpose, "processor"))
+            {
+              for (kk_tree = TREE_VALUE (jj_value); kk_tree;
+                   kk_tree = TREE_CHAIN (kk_tree))
+                {
+                  tree kk_value = TREE_VALUE (kk_tree);
+                  if (TREE_CODE (kk_value) == STRING_CST)
+                    {
+                      /* The last string wins; drop any earlier copy.  */
+                      free (elem_fn_values->proc_type);
+                      elem_fn_values->proc_type =
+                        xstrdup (TREE_STRING_POINTER (kk_value));
+                    }
+                }
+            }
+          else if (elem_fn_clause_p (jj_purpose, "vectorlength"))
+            {
+              for (kk_tree = TREE_VALUE (jj_value); kk_tree;
+                   kk_tree = TREE_CHAIN (kk_tree))
+                {
+                  tree kk_value = TREE_VALUE (kk_tree);
+                  if (TREE_CODE (kk_value) == INTEGER_CST)
+                    {
+                      check_elem_fn_capacity (elem_fn_values->no_vlengths,
+                                              "vectorlength");
+                      elem_fn_values->vectorlength
+                        [elem_fn_values->no_vlengths++] =
+                        (int) TREE_INT_CST_LOW (kk_value);
+                    }
+                }
+            }
+          else if (elem_fn_clause_p (jj_purpose, "uniform"))
+            {
+              for (kk_tree = TREE_VALUE (jj_value); kk_tree;
+                   kk_tree = TREE_CHAIN (kk_tree))
+                {
+                  tree kk_value = TREE_VALUE (kk_tree);
+                  check_elem_fn_capacity (elem_fn_values->no_uvars,
+                                          "uniform");
+                  elem_fn_values->uniform_vars[elem_fn_values->no_uvars++] =
+                    xstrdup (TREE_STRING_POINTER (kk_value));
+                }
+            }
+          else if (elem_fn_clause_p (jj_purpose, "linear"))
+            {
+              for (kk_tree = TREE_VALUE (jj_value); kk_tree;
+                   kk_tree = TREE_CHAIN (kk_tree))
+                {
+                  tree kk_value = TREE_VALUE (kk_tree);
+                  check_elem_fn_capacity (elem_fn_values->no_lvars, "linear");
+                  elem_fn_values->linear_vars[elem_fn_values->no_lvars] =
+                    xstrdup (TREE_STRING_POINTER (kk_value));
+
+                  /* The step is the list element right after the name.  A
+                     name without a step must not be dereferenced.  */
+                  kk_tree = TREE_CHAIN (kk_tree);
+                  gcc_assert (kk_tree != NULL_TREE);
+                  kk_value = TREE_VALUE (kk_tree);
+                  elem_fn_values->linear_steps[elem_fn_values->no_lvars] =
+                    (int) TREE_INT_CST_LOW (kk_value);
+                  elem_fn_values->no_lvars++;
+                }
+            }
+          else if (elem_fn_clause_p (jj_purpose, "mask"))
+            elem_fn_values->mask = USE_MASK;
+          else if (elem_fn_clause_p (jj_purpose, "nomask"))
+            elem_fn_values->mask = USE_NOMASK;
+        }
+    }
+
+  for (ii_tree = DECL_ARGUMENTS (decl); ii_tree;
+       ii_tree = DECL_CHAIN (ii_tree))
+    {
+      bool already_found = false;
+      const char *parm_name = NULL;
+
+      arg_number++;
+      if (DECL_NAME (ii_tree))
+        parm_name = IDENTIFIER_POINTER (DECL_NAME (ii_tree));
+
+      for (ii = 0; parm_name && ii < elem_fn_values->no_uvars; ii++)
+        if (!strcmp (parm_name, elem_fn_values->uniform_vars[ii]))
+          {
+            already_found = true;
+            elem_fn_values->uniform_location[ii] = arg_number;
+          }
+
+      for (ii = 0; parm_name && ii < elem_fn_values->no_lvars; ii++)
+        if (!strcmp (parm_name, elem_fn_values->linear_vars[ii]))
+          {
+            if (already_found)
+              fatal_error
+                ("variable %s defined in both uniform and linear clause",
+                 elem_fn_values->linear_vars[ii]);
+            already_found = true;
+            elem_fn_values->linear_location[ii] = arg_number;
+          }
+
+      if (!already_found) /* this means this variable is a private */
+        {
+          check_elem_fn_capacity (elem_fn_values->no_pvars, "private");
+          elem_fn_values->private_location[elem_fn_values->no_pvars++] =
+            arg_number;
+        }
+    }
+
+  elem_fn_values->total_no_args = arg_number;
+
+  return elem_fn_values;
+}
+
+/* Entry point of the elemental function pass.
+
+   Walk every call-graph node and, for each function that still carries a
+   "vector" attribute and is not itself a clone made by this pass, create
+   its vector version with create_elem_fn_nodes.  When the original
+   declaration has RTL, the clone receives a copy of it whose symbol is
+   replaced by one spelled like the name of the clone.
+
+   Nodes that have no struct function, and nodes for which no clone can be
+   created, are skipped.  Always returns 0.  */
+static unsigned int
+create_elem_vec_fn (void)
+{
+  struct cgraph_node *ii_node, *copied_node;
+
+  for (ii_node = cgraph_nodes; ii_node != NULL; ii_node = ii_node->next)
+    {
+      struct function *fn;
+
+      if (!is_elem_fn (ii_node))
+        continue;
+
+      fn = DECL_STRUCT_FUNCTION (ii_node->decl);
+      if (!fn || fn->elem_fn_already_cloned)
+        continue;
+
+      /* create_elem_fn_nodes returns NULL when there is nothing to
+         clone.  */
+      copied_node = create_elem_fn_nodes (ii_node);
+      if (!copied_node)
+        continue;
+
+      /* NOTE(review): if DECL_RTL creates the RTL on demand, this test is
+         always true; DECL_RTL_SET_P may be what was meant -- confirm.  */
+      if (DECL_RTL (ii_node->decl))
+        {
+          SET_DECL_RTL (copied_node->decl,
+                        copy_rtx (DECL_RTL (ii_node->decl)));
+          XEXP (DECL_RTL (copied_node->decl), 0) =
+            gen_rtx_SYMBOL_REF
+              (GET_MODE (XEXP (DECL_RTL (ii_node->decl), 0)),
+               IDENTIFIER_POINTER (DECL_NAME (copied_node->decl)));
+        }
+    }
+  return 0;
+}
+
+
+/* Pass descriptor of the elemental function pass.  Its execute hook is
+   create_elem_vec_fn and no gate function is supplied.  The pass asks for
+   PROP_gimple_any and PROP_cfg and finishes with TODO_dump_func and
+   TODO_verify_flow.  */
+struct gimple_opt_pass pass_elem_fn =
+ {
+ {
+ GIMPLE_PASS,
+ "tree_elem_fn", /* name */
+ 0, /* gate */
+ create_elem_vec_fn, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_NONE, /* tv_id */
+ PROP_gimple_any| PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func|TODO_verify_flow, /* todo_flags_finish */
+ }
+ };
diff --git a/gcc/function.c b/gcc/function.c
index 4508ae2..ed07a83 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -4418,6 +4418,8 @@ allocate_struct_function (tree fndecl, bool abstract_p)
/* ??? This could be set on a per-function basis by the front-end
but is this worth the hassle? */
cfun->can_throw_non_call_exceptions = flag_non_call_exceptions;
+
+ cfun->elem_fn_already_cloned = false;
}
}
diff --git a/gcc/function.h b/gcc/function.h
index b5b032a..d86a958 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -540,6 +540,8 @@ struct GTY(()) function {
/* In a Cilk function, the VAR_DECL for the frame descriptor. */
tree cilk_frame_decl;
+
+ bool elem_fn_already_cloned;
/* For md files. */
diff --git a/gcc/passes.c b/gcc/passes.c
index 520267a..cb9b552 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -1208,6 +1208,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_lower_vector);
NEXT_PASS (pass_early_warn_uninitialized);
NEXT_PASS (pass_rebuild_cgraph_edges);
+ NEXT_PASS (pass_elem_fn);
NEXT_PASS (pass_inline_parameters);
NEXT_PASS (pass_early_inline);
NEXT_PASS (pass_all_early_optimizations);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index e871aeb..52e72ee 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -484,6 +484,7 @@ extern struct gimple_opt_pass pass_all_optimizations;
extern struct gimple_opt_pass pass_cleanup_cfg_post_optimizing;
extern struct gimple_opt_pass pass_init_datastructures;
extern struct gimple_opt_pass pass_fixup_cfg;
+extern struct gimple_opt_pass pass_elem_fn;
extern struct rtl_opt_pass pass_expand;
extern struct rtl_opt_pass pass_init_function;