On Fri, Nov 06, 2015 at 09:38:21AM +0100, Richard Biener wrote: > On Thu, 5 Nov 2015, Martin Jambor wrote: > > > Hi, > > > > in the previous email I wrote that we need to "change behavior" of a few > > optimization passes. One was the flattening of GPU functions and the > > other two are in the patch below. It all comes down to this: at the > > moment, we need to switch off the vectorizer (only for the GPU > > functions, of course). > > > > We are actually quite close to being able to handle gimple vector > > input in the HSA back-end but not all the way yet, and before allowing the > > vectorizer again, we will have to make sure it never produces vectors > > bigger than 128 bits (in GPU functions). > > Hmm. I'd rather have this modify > DECL_FUNCTION_SPECIFIC_OPTIMIZATION of the hsa function to get this > effect. I think I mentioned this to the OACC guys as well for > similar needs of theirs.
I see, that is a good idea. I have reverted the changes to tree-ssa-loop.c and tree-vectorizer.c and, on top of that, committed the following patch to the branch. It makes the modifications to HSA fndecls at a more convenient spot and disables vectorization in the following way: tree gdecl = gpu->decl; tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl); if (fn_opts == NULL_TREE) fn_opts = optimization_default_node; fn_opts = copy_node (fn_opts); TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false; TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false; DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts; I hope that is what you meant. I have also verified that it works. Thanks, Martin 2015-11-10 Martin Jambor <mjam...@suse.cz> * hsa.h (hsa_summary_t): Add a comment to method link_functions. (hsa_summary_t::link_functions): Move... * hsa.c (hsa_summary_t::link_functions): ...here. Add common fndecl modifications. Include stringpool.h. * ipa-hsa.c (process_hsa_functions): Do not add flatten attribute here. Fix comments. diff --git a/gcc/hsa.c b/gcc/hsa.c index ab05a1d..e63be95 100644 --- a/gcc/hsa.c +++ b/gcc/hsa.c @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "alloc-pool.h" #include "cgraph.h" #include "print-tree.h" +#include "stringpool.h" #include "symbol-summary.h" #include "hsa.h" @@ -693,6 +694,40 @@ hsa_get_declaration_name (tree decl) return NULL; } +/* Couple GPU and HOST as gpu-specific and host-specific implementation of the + same function. KIND determines whether GPU is a host-invokable kernel or + gpu-callable function. 
*/ + +inline void +hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, + hsa_function_kind kind) +{ + hsa_function_summary *gpu_summary = get (gpu); + hsa_function_summary *host_summary = get (host); + + gpu_summary->m_kind = kind; + host_summary->m_kind = kind; + + gpu_summary->m_gpu_implementation_p = true; + host_summary->m_gpu_implementation_p = false; + + gpu_summary->m_binded_function = host; + host_summary->m_binded_function = gpu; + + tree gdecl = gpu->decl; + DECL_ATTRIBUTES (gdecl) + = tree_cons (get_identifier ("flatten"), NULL_TREE, + DECL_ATTRIBUTES (gdecl)); + + tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl); + if (fn_opts == NULL_TREE) + fn_opts = optimization_default_node; + fn_opts = copy_node (fn_opts); + TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false; + TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false; + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts; +} + /* Add a HOST function to HSA summaries. */ void diff --git a/gcc/hsa.h b/gcc/hsa.h index 025de67..b6855ea 100644 --- a/gcc/hsa.h +++ b/gcc/hsa.h @@ -1161,27 +1161,14 @@ public: hsa_summary_t (symbol_table *table): function_summary<hsa_function_summary *> (table) { } + /* Couple GPU and HOST as gpu-specific and host-specific implementation of + the same function. KIND determines whether GPU is a host-invokable kernel + or gpu-callable function. 
*/ + void link_functions (cgraph_node *gpu, cgraph_node *host, hsa_function_kind kind); }; -inline void -hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, - hsa_function_kind kind) -{ - hsa_function_summary *gpu_summary = get (gpu); - hsa_function_summary *host_summary = get (host); - - gpu_summary->m_kind = kind; - host_summary->m_kind = kind; - - gpu_summary->m_gpu_implementation_p = true; - host_summary->m_gpu_implementation_p = false; - - gpu_summary->m_binded_function = host; - host_summary->m_binded_function = gpu; -} - /* in hsa.c */ extern struct hsa_function_representation *hsa_cfun; extern hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c index b4cb58e..d77fa6b 100644 --- a/gcc/ipa-hsa.c +++ b/gcc/ipa-hsa.c @@ -90,16 +90,12 @@ process_hsa_functions (void) cgraph_node *clone = node->create_virtual_clone (vec <cgraph_edge *> (), NULL, NULL, "hsa"); TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl); - if (s->m_kind == HSA_KERNEL) - DECL_ATTRIBUTES (clone->decl) - = tree_cons (get_identifier ("flatten"), NULL_TREE, - DECL_ATTRIBUTES (clone->decl)); clone->force_output = true; hsa_summaries->link_functions (clone, node, s->m_kind); if (dump_file) - fprintf (dump_file, "HSA creates a new clone: %s, type: %s\n", + fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n", clone->name (), s->m_kind == HSA_KERNEL ? "kernel" : "function"); } @@ -116,7 +112,7 @@ process_hsa_functions (void) hsa_summaries->link_functions (clone, node, HSA_FUNCTION); if (dump_file) - fprintf (dump_file, "HSA creates a new function clone: %s\n", + fprintf (dump_file, "Created a new HSA function clone: %s\n", clone->name ()); } }