On Thu, Apr 14, 2022 at 06:06:24PM +0200, Marcel Vollweiler wrote:
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -13994,7 +13994,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
>    struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp;
>  
>    if (teams == NULL_TREE)
> -    num_teams_upper = integer_one_node;
> +    num_teams_upper = integer_minus_two_node;

No, please don't introduce this, it is quite costly to have a GC trees
like integer_one_node, so they should stay for the most commonly used
numbers, -2 isn't like that.  Just build_int_cst (integer_type_node, -2).

> --- a/gcc/tree-core.h
> +++ b/gcc/tree-core.h
> @@ -642,6 +642,7 @@ enum tree_index {
>    TI_INTEGER_ONE,
>    TI_INTEGER_THREE,
>    TI_INTEGER_MINUS_ONE,
> +  TI_INTEGER_MINUS_TWO,
>    TI_NULL_POINTER,
>  
>    TI_SIZE_ZERO,
> diff --git a/gcc/tree.cc b/gcc/tree.cc
> index 8f83ea1..8cb474d 100644
> --- a/gcc/tree.cc
> +++ b/gcc/tree.cc
> @@ -9345,6 +9345,7 @@ build_common_tree_nodes (bool signed_char)
>    integer_one_node = build_int_cst (integer_type_node, 1);
>    integer_three_node = build_int_cst (integer_type_node, 3);
>    integer_minus_one_node = build_int_cst (integer_type_node, -1);
> +  integer_minus_two_node = build_int_cst (integer_type_node, -2);
>  
>    size_zero_node = size_int (0);
>    size_one_node = size_int (1);
> diff --git a/gcc/tree.h b/gcc/tree.h
> index cea49a5..1aeb009 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4206,6 +4206,7 @@ tree_strip_any_location_wrapper (tree exp)
>  #define integer_one_node             global_trees[TI_INTEGER_ONE]
>  #define integer_three_node              global_trees[TI_INTEGER_THREE]
>  #define integer_minus_one_node               
> global_trees[TI_INTEGER_MINUS_ONE]
> +#define integer_minus_two_node               
> global_trees[TI_INTEGER_MINUS_TWO]
>  #define size_zero_node                       global_trees[TI_SIZE_ZERO]
>  #define size_one_node                        global_trees[TI_SIZE_ONE]
>  #define bitsize_zero_node            global_trees[TI_BITSIZE_ZERO]

And drop the above 3 hunks.

> --- a/libgomp/config/gcn/icv-device.c
> +++ b/libgomp/config/gcn/icv-device.c
> @@ -37,6 +37,7 @@ volatile int GOMP_DEFAULT_DEVICE_VAR;
>  volatile int GOMP_MAX_ACTIVE_LEVELS_VAR;
>  volatile omp_proc_bind_t GOMP_BIND_VAR;
>  volatile int GOMP_NTEAMS_VAR;
> +volatile int GOMP_TEAMS_THREAD_LIMIT_VAR;

I really don't like this copying of individual ICVs one by one to the
device, copy a struct containing them and access fields in that struct.

> --- a/libgomp/libgomp-plugin.h
> +++ b/libgomp/libgomp-plugin.h
> @@ -116,6 +116,7 @@ struct addr_pair
>  #define GOMP_MAX_ACTIVE_LEVELS_VAR __gomp_max_active_levels
>  #define GOMP_BIND_VAR __gomp_bind
>  #define GOMP_NTEAMS_VAR __gomp_nteams
> +#define GOMP_TEAMS_THREAD_LIMIT_VAR __gomp_teams_thread_limit_var

Likewise here.

> @@ -527,13 +538,19 @@ struct gomp_icv_list {
>  
>  extern void *gomp_get_icv_value_ptr (struct gomp_icv_list **list,
>                                    int device_num);
> -extern struct gomp_icv_list *gomp_run_sched_var_dev_list;
> -extern struct gomp_icv_list *gomp_run_sched_chunk_size_dev_list;
> +extern struct gomp_icv_list* gomp_add_device_specific_icv (int dev_num,
> +                                                        size_t size,
> +                                                         struct 
> gomp_icv_list **list);
> +extern struct gomp_icv_list *gomp_initial_run_sched_var_dev_list;
> +extern struct gomp_icv_list *gomp_initial_run_sched_chunk_size_dev_list;
> +extern struct gomp_icv_list *gomp_initial_max_active_levels_var_dev_list;
> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_dev_list;
> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_dev_list;
> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_len_dev_list;
> +extern struct gomp_icv_list *gomp_initial_nteams_var_dev_list;
> +
>  extern struct gomp_icv_list *gomp_nteams_var_dev_list;
> -extern struct gomp_icv_list *gomp_max_active_levels_var_dev_list;
> -extern struct gomp_icv_list *gomp_proc_bind_var_dev_list;
> -extern struct gomp_icv_list *gomp_proc_bind_var_list_dev_list;
> -extern struct gomp_icv_list *gomp_proc_bind_var_list_len_dev_list;
> +extern struct gomp_icv_list *gomp_teams_thread_limit_var_dev_list;

Nor these per-var lists.  For a specific device, walk the list with
all the vars in it, start with the most specific (matching dev number),
then just dev and then all and fill in from it what is going to be copied.
> --- a/libgomp/plugin/plugin-gcn.c
> +++ b/libgomp/plugin/plugin-gcn.c
> @@ -572,7 +572,8 @@ static char *GOMP_ICV_STRINGS[] =
>    XSTRING (GOMP_DYN_VAR),
>    XSTRING (GOMP_MAX_ACTIVE_LEVELS_VAR),
>    XSTRING (GOMP_BIND_VAR),
> -  XSTRING (GOMP_NTEAMS_VAR)
> +  XSTRING (GOMP_NTEAMS_VAR),
> +  XSTRING (GOMP_TEAMS_THREAD_LIMIT_VAR)

Then you don't need to e.g. track the names of the individual vars, just
one for the whole ICV block.

        Jakub

Reply via email to