https://github.com/jtb20 updated https://github.com/llvm/llvm-project/pull/159774
>From 861dfa4279cb0080dc714e64b7a2b50ac74485c4 Mon Sep 17 00:00:00 2001 From: Julian Brown <julian.br...@amd.com> Date: Tue, 16 Sep 2025 04:16:15 -0500 Subject: [PATCH 1/5] [OpenMP] Make loop index unsigned in __kmpc_omp_task_with_deps/__kmp_omp_task NFC. Co-authored-by: Adrian Munera <adrian.mun...@bsc.es> --- openmp/runtime/src/kmp_taskdeps.cpp | 2 +- openmp/runtime/src/kmp_tasking.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index abbca752f0587..743d8ed093c61 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -714,7 +714,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, __kmp_free(old_record); - for (kmp_int i = old_size; i < new_size; i++) { + for (kmp_uint i = old_size; i < new_size; i++) { kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate( __kmp_successors_size * sizeof(kmp_int32)); new_record[i].task = nullptr; diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 37836fb457537..a3c7439593d5c 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1816,7 +1816,7 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, __kmp_free(old_record); - for (kmp_int i = old_size; i < new_size; i++) { + for (kmp_uint i = old_size; i < new_size; i++) { kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate( __kmp_successors_size * sizeof(kmp_int32)); new_record[i].task = nullptr; >From 94fe5ecc2f4b9a4a2a068b45358eff20feacfec6 Mon Sep 17 00:00:00 2001 From: Julian Brown <julian.br...@amd.com> Date: Mon, 15 Sep 2025 05:13:20 -0500 Subject: [PATCH 2/5] [OpenMP] Use ID not index to identify taskgraphs in libomp runtime In preparation for the following patches, this patch changes the key used to identify taskgraphs from a monotonic index into an ID (stored in a linear table). 
Co-authored-by: Adrian Munera <adrian.mun...@bsc.es> --- openmp/runtime/src/kmp.h | 5 ++- openmp/runtime/src/kmp_global.cpp | 2 +- openmp/runtime/src/kmp_tasking.cpp | 72 ++++++++++++++++++++---------- 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 83afc0e83f231..4c4e9b44c1b2a 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2678,7 +2678,7 @@ typedef struct kmp_tdg_info { extern int __kmp_tdg_dot; extern kmp_int32 __kmp_max_tdgs; extern kmp_tdg_info_t **__kmp_global_tdgs; -extern kmp_int32 __kmp_curr_tdg_idx; +extern kmp_int32 __kmp_curr_tdg_id; extern kmp_int32 __kmp_successors_size; extern std::atomic<kmp_int32> __kmp_tdg_task_id; extern kmp_int32 __kmp_num_tdg; @@ -4392,6 +4392,9 @@ KMP_EXPORT kmp_int32 __kmpc_start_record_task(ident_t *loc, kmp_int32 gtid, kmp_int32 tdg_id); KMP_EXPORT void __kmpc_end_record_task(ident_t *loc, kmp_int32 gtid, kmp_int32 input_flags, kmp_int32 tdg_id); +KMP_EXPORT void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 input_flags, kmp_uint32 tdg_id, + void (*entry)(void *), void *args); #endif /* Interface to fast scalable reduce methods routines */ diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index 323d13e948b42..fdf7569116578 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -556,7 +556,7 @@ int *__kmp_nesting_nth_level; int __kmp_tdg_dot = 0; kmp_int32 __kmp_max_tdgs = 100; kmp_tdg_info_t **__kmp_global_tdgs = NULL; -kmp_int32 __kmp_curr_tdg_idx = +kmp_int32 __kmp_curr_tdg_id = 0; // Id of the current TDG being recorded or executed kmp_int32 __kmp_num_tdg = 0; kmp_int32 __kmp_successors_size = 10; // Initial succesor size list for diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index a3c7439593d5c..a623f9f0be513 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ 
-1431,11 +1431,11 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, } #if OMPX_TASKGRAPH - kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx); + kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id); if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) && (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) { taskdata->is_taskgraph = 1; - taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx]; + taskdata->tdg = tdg; taskdata->td_task_id = KMP_GEN_TASK_ID(); taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id); } @@ -2365,9 +2365,9 @@ without help of the runtime library. */ void *__kmpc_task_reduction_init(int gtid, int num, void *data) { #if OMPX_TASKGRAPH - kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx); + kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id); if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) { - kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx]; + kmp_tdg_info_t *this_tdg = __kmp_find_tdg(__kmp_curr_tdg_id); this_tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num); this_tdg->rec_num_taskred = num; @@ -2392,14 +2392,11 @@ has two parameters, pointer to object to be initialized and pointer to omp_orig */ void *__kmpc_taskred_init(int gtid, int num, void *data) { #if OMPX_TASKGRAPH - kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx); + kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id); if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) { - kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx]; - this_tdg->rec_taskred_data = - __kmp_allocate(sizeof(kmp_task_red_input_t) * num); - this_tdg->rec_num_taskred = num; - KMP_MEMCPY(this_tdg->rec_taskred_data, data, - sizeof(kmp_task_red_input_t) * num); + tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num); + tdg->rec_num_taskred = num; + KMP_MEMCPY(tdg->rec_taskred_data, data, sizeof(kmp_task_red_input_t) * num); } #endif return __kmp_task_reduction_init(gtid, num, 
(kmp_taskred_input_t *)data); @@ -2451,7 +2448,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) { #if OMPX_TASKGRAPH if ((thread->th.th_current_task->is_taskgraph) && (!__kmp_tdg_is_recording( - __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) { + __kmp_find_tdg(__kmp_curr_tdg_id)->tdg_status))) { tg = thread->th.th_current_task->td_taskgroup; KMP_ASSERT(tg != NULL); KMP_ASSERT(tg->reduce_data != NULL); @@ -5232,6 +5229,24 @@ bool __kmpc_omp_has_task_team(kmp_int32 gtid) { } #if OMPX_TASKGRAPH +// __kmpc_taskgraph: record or replay taskgraph +// loc_ref: Location of TDG, not used yet +// gtid: Global Thread ID of the encountering thread +// input_flags: Flags associated with the TDG +// tdg_id: ID of the TDG to record, for now, incremental integer +// entry: Pointer to the entry function +// args: Pointer to the function arguments +void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 input_flags, + kmp_uint32 tdg_id, void (*entry)(void *), void *args) { + kmp_int32 res = __kmpc_start_record_task(loc_ref, gtid, input_flags, tdg_id); + // When res = 1, we either start recording or only execute tasks + // without recording. Need to execute entry function in both cases. 
+ if (res) + entry(args); + + __kmpc_end_record_task(loc_ref, gtid, input_flags, tdg_id); +} + // __kmp_find_tdg: identify a TDG through its ID // tdg_id: ID of the TDG // returns: If a TDG corresponding to this ID is found and not @@ -5245,9 +5260,14 @@ static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) { __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate( sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs); - if ((__kmp_global_tdgs[tdg_id]) && - (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE)) - res = __kmp_global_tdgs[tdg_id]; + for (kmp_int32 i = 0; i < __kmp_num_tdg; ++i) { + if ((__kmp_global_tdgs[i]) && (__kmp_global_tdgs[i]->tdg_id == tdg_id) && + (__kmp_global_tdgs[i]->tdg_status != KMP_TDG_NONE)) { + res = __kmp_global_tdgs[i]; + __kmp_curr_tdg_id = tdg_id; + break; + } + } return res; } @@ -5256,7 +5276,8 @@ static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) { // gtid: Global Thread ID void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) { kmp_int32 tdg_id = tdg->tdg_id; - KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n", gtid, tdg_id)); + KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n", + __kmp_get_gtid(), tdg_id)); char file_name[20]; sprintf(file_name, "tdg_%d.dot", tdg_id); @@ -5282,7 +5303,8 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) { } } fprintf(tdg_file, "}"); - KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id)); + KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", + __kmp_get_gtid(), tdg_id)); } // __kmp_exec_tdg: launch the execution of a previous @@ -5347,7 +5369,7 @@ static inline void __kmp_start_record(kmp_int32 gtid, kmp_int32 tdg_id) { kmp_tdg_info_t *tdg = (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t)); - __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg; + __kmp_global_tdgs[__kmp_num_tdg - 1] = tdg; // Initializing the TDG structure tdg->tdg_id = tdg_id; tdg->map_size = INIT_MAPSIZE; @@ -5372,7 +5394,7 @@ static inline void 
__kmp_start_record(kmp_int32 gtid, KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0); } - __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map; + tdg->record_map = this_record_map; } // __kmpc_start_record_task: Wrapper around __kmp_start_record to mark @@ -5406,10 +5428,14 @@ kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid, __kmp_exec_tdg(gtid, tdg); res = 0; } else { - __kmp_curr_tdg_idx = tdg_id; - KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs); - __kmp_start_record(gtid, flags, tdg_id); - __kmp_num_tdg++; + if (__kmp_num_tdg < __kmp_max_tdgs) { + __kmp_curr_tdg_id = tdg_id; + __kmp_num_tdg++; + KMP_DEBUG_ASSERT(__kmp_num_tdg <= __kmp_max_tdgs); + __kmp_start_record(gtid, flags, tdg_id); + } + // if no TDG found, need to execute the task + // even not recording res = 1; } KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n", >From d483705db4eafe47a4f70203cce66425f47dff02 Mon Sep 17 00:00:00 2001 From: Julian Brown <julian.br...@amd.com> Date: Mon, 15 Sep 2025 05:55:01 -0500 Subject: [PATCH 3/5] [OpenMP] Rename ompx_taskgraph->omp_taskgraph_experimental This patch renames the option to enable taskgraph support in the runtime from OMPX_TASKGRAPH to OMP_TASKGRAPH_EXPERIMENTAL, to reflect the feature's official status in OpenMP 6.0, but also the feature's current work-in-progress nature. 
--- openmp/runtime/CMakeLists.txt | 8 +-- openmp/runtime/src/kmp.h | 10 ++-- openmp/runtime/src/kmp_config.h.cmake | 4 +- openmp/runtime/src/kmp_global.cpp | 2 +- openmp/runtime/src/kmp_settings.cpp | 4 +- openmp/runtime/src/kmp_taskdeps.cpp | 14 ++--- openmp/runtime/src/kmp_taskdeps.h | 4 +- openmp/runtime/src/kmp_tasking.cpp | 52 +++++++++---------- openmp/runtime/test/CMakeLists.txt | 2 +- openmp/runtime/test/lit.cfg | 4 +- openmp/runtime/test/lit.site.cfg.in | 2 +- .../test/tasking/omp_record_replay.cpp | 2 +- .../test/tasking/omp_record_replay_deps.cpp | 2 +- .../omp_record_replay_deps_multi_succ.cpp | 2 +- .../tasking/omp_record_replay_multiTDGs.cpp | 2 +- .../tasking/omp_record_replay_print_dot.cpp | 2 +- .../tasking/omp_record_replay_taskloop.cpp | 2 +- 17 files changed, 59 insertions(+), 59 deletions(-) diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 93eb14f10a50a..76f9f1d5840c8 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -391,9 +391,9 @@ if(LIBOMP_OMPD_SUPPORT AND ((NOT LIBOMP_OMPT_SUPPORT) OR (NOT "${CMAKE_SYSTEM_NA set(LIBOMP_OMPD_SUPPORT FALSE) endif() -# OMPX Taskgraph support -# Whether to build with OMPX Taskgraph (e.g. task record & replay) -set(LIBOMP_OMPX_TASKGRAPH FALSE CACHE BOOL "OMPX-taskgraph (task record & replay)?") +# OMP Taskgraph support +# Whether to build with OMP Taskgraph (e.g. 
task record & replay) +set(LIBOMP_TASKGRAPH_EXPERIMENTAL FALSE CACHE BOOL "Experimental OMP taskgraph (task record & replay)") # Error check hwloc support after config-ix has run if(LIBOMP_USE_HWLOC AND (NOT LIBOMP_HAVE_HWLOC)) @@ -464,7 +464,7 @@ if(${OPENMP_STANDALONE_BUILD}) libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}") libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}") libomp_say("Use Hwloc library -- ${LIBOMP_USE_HWLOC}") - libomp_say("Use OMPX-taskgraph -- ${LIBOMP_OMPX_TASKGRAPH}") + libomp_say("Use OMP taskgraph -- ${LIBOMP_TASKGRAPH_EXPERIMENTAL}") endif() add_subdirectory(src) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 4c4e9b44c1b2a..2c89bee9f8c2b 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2627,7 +2627,7 @@ typedef struct { } ed; } kmp_event_t; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL // Initial number of allocated nodes while recording #define INIT_MAPSIZE 50 @@ -2687,7 +2687,7 @@ extern kmp_int32 __kmp_num_tdg; typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) /* Same fields as in the #else branch, but in reverse order */ -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL unsigned reserved31 : 4; unsigned onced : 1; #else @@ -2746,7 +2746,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ unsigned target : 1; unsigned hidden_helper : 1; /* 1 == hidden helper task */ -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL unsigned onced : 1; /* 1==ran once already, 0==never ran, record & replay purposes */ unsigned reserved31 : 4; /* reserved for library use */ #else @@ -2801,7 +2801,7 @@ struct kmp_taskdata { /* aligned during dynamic allocation */ #if OMPT_SUPPORT ompt_task_info_t ompt_task_info; #endif -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL bool 
is_taskgraph = 0; // whether the task is within a TDG kmp_tdg_info_t *tdg; // used to associate task with a TDG kmp_int32 td_tdg_task_id; // local task id in its TDG @@ -4379,7 +4379,7 @@ KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint); -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL // Taskgraph's Record & Replay mechanism // __kmp_tdg_is_recording: check whether a given TDG is recording // status: the tdg's current status diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index 40f1087fd7f27..1f966008c60a5 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -46,8 +46,8 @@ #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT #cmakedefine01 LIBOMP_OMPD_SUPPORT #define OMPD_SUPPORT LIBOMP_OMPD_SUPPORT -#cmakedefine01 LIBOMP_OMPX_TASKGRAPH -#define OMPX_TASKGRAPH LIBOMP_OMPX_TASKGRAPH +#cmakedefine01 LIBOMP_TASKGRAPH_EXPERIMENTAL +#define OMP_TASKGRAPH_EXPERIMENTAL LIBOMP_TASKGRAPH_EXPERIMENTAL #cmakedefine01 LIBOMP_PROFILING_SUPPORT #define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT #cmakedefine01 LIBOMP_OMPT_OPTIONAL diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index fdf7569116578..bcc318a180995 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -551,7 +551,7 @@ int __kmp_nesting_mode = 0; int __kmp_nesting_mode_nlevels = 1; int *__kmp_nesting_nth_level; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL // TDG record & replay int __kmp_tdg_dot = 0; kmp_int32 __kmp_max_tdgs = 100; diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index b9d615f43b570..36d8d0ffa8bb3 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -1266,7 +1266,7 @@ static void __kmp_stg_parse_num_threads(char const *name, char const *value, K_DIAG(1, ("__kmp_dflt_team_nth == %d\n", 
__kmp_dflt_team_nth)); } // __kmp_stg_parse_num_threads -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL static void __kmp_stg_parse_max_tdgs(char const *name, char const *value, void *data) { __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_max_tdgs); @@ -5742,7 +5742,7 @@ static kmp_setting_t __kmp_stg_table[] = { {"LIBOMP_NUM_HIDDEN_HELPER_THREADS", __kmp_stg_parse_num_hidden_helper_threads, __kmp_stg_print_num_hidden_helper_threads, NULL, 0, 0}, -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL {"KMP_MAX_TDGS", __kmp_stg_parse_max_tdgs, __kmp_std_print_max_tdgs, NULL, 0, 0}, {"KMP_TDG_DOT", __kmp_stg_parse_tdg_dot, __kmp_stg_print_tdg_dot, NULL, 0, diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index 743d8ed093c61..b1a0848fc722f 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -222,7 +222,7 @@ static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread, static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source, kmp_depnode_t *sink, kmp_task_t *sink_task) { -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task); kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task); if (source->dn.task && sink_task) { @@ -311,7 +311,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread, // link node as successor of list elements for (kmp_depnode_list_t *p = plist; p; p = p->next) { kmp_depnode_t *dep = p->node; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL kmp_tdg_status tdg_status = KMP_TDG_NONE; if (task) { kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); @@ -325,7 +325,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread, KMP_ACQUIRE_DEPNODE(gtid, dep); if (dep->dn.task) { if (!dep->dn.successors || dep->dn.successors->node != node) { -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (!(__kmp_tdg_is_recording(tdg_status)) && task) #endif 
__kmp_track_dependence(gtid, dep, node, task); @@ -352,7 +352,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, if (!sink) return 0; kmp_int32 npredecessors = 0; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL kmp_tdg_status tdg_status = KMP_TDG_NONE; kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); if (task) { @@ -367,7 +367,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, KMP_ACQUIRE_DEPNODE(gtid, sink); if (sink->dn.task) { if (!sink->dn.successors || sink->dn.successors->node != source) { -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (!(__kmp_tdg_is_recording(tdg_status)) && task) #endif __kmp_track_dependence(gtid, sink, source, task); @@ -376,7 +376,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, "%p\n", gtid, KMP_TASK_TO_TASKDATA(sink->dn.task), KMP_TASK_TO_TASKDATA(task))); -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (__kmp_tdg_is_recording(tdg_status)) { kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task); if (tdd->is_taskgraph) { @@ -694,7 +694,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_info_t *thread = __kmp_threads[gtid]; kmp_taskdata_t *current_task = thread->th.th_current_task; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL // record TDG with deps if (new_taskdata->is_taskgraph && __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) { diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h index f6bfb39218a21..0792baf67f162 100644 --- a/openmp/runtime/src/kmp_taskdeps.h +++ b/openmp/runtime/src/kmp_taskdeps.h @@ -96,7 +96,7 @@ extern void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start); static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) { -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (task->is_taskgraph && !(__kmp_tdg_is_recording(task->tdg->tdg_status))) { kmp_node_info_t *TaskInfo = &(task->tdg->record_map[task->td_tdg_task_id]); @@ -140,7 
+140,7 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) { gtid, task)); KMP_ACQUIRE_DEPNODE(gtid, node); -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (!task->is_taskgraph || (task->is_taskgraph && !__kmp_tdg_is_recording(task->tdg->tdg_status))) #endif diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index a623f9f0be513..69bc7a194e50d 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -37,7 +37,7 @@ static void __kmp_alloc_task_deque(kmp_info_t *thread, static int __kmp_realloc_task_threads_data(kmp_info_t *thread, kmp_task_team_t *task_team); static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask); -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id); int __kmp_taskloop_task(int gtid, void *ptask); #endif @@ -70,7 +70,7 @@ static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained, } // Check mutexinoutset dependencies, acquire locks kmp_depnode_t *node = tasknew->td_depnode; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (!tasknew->is_taskgraph && UNLIKELY(node && (node->dn.mtx_num_locks > 0))) { #else if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) { @@ -665,7 +665,7 @@ static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata, task->data2.priority = 0; taskdata->td_flags.freed = 1; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL // do not free tasks in taskgraph if (!taskdata->is_taskgraph) { #endif @@ -675,7 +675,7 @@ static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata, #else /* ! 
USE_FAST_MEMORY */ __kmp_thread_free(thread, taskdata); #endif -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL } else { taskdata->td_flags.complete = 0; taskdata->td_flags.started = 0; @@ -779,7 +779,7 @@ static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) { flags.detachable == TASK_DETACHABLE || flags.hidden_helper; ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (taskdata->td_taskgroup && taskdata->is_taskgraph) ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0; #endif @@ -802,7 +802,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, kmp_info_t *thread = __kmp_threads[gtid]; kmp_task_team_t *task_team = thread->th.th_task_team; // might be NULL for serial teams... -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL // to avoid seg fault when we need to access taskdata->td_flags after free when using vanilla taskloop bool is_taskgraph; #endif @@ -815,7 +815,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL is_taskgraph = taskdata->is_taskgraph; #endif @@ -923,7 +923,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, if (completed) { taskdata->td_flags.complete = 1; // mark the task as completed -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL taskdata->td_flags.onced = 1; // mark the task as ran once already #endif @@ -942,7 +942,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, #endif KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks); KMP_DEBUG_ASSERT(children >= 0); -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (taskdata->td_taskgroup && !taskdata->is_taskgraph) #else if (taskdata->td_taskgroup) @@ -985,7 +985,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, // KMP_DEBUG_ASSERT( 
resumed_task->td_flags.executing == 0 ); resumed_task->td_flags.executing = 1; // resume previous task -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (is_taskgraph && __kmp_track_children_task(taskdata) && taskdata->td_taskgroup) { // TDG: we only release taskgroup barrier here because @@ -1113,7 +1113,7 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, task->td_flags.executing = 1; task->td_flags.complete = 0; task->td_flags.freed = 0; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL task->td_flags.onced = 0; #endif @@ -1159,7 +1159,7 @@ void __kmp_finish_implicit_task(kmp_info_t *thread) { if (task->td_dephash) { int children; task->td_flags.complete = 1; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL task->td_flags.onced = 1; #endif children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks); @@ -1390,7 +1390,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, taskdata->td_flags.executing = 0; taskdata->td_flags.complete = 0; taskdata->td_flags.freed = 0; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL taskdata->td_flags.onced = 0; taskdata->is_taskgraph = 0; taskdata->tdg = nullptr; @@ -1430,7 +1430,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, } } -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id); if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) && (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) { @@ -1795,7 +1795,7 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, bool serialize_immediate) { kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (new_taskdata->is_taskgraph && __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) { kmp_tdg_info_t *tdg = new_taskdata->tdg; @@ -2364,7 +2364,7 @@ the reduction either does not use omp_orig object, or the omp_orig is accessible without help of the runtime library. 
*/ void *__kmpc_task_reduction_init(int gtid, int num, void *data) { -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id); if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) { kmp_tdg_info_t *this_tdg = __kmp_find_tdg(__kmp_curr_tdg_id); @@ -2391,7 +2391,7 @@ Note: this entry supposes the optional compiler-generated initializer routine has two parameters, pointer to object to be initialized and pointer to omp_orig */ void *__kmpc_taskred_init(int gtid, int num, void *data) { -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id); if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) { tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num); @@ -2445,7 +2445,7 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) { kmp_int32 num; kmp_int32 tid = thread->th.th_info.ds.ds_tid; -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if ((thread->th.th_current_task->is_taskgraph) && (!__kmp_tdg_is_recording( __kmp_find_tdg(__kmp_curr_tdg_id)->tdg_status))) { @@ -4223,7 +4223,7 @@ static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) { KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); taskdata->td_flags.complete = 1; // mark the task as completed -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL taskdata->td_flags.onced = 1; #endif @@ -4428,7 +4428,7 @@ void __kmp_fulfill_event(kmp_event_t *event) { // indicating whether we need to update task->td_task_id // returns: a pointer to the allocated kmp_task_t structure (task). 
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL , int taskloop_recur #endif ) { @@ -4459,7 +4459,7 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src task = KMP_TASKDATA_TO_TASK(taskdata); // Initialize new task (only specific fields not affected by memcpy) -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL if (taskdata->is_taskgraph && !taskloop_recur && __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status)) taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id); @@ -4692,7 +4692,7 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, } } -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 0); #else next_task = __kmp_task_dup_alloc(thread, task); // allocate new task @@ -4894,7 +4894,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task, lb1 = ub0 + st; // create pattern task for 2nd half of the loop -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 1); #else @@ -4932,7 +4932,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task, p->codeptr_ra = codeptr_ra; #endif -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task); new_task_data->tdg = taskdata->tdg; new_task_data->is_taskgraph = 0; @@ -4977,7 +4977,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, __kmpc_taskgroup(loc, gtid); } -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL KMP_ATOMIC_DEC(&__kmp_tdg_task_id); #endif // ========================================================================= @@ -5228,7 +5228,7 @@ bool __kmpc_omp_has_task_team(kmp_int32 gtid) { return taskdata->td_task_team != NULL; } -#if OMPX_TASKGRAPH +#if OMP_TASKGRAPH_EXPERIMENTAL // __kmpc_taskgraph: record or replay taskgraph // loc_ref: 
Location of TDG, not used yet // gtid: Global Thread ID of the encountering thread diff --git a/openmp/runtime/test/CMakeLists.txt b/openmp/runtime/test/CMakeLists.txt index a7790804542b7..377c6edf1ddb3 100644 --- a/openmp/runtime/test/CMakeLists.txt +++ b/openmp/runtime/test/CMakeLists.txt @@ -30,7 +30,7 @@ update_test_compiler_features() pythonize_bool(LIBOMP_USE_HWLOC) pythonize_bool(LIBOMP_OMPT_SUPPORT) pythonize_bool(LIBOMP_OMPT_OPTIONAL) -pythonize_bool(LIBOMP_OMPX_TASKGRAPH) +pythonize_bool(LIBOMP_TASKGRAPH_EXPERIMENTAL) pythonize_bool(LIBOMP_HAVE_LIBM) pythonize_bool(LIBOMP_HAVE_LIBATOMIC) pythonize_bool(OPENMP_STANDALONE_BUILD) diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg index 4a5aff241765c..7e2dc54cd518a 100644 --- a/openmp/runtime/test/lit.cfg +++ b/openmp/runtime/test/lit.cfg @@ -108,8 +108,8 @@ if config.has_ompt: # for callback.h config.test_flags += " -I " + config.test_source_root + "/ompt" -if config.has_ompx_taskgraph: - config.available_features.add("ompx_taskgraph") +if config.has_omp_taskgraph_experimental: + config.available_features.add("omp_taskgraph_experimental") if config.operating_system == 'AIX': config.available_features.add("aix") diff --git a/openmp/runtime/test/lit.site.cfg.in b/openmp/runtime/test/lit.site.cfg.in index fc65289e4ce64..ee3d4fb904901 100644 --- a/openmp/runtime/test/lit.site.cfg.in +++ b/openmp/runtime/test/lit.site.cfg.in @@ -16,7 +16,7 @@ config.target_triple = "@LLVM_TARGET_TRIPLE@" config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@" config.using_hwloc = @LIBOMP_USE_HWLOC@ config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@ -config.has_ompx_taskgraph = @LIBOMP_OMPX_TASKGRAPH@ +config.has_omp_taskgraph_experimental = @LIBOMP_TASKGRAPH_EXPERIMENTAL@ config.has_libm = @LIBOMP_HAVE_LIBM@ config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@ config.is_standalone_build = @OPENMP_STANDALONE_BUILD@ diff --git a/openmp/runtime/test/tasking/omp_record_replay.cpp 
b/openmp/runtime/test/tasking/omp_record_replay.cpp index 69ad98003a0d6..4fea22e081da9 100644 --- a/openmp/runtime/test/tasking/omp_record_replay.cpp +++ b/openmp/runtime/test/tasking/omp_record_replay.cpp @@ -1,4 +1,4 @@ -// REQUIRES: ompx_taskgraph +// REQUIRES: omp_taskgraph_experimental // RUN: %libomp-cxx-compile-and-run #include <iostream> #include <cassert> diff --git a/openmp/runtime/test/tasking/omp_record_replay_deps.cpp b/openmp/runtime/test/tasking/omp_record_replay_deps.cpp index 9b6b370b30efc..4c06ae3f7b273 100644 --- a/openmp/runtime/test/tasking/omp_record_replay_deps.cpp +++ b/openmp/runtime/test/tasking/omp_record_replay_deps.cpp @@ -1,4 +1,4 @@ -// REQUIRES: ompx_taskgraph +// REQUIRES: omp_taskgraph_experimental // RUN: %libomp-cxx-compile-and-run #include <iostream> #include <cassert> diff --git a/openmp/runtime/test/tasking/omp_record_replay_deps_multi_succ.cpp b/openmp/runtime/test/tasking/omp_record_replay_deps_multi_succ.cpp index 906fab335f510..6bcd3dee56030 100644 --- a/openmp/runtime/test/tasking/omp_record_replay_deps_multi_succ.cpp +++ b/openmp/runtime/test/tasking/omp_record_replay_deps_multi_succ.cpp @@ -1,4 +1,4 @@ -// REQUIRES: ompx_taskgraph +// REQUIRES: omp_taskgraph_experimental // RUN: %libomp-cxx-compile-and-run #include <omp.h> #include <cassert> diff --git a/openmp/runtime/test/tasking/omp_record_replay_multiTDGs.cpp b/openmp/runtime/test/tasking/omp_record_replay_multiTDGs.cpp index 03252843689c4..1864d5d89cc70 100644 --- a/openmp/runtime/test/tasking/omp_record_replay_multiTDGs.cpp +++ b/openmp/runtime/test/tasking/omp_record_replay_multiTDGs.cpp @@ -1,4 +1,4 @@ -// REQUIRES: ompx_taskgraph +// REQUIRES: omp_taskgraph_experimental // RUN: %libomp-cxx-compile-and-run #include <iostream> #include <cassert> diff --git a/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp b/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp index 2fe55f0815429..7f1f5ccd77d37 100644 --- 
a/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp +++ b/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp @@ -1,4 +1,4 @@ -// REQUIRES: ompx_taskgraph +// REQUIRES: omp_taskgraph_experimental // RUN: %libomp-cxx-compile-and-run #include <iostream> #include <fstream> diff --git a/openmp/runtime/test/tasking/omp_record_replay_taskloop.cpp b/openmp/runtime/test/tasking/omp_record_replay_taskloop.cpp index 3d88faeeb28ee..163a1b4192d85 100644 --- a/openmp/runtime/test/tasking/omp_record_replay_taskloop.cpp +++ b/openmp/runtime/test/tasking/omp_record_replay_taskloop.cpp @@ -1,4 +1,4 @@ -// REQUIRES: ompx_taskgraph +// REQUIRES: omp_taskgraph_experimental // RUN: %libomp-cxx-compile-and-run #include <iostream> #include <cassert> >From 07510cdad3b78b6dbc0cce85ad17a6e6bcfc2769 Mon Sep 17 00:00:00 2001 From: Julian Brown <julian.br...@amd.com> Date: Mon, 15 Sep 2025 05:25:55 -0500 Subject: [PATCH 4/5] [OpenMP] Taskgraph frontend support This is a version of the 'ompx taskgraph' support posted in PR66919, adapted to the official OpenMP 6.0 spelling of 'omp taskgraph', and with the 'ompx' extension parts removed. 
Co-authored-by: Adrian Munera <adrian.mun...@bsc.es> Co-authored-by: Jose M Monsalve Diaz <josem.monsalved...@amd.com> --- clang/include/clang-c/Index.h | 6 +- clang/include/clang/AST/RecursiveASTVisitor.h | 3 + clang/include/clang/AST/StmtOpenMP.h | 49 ++++++++++++ clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Sema/SemaOpenMP.h | 4 + .../include/clang/Serialization/ASTBitCodes.h | 1 + clang/lib/AST/StmtOpenMP.cpp | 15 ++++ clang/lib/AST/StmtPrinter.cpp | 4 + clang/lib/AST/StmtProfile.cpp | 5 ++ clang/lib/Basic/OpenMPKinds.cpp | 3 + clang/lib/CodeGen/CGOpenMPRuntime.cpp | 74 +++++++++++++++++++ clang/lib/CodeGen/CGOpenMPRuntime.h | 8 ++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 2 + clang/lib/CodeGen/CGStmt.cpp | 3 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 6 ++ clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 31 ++++++++ clang/lib/Sema/TreeTransform.h | 11 +++ clang/lib/Serialization/ASTReaderStmt.cpp | 10 +++ clang/lib/Serialization/ASTWriterStmt.cpp | 6 ++ clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 + clang/tools/libclang/CIndex.cpp | 2 + clang/tools/libclang/CXCursor.cpp | 3 + .../include/llvm/Frontend/OpenMP/OMPKinds.def | 1 + 25 files changed, 250 insertions(+), 1 deletion(-) diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index be038d9165fc6..4bf1501e27382 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2218,7 +2218,11 @@ enum CXCursorKind { */ CXCursor_OpenACCCacheConstruct = 333, - CXCursor_LastStmt = CXCursor_OpenACCCacheConstruct, + /** OpenMP taskgraph directive. + */ + CXCursor_OMPTaskgraphDirective = 334, + + CXCursor_LastStmt = CXCursor_OMPTaskgraphDirective, /** * Cursor that represents the translation unit itself. 
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 1d1b7f183f75a..516ddbe62c9d9 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3232,6 +3232,9 @@ DEF_TRAVERSE_STMT(OMPBarrierDirective, DEF_TRAVERSE_STMT(OMPTaskwaitDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPTaskgraphDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPTaskgroupDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index d9f87f1e49b40..3750ccae41fb4 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -2729,6 +2729,55 @@ class OMPTaskwaitDirective : public OMPExecutableDirective { } }; +/// This represents '#pragma omp taskgraph' directive. +/// Available with OpenMP 6.0. +/// +/// \code +/// #pragma omp taskgraph +/// \endcode +/// +class OMPTaskgraphDirective final : public OMPExecutableDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. + /// + OMPTaskgraphDirective(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPExecutableDirective(OMPTaskgraphDirectiveClass, + llvm::omp::OMPD_taskgraph, StartLoc, EndLoc) {} + + /// Build an empty directive. + /// + explicit OMPTaskgraphDirective() + : OMPExecutableDirective(OMPTaskgraphDirectiveClass, + llvm::omp::OMPD_taskgraph, SourceLocation(), + SourceLocation()) {} + +public: + /// Creates directive. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. 
+ /// + static OMPTaskgraphDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt); + + /// Creates an empty directive. + /// + /// \param C AST context. + /// + static OMPTaskgraphDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPTaskgraphDirectiveClass; + } +}; + /// This represents '#pragma omp taskgroup' directive. /// /// \code diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index dd1a24405fae7..7fa54727de14c 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -253,6 +253,7 @@ def OMPTaskDirective : StmtNode<OMPExecutableDirective>; def OMPTaskyieldDirective : StmtNode<OMPExecutableDirective>; def OMPBarrierDirective : StmtNode<OMPExecutableDirective>; def OMPTaskwaitDirective : StmtNode<OMPExecutableDirective>; +def OMPTaskgraphDirective : StmtNode<OMPExecutableDirective>; def OMPTaskgroupDirective : StmtNode<OMPExecutableDirective>; def OMPFlushDirective : StmtNode<OMPExecutableDirective>; def OMPDepobjDirective : StmtNode<OMPExecutableDirective>; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index c0fd7a6d63611..2aafaac3ac3f8 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -550,6 +550,10 @@ class SemaOpenMP : public SemaBase { /// Called on well-formed '\#pragma omp barrier'. StmtResult ActOnOpenMPBarrierDirective(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp taskgraph'. + StmtResult ActOnOpenMPTaskgraphDirective(ArrayRef<OMPClause *> Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '\#pragma omp taskwait'. 
StmtResult ActOnOpenMPTaskwaitDirective(ArrayRef<OMPClause *> Clauses, SourceLocation StartLoc, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 441047d64f48c..09432273834d1 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1968,6 +1968,7 @@ enum StmtCode { STMT_OMP_ERROR_DIRECTIVE, STMT_OMP_BARRIER_DIRECTIVE, STMT_OMP_TASKWAIT_DIRECTIVE, + STMT_OMP_TASKGRAPH_DIRECTIVE, STMT_OMP_FLUSH_DIRECTIVE, STMT_OMP_DEPOBJ_DIRECTIVE, STMT_OMP_SCAN_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 1f6586f95a9f8..4fafedfb8602c 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -882,6 +882,21 @@ OMPTaskwaitDirective *OMPTaskwaitDirective::CreateEmpty(const ASTContext &C, return createEmptyDirective<OMPTaskwaitDirective>(C, NumClauses); } +OMPTaskgraphDirective *OMPTaskgraphDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) { + auto *Dir = createDirective<OMPTaskgraphDirective>( + C, Clauses, AssociatedStmt, /*NumChildren=*/1, StartLoc, EndLoc); + return Dir; +} + +OMPTaskgraphDirective *OMPTaskgraphDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + EmptyShell) { + return createEmptyDirective<OMPTaskgraphDirective>( + C, NumClauses, /*HasAssociatedStmt=*/true, /*NumChildren=*/1); +} + OMPTaskgroupDirective *OMPTaskgroupDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *ReductionRef) { diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 0030300521128..a9e42d50e3ea8 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -891,6 +891,10 @@ void StmtPrinter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *Node) { void 
StmtPrinter::VisitOMPAssumeDirective(OMPAssumeDirective *Node) { Indent() << "#pragma omp assume"; +} + +void StmtPrinter::VisitOMPTaskgraphDirective(OMPTaskgraphDirective *Node) { + Indent() << "#pragma omp taskgraph"; PrintOMPExecutableDirective(Node); } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 37c4d43ec0b2f..f08a2da81325f 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1103,9 +1103,14 @@ void StmtProfiler::VisitOMPAssumeDirective(const OMPAssumeDirective *S) { VisitOMPExecutableDirective(S); } +void StmtProfiler::VisitOMPTaskgraphDirective(const OMPTaskgraphDirective *S) { + VisitOMPExecutableDirective(S); +} + void StmtProfiler::VisitOMPErrorDirective(const OMPErrorDirective *S) { VisitOMPExecutableDirective(S); } + void StmtProfiler::VisitOMPTaskgroupDirective(const OMPTaskgroupDirective *S) { VisitOMPExecutableDirective(S); if (const Expr *E = S->getReductionRef()) diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 508685883364c..8a0466b354f4d 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -869,6 +869,9 @@ void clang::getOpenMPCaptureRegions( case OMPD_taskloop: CaptureRegions.push_back(OMPD_taskloop); break; + case OMPD_taskgraph: + CaptureRegions.push_back(OMPD_taskgraph); + break; case OMPD_loop: // TODO: 'loop' may require different capture regions depending on the // bind clause or the parent directive when there is no bind clause. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a503aaf613e30..91f9254c662d9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -58,6 +58,8 @@ class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { ParallelOutlinedRegion, /// Region with outlined function for standalone 'task' directive. 
TaskOutlinedRegion, + /// Region with outlined function for standalone 'taskgraph' directive. + TaskgraphOutlinedRegion, /// Region for constructs that do not require function outlining, /// like 'for', 'sections', 'atomic' etc. directives. InlinedRegion, @@ -232,6 +234,26 @@ class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { const UntiedTaskActionTy &Action; }; +/// API for captured statement code generation in OpenMP taskgraphs. +class CGOpenMPTaskgraphRegionInfo final : public CGOpenMPRegionInfo { +public: + CGOpenMPTaskgraphRegionInfo(const CapturedStmt &CS, + const RegionCodeGenTy &CodeGen) + : CGOpenMPRegionInfo(CS, TaskgraphOutlinedRegion, CodeGen, + llvm::omp::OMPD_taskgraph, false) {} + + const VarDecl *getThreadIDVariable() const override { return 0; } + + /// Get the name of the capture helper. + StringRef getHelperName() const override { return "taskgraph.omp_outlined."; } + + static bool classof(const CGCapturedStmtInfo *Info) { + return CGOpenMPRegionInfo::classof(Info) && + cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == + TaskgraphOutlinedRegion; + } +}; + /// API for inlined captured statement code generation in OpenMP /// constructs. class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { @@ -5954,6 +5976,48 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, Region->emitUntiedSwitch(CGF); } +void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF, + SourceLocation Loc, + const OMPExecutableDirective &D) { + if (!CGF.HaveInsertPoint()) + return; + + // Building kmp_taskgraph_flags_t flags for kmpc_taskgraph. C.f., kmp.h + enum { + NowaitFlag = 0x1, // Not used yet. 
+ ReRecordFlag = 0x2, + }; + + unsigned Flags = 0; + + CodeGenFunction OutlinedCGF(CGM, true); + + const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + + auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(CS->getCapturedStmt()); + }; + + LValue CapStruct = CGF.InitCapturedStruct(*CS); + CGOpenMPTaskgraphRegionInfo TaskgraphRegion(*CS, BodyGen); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(OutlinedCGF, + &TaskgraphRegion); + llvm::Function *FnT = OutlinedCGF.GenerateCapturedStmtFunction(*CS); + + std::vector<llvm::Value *> Args{ + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), + CGF.Builder.getInt32(Flags), + CGF.Builder.getInt32(D.getBeginLoc().getHashValue()), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(FnT, CGM.VoidPtrTy), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CapStruct.getPointer(OutlinedCGF), CGM.VoidPtrTy)}; + + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskgraph), + Args); +} + void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnerKind, const RegionCodeGenTy &CodeGen, @@ -6389,6 +6453,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( case OMPD_taskyield: case OMPD_barrier: case OMPD_taskwait: + case OMPD_taskgraph: case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: @@ -9402,6 +9467,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_taskyield: case OMPD_barrier: case OMPD_taskwait: + case OMPD_taskgraph: case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: @@ -10048,6 +10114,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_taskyield: case OMPD_barrier: case OMPD_taskwait: + case OMPD_taskgraph: case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: @@ -10615,6 +10682,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_taskyield: case OMPD_barrier: case OMPD_taskwait: + case 
OMPD_taskgraph: case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: @@ -12355,6 +12423,12 @@ void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, llvm_unreachable("Not supported in SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitTaskgraphCall(CodeGenFunction &CGF, + SourceLocation Loc, + const OMPExecutableDirective &D) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + void CGOpenMPSIMDRuntime::emitCancellationPointCall( CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index eb04eceee236c..7e8dc651a5b9a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1360,6 +1360,10 @@ class CGOpenMPRuntime { virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data); + /// Emit code for 'taskgraph' directive. + virtual void emitTaskgraphCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D); + /// Emit code for 'cancellation point' construct. /// \param CancelRegion Region kind for which the cancellation point must be /// emitted. @@ -2186,6 +2190,10 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime { void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override; + /// Emit code for 'taskgraph' directive. + void emitTaskgraphCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D) override; + /// Emit code for 'cancellation point' construct. /// \param CancelRegion Region kind for which the cancellation point must be /// emitted. 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 44a091e1b3c75..03eef1bf44a6d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -572,6 +572,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_taskyield: case OMPD_barrier: case OMPD_taskwait: + case OMPD_taskgraph: case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: @@ -660,6 +661,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_taskyield: case OMPD_barrier: case OMPD_taskwait: + case OMPD_taskgraph: case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index aeff73d525c10..d18832e74ca35 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -282,6 +282,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPTaskwaitDirectiveClass: EmitOMPTaskwaitDirective(cast<OMPTaskwaitDirective>(*S)); break; + case Stmt::OMPTaskgraphDirectiveClass: + EmitOMPTaskgraphDirective(cast<OMPTaskgraphDirective>(*S)); + break; case Stmt::OMPTaskgroupDirectiveClass: EmitOMPTaskgroupDirective(cast<OMPTaskgroupDirective>(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index d72cd8fbfd608..bc14dac537337 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1418,6 +1418,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( case OMPD_error: case OMPD_barrier: case OMPD_taskwait: + case OMPD_taskgraph: case OMPD_taskgroup: case OMPD_flush: case OMPD_depobj: @@ -5586,6 +5587,11 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); } +void CodeGenFunction::EmitOMPTaskgraphDirective( + const OMPTaskgraphDirective &S) { + CGM.getOpenMPRuntime().emitTaskgraphCall(*this, 
S.getBeginLoc(), S); +} + static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { return T.clauses().empty(); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 727487b46054f..863d098c8b373 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3879,6 +3879,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPErrorDirective(const OMPErrorDirective &S); void EmitOMPBarrierDirective(const OMPBarrierDirective &S); void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S); + void EmitOMPTaskgraphDirective(const OMPTaskgraphDirective &S); void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S); void EmitOMPFlushDirective(const OMPFlushDirective &S); void EmitOMPDepobjDirective(const OMPDepobjDirective &S); diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 552c92996dc2e..a38e08b6e67e4 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1511,6 +1511,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPScopeDirectiveClass: case Stmt::OMPTaskDirectiveClass: case Stmt::OMPTaskgroupDirectiveClass: + case Stmt::OMPTaskgraphDirectiveClass: case Stmt::OMPTaskLoopDirectiveClass: case Stmt::OMPTaskLoopSimdDirectiveClass: case Stmt::OMPTaskwaitDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 48e06d1dc7579..1e94aa9c2f806 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4467,6 +4467,14 @@ getUnknownRegionParams(Sema &SemaRef) { return Params; } +static SmallVector<SemaOpenMP::CapturedParamNameType> +getTaskgraphRegionParams(Sema &SemaRef) { + SmallVector<SemaOpenMP::CapturedParamNameType> Params{ + std::make_pair(StringRef(), QualType()) // __context with shared vars + }; + return Params; +} + static SmallVector<SemaOpenMP::CapturedParamNameType> getTaskloopRegionParams(Sema 
&SemaRef) { ASTContext &Context = SemaRef.getASTContext(); @@ -4540,6 +4548,10 @@ static void processCapturedRegions(Sema &SemaRef, OpenMPDirectiveKind DKind, // function directly. MarkAsInlined(SemaRef.getCurCapturedRegion()); break; + case OMPD_taskgraph: + SemaRef.ActOnCapturedRegionStart( + Loc, CurScope, CR_OpenMP, getTaskgraphRegionParams(SemaRef), Level); + break; case OMPD_target: SemaRef.ActOnCapturedRegionStart(Loc, CurScope, CR_OpenMP, getTargetRegionParams(SemaRef), Level); @@ -6496,6 +6508,12 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( "No associated statement allowed for 'omp taskwait' directive"); Res = ActOnOpenMPTaskwaitDirective(ClausesWithImplicit, StartLoc, EndLoc); break; + case OMPD_taskgraph: + assert(AStmt != nullptr && + "Associated statement required for 'omp taskgraph' directive"); + Res = ActOnOpenMPTaskgraphDirective(ClausesWithImplicit, AStmt, StartLoc, + EndLoc); + break; case OMPD_taskgroup: Res = ActOnOpenMPTaskgroupDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); @@ -11330,6 +11348,19 @@ SemaOpenMP::ActOnOpenMPTaskwaitDirective(ArrayRef<OMPClause *> Clauses, Clauses); } +StmtResult +SemaOpenMP::ActOnOpenMPTaskgraphDirective(ArrayRef<OMPClause *> Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc) { + if (!AStmt) + return StmtError(); + + assert(isa<CapturedStmt>(AStmt) && "Captured statement expected"); + + return OMPTaskgraphDirective::Create(getASTContext(), StartLoc, EndLoc, + Clauses, AStmt); +} + StmtResult SemaOpenMP::ActOnOpenMPTaskgroupDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 242ffb09af006..c47f24b2fa034 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9804,6 +9804,17 @@ TreeTransform<Derived>::TransformOMPAssumeDirective(OMPAssumeDirective *D) { return Res; } +template <typename Derived> +StmtResult 
TreeTransform<Derived>::TransformOMPTaskgraphDirective( + OMPTaskgraphDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + OMPD_taskgraph, DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template <typename Derived> StmtResult TreeTransform<Derived>::TransformOMPErrorDirective(OMPErrorDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 213c2c2148f64..aee318bce166e 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2572,6 +2572,11 @@ void ASTStmtReader::VisitOMPAssumeDirective(OMPAssumeDirective *D) { VisitOMPExecutableDirective(D); } +void ASTStmtReader::VisitOMPTaskgraphDirective(OMPTaskgraphDirective *D) { + VisitStmt(D); + VisitOMPExecutableDirective(D); +} + void ASTStmtReader::VisitOMPErrorDirective(OMPErrorDirective *D) { VisitStmt(D); // The NumClauses field was read in ReadStmtFromStream. 
@@ -3715,6 +3720,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { Context, Record[ASTStmtReader::NumStmtFields], Empty); break; + case STMT_OMP_TASKGRAPH_DIRECTIVE: + S = OMPTaskgraphDirective::CreateEmpty( + Context, Record[ASTStmtReader::NumStmtFields], Empty); + break; + case STMT_OMP_ERROR_DIRECTIVE: S = OMPErrorDirective::CreateEmpty( Context, Record[ASTStmtReader::NumStmtFields], Empty); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 21c04ddbc2c7a..b3fd2084a76aa 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2653,6 +2653,12 @@ void ASTStmtWriter::VisitOMPAssumeDirective(OMPAssumeDirective *D) { Code = serialization::STMT_OMP_ASSUME_DIRECTIVE; } +void ASTStmtWriter::VisitOMPTaskgraphDirective(OMPTaskgraphDirective *D) { + VisitStmt(D); + VisitOMPExecutableDirective(D); + Code = serialization::STMT_OMP_TASKGRAPH_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPErrorDirective(OMPErrorDirective *D) { VisitStmt(D); Record.push_back(D->getNumClauses()); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 785cdfa15bf04..5e01d45fbef84 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1767,6 +1767,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTaskyieldDirectiveClass: case Stmt::OMPBarrierDirectiveClass: case Stmt::OMPTaskwaitDirectiveClass: + case Stmt::OMPTaskgraphDirectiveClass: case Stmt::OMPErrorDirectiveClass: case Stmt::OMPTaskgroupDirectiveClass: case Stmt::OMPFlushDirectiveClass: diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 5aab74348967d..bae1cc7b69b72 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -6311,6 +6311,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return 
cxstring::createRef("OMPTaskwaitDirective"); case CXCursor_OMPAssumeDirective: return cxstring::createRef("OMPAssumeDirective"); + case CXCursor_OMPTaskgraphDirective: + return cxstring::createRef("OMPTaskgraphDirective"); case CXCursor_OMPErrorDirective: return cxstring::createRef("OMPErrorDirective"); case CXCursor_OMPTaskgroupDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 3c4062410eac1..2ee37874344fb 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -738,6 +738,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPTaskwaitDirectiveClass: K = CXCursor_OMPTaskwaitDirective; break; + case Stmt::OMPTaskgraphDirectiveClass: + K = CXCursor_OMPTaskgraphDirective; + break; case Stmt::OMPErrorDirectiveClass: K = CXCursor_OMPErrorDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 01ca8da759ef7..89e37735b7d37 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -354,6 +354,7 @@ __OMP_RTL(__kmpc_omp_task_alloc, false, /* kmp_task_t */ VoidPtr, IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr) __OMP_RTL(__kmpc_omp_task, false, Int32, IdentPtr, Int32, /* kmp_task_t */ VoidPtr) +__OMP_RTL(__kmpc_taskgraph, false, Void, IdentPtr, Int32, Int32, Int32, VoidPtr, VoidPtr) __OMP_RTL(__kmpc_end_taskgroup, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_taskgroup, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_task_begin_if0, false, Void, IdentPtr, Int32, >From ffe5866df672f937c24264641cba8c360248075c Mon Sep 17 00:00:00 2001 From: Julian Brown <julian.br...@amd.com> Date: Mon, 15 Sep 2025 05:54:11 -0500 Subject: [PATCH 5/5] [OpenMP] New/derived taskgraph tests This patch adds new tests for 'omp taskgraph' functionality, but (unlike the patches posted in PR66919) leaves the existing tests using the internal
runtime API for record and replay as-is. I have changed the 'print_dot' tests to use FileCheck instead of their own internal checking, though. Co-authored-by: Adrian Munera <adrian.mun...@bsc.es> --- .../tasking/omp_record_replay_print_dot.cpp | 51 ++++++-------- openmp/runtime/test/tasking/omp_taskgraph.cpp | 35 ++++++++++ .../test/tasking/omp_taskgraph_deps.cpp | 52 +++++++++++++++ .../test/tasking/omp_taskgraph_multiTDGs.cpp | 66 +++++++++++++++++++ .../test/tasking/omp_taskgraph_print_dot.cpp | 58 ++++++++++++++++ .../test/tasking/omp_taskgraph_taskloop.cpp | 39 +++++++++++ 6 files changed, 270 insertions(+), 31 deletions(-) create mode 100644 openmp/runtime/test/tasking/omp_taskgraph.cpp create mode 100644 openmp/runtime/test/tasking/omp_taskgraph_deps.cpp create mode 100644 openmp/runtime/test/tasking/omp_taskgraph_multiTDGs.cpp create mode 100644 openmp/runtime/test/tasking/omp_taskgraph_print_dot.cpp create mode 100644 openmp/runtime/test/tasking/omp_taskgraph_taskloop.cpp diff --git a/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp b/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp index 7f1f5ccd77d37..fcf9868db3f51 100644 --- a/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp +++ b/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp @@ -1,8 +1,9 @@ // REQUIRES: omp_taskgraph_experimental // RUN: %libomp-cxx-compile-and-run -#include <iostream> -#include <fstream> -#include <sstream> +// RUN: cat tdg_0.dot | FileCheck %s +// RUN: rm -f tdg_0.dot + +#include <cstdlib> #include <cassert> // Compiler-generated code (emulation) @@ -23,28 +24,13 @@ void func(int *num_exec) { (*num_exec)++; } -std::string tdg_string= "digraph TDG {\n" -" compound=true\n" -" subgraph cluster {\n" -" label=TDG_0\n" -" 0[style=bold]\n" -" 1[style=bold]\n" -" 2[style=bold]\n" -" 3[style=bold]\n" -" }\n" -" 0 -> 1 \n" -" 1 -> 2 \n" -" 1 -> 3 \n" -"}"; - int main() { int num_exec = 0; int x, y; - setenv("KMP_TDG_DOT","TRUE",1); - 
remove("tdg_0.dot"); + setenv("KMP_TDG_DOT", "TRUE", 1); - #pragma omp parallel +#pragma omp parallel #pragma omp single { int gtid = __kmpc_global_thread_num(nullptr); @@ -65,16 +51,19 @@ int main() { assert(num_exec == 4); - std::ifstream tdg_file("tdg_0.dot"); - assert(tdg_file.is_open()); - - std::stringstream tdg_file_stream; - tdg_file_stream << tdg_file.rdbuf(); - int equal = tdg_string.compare(tdg_file_stream.str()); - - assert(equal == 0); - - std::cout << "Passed" << std::endl; return 0; } -// CHECK: Passed + +// CHECK: digraph TDG { +// CHECK-NEXT: compound=true +// CHECK-NEXT: subgraph cluster { +// CHECK-NEXT: label=TDG_0 +// CHECK-NEXT: 0[style=bold] +// CHECK-NEXT: 1[style=bold] +// CHECK-NEXT: 2[style=bold] +// CHECK-NEXT: 3[style=bold] +// CHECK-NEXT: } +// CHECK-NEXT: 0 -> 1 +// CHECK-NEXT: 1 -> 2 +// CHECK-NEXT: 1 -> 3 +// CHECK-NEXT: } diff --git a/openmp/runtime/test/tasking/omp_taskgraph.cpp b/openmp/runtime/test/tasking/omp_taskgraph.cpp new file mode 100644 index 0000000000000..363a7da8c145a --- /dev/null +++ b/openmp/runtime/test/tasking/omp_taskgraph.cpp @@ -0,0 +1,35 @@ +// REQUIRES: omp_taskgraph_experimental +// RUN: %libomp-cxx-compile-and-run +#include <iostream> +#include <cassert> +#define NT 100 + +// Compiler-generated code (emulation) +typedef struct ident { + void *dummy; +} ident_t; + +void func(int *num_exec) { (*num_exec)++; } + +int main() { + int num_exec = 0; + int num_tasks = 0; + int x = 0; +#pragma omp parallel +#pragma omp single + for (int iter = 0; iter < NT; ++iter) { +#pragma omp taskgraph + { + num_tasks++; +#pragma omp task + func(&num_exec); + } + } + + assert(num_tasks == 1); + assert(num_exec == NT); + + std::cout << "Passed" << std::endl; + return 0; +} +// CHECK: Passed diff --git a/openmp/runtime/test/tasking/omp_taskgraph_deps.cpp b/openmp/runtime/test/tasking/omp_taskgraph_deps.cpp new file mode 100644 index 0000000000000..3341b019a5095 --- /dev/null +++ 
b/openmp/runtime/test/tasking/omp_taskgraph_deps.cpp @@ -0,0 +1,52 @@ +// REQUIRES: omp_taskgraph_experimental +// RUN: %libomp-cxx-compile-and-run +#include <iostream> +#include <cassert> +#define NT 100 +#define MULTIPLIER 100 +#define DECREMENT 5 + +int val; +// Compiler-generated code (emulation) +typedef struct ident { + void *dummy; +} ident_t; + +void sub() { +#pragma omp atomic + val -= DECREMENT; +} + +void add() { +#pragma omp atomic + val += DECREMENT; +} + +void mult() { + // no atomicity needed, can only be executed by 1 thread + // and no concurrency with other tasks possible + val *= MULTIPLIER; +} + +int main() { + val = 0; + int *x, *y; +#pragma omp parallel +#pragma omp single + for (int iter = 0; iter < NT; ++iter) { +#pragma omp taskgraph + { +#pragma omp task depend(out : y) + add(); +#pragma omp task depend(out : x) + sub(); +#pragma omp task depend(in : x, y) + mult(); + } + } + assert(val == 0); + + std::cout << "Passed" << std::endl; + return 0; +} +// CHECK: Passed diff --git a/openmp/runtime/test/tasking/omp_taskgraph_multiTDGs.cpp b/openmp/runtime/test/tasking/omp_taskgraph_multiTDGs.cpp new file mode 100644 index 0000000000000..98a4ee27d0d5b --- /dev/null +++ b/openmp/runtime/test/tasking/omp_taskgraph_multiTDGs.cpp @@ -0,0 +1,66 @@ +// REQUIRES: omp_taskgraph_experimental +// RUN: %libomp-cxx-compile-and-run +#include <iostream> +#include <cassert> +#define NT 20 +#define MULTIPLIER 100 +#define DECREMENT 5 + +// Compiler-generated code (emulation) +typedef struct ident { + void *dummy; +} ident_t; + +int val; + +void sub() { +#pragma omp atomic + val -= DECREMENT; +} + +void add() { +#pragma omp atomic + val += DECREMENT; +} + +void mult() { + // no atomicity needed, can only be executed by 1 thread + // and no concurrency with other tasks possible + val *= MULTIPLIER; +} + +int main() { + int num_tasks = 0; + int *x, *y; +#pragma omp parallel +#pragma omp single + for (int iter = 0; iter < NT; ++iter) { +#pragma omp taskgraph + { + 
num_tasks++; +#pragma omp task depend(out : y) + add(); +#pragma omp task depend(out : x) + sub(); +#pragma omp task depend(in : x, y) + mult(); + } +#pragma omp taskgraph + { + num_tasks++; +#pragma omp task depend(out : y) + add(); +#pragma omp task depend(out : x) + sub(); +#pragma omp task depend(in : x, y) + mult(); + } + } + + assert(num_tasks == 2); + assert(val == 0); + + std::cout << "Passed" << std::endl; + return 0; +} +// CHECK: Passed diff --git a/openmp/runtime/test/tasking/omp_taskgraph_print_dot.cpp b/openmp/runtime/test/tasking/omp_taskgraph_print_dot.cpp new file mode 100644 index 0000000000000..11bbcc94427d5 --- /dev/null +++ b/openmp/runtime/test/tasking/omp_taskgraph_print_dot.cpp @@ -0,0 +1,58 @@ +// REQUIRES: omp_taskgraph_experimental +// RUN: %libomp-cxx-compile-and-run +// RUN: cat tdg_17723.dot | FileCheck %s +// RUN: rm -f tdg_17723.dot + +#include <cstdlib> +#include <cassert> + +// Compiler-generated code (emulation) +typedef struct ident { + void *dummy; +} ident_t; + +void func(int *num_exec) { +#pragma omp atomic + (*num_exec)++; +} + +int main() { + int num_exec = 0; + int x, y; + + setenv("KMP_TDG_DOT", "TRUE", 1); + +#pragma omp parallel +#pragma omp single + { +#pragma omp taskgraph + { +#pragma omp task depend(out : x) + func(&num_exec); +#pragma omp task depend(in : x) depend(out : y) + func(&num_exec); +#pragma omp task depend(in : y) + func(&num_exec); +#pragma omp task depend(in : y) + func(&num_exec); + } + } + + assert(num_exec == 4); + + return 0; +} + +// CHECK: digraph TDG { +// CHECK-NEXT: compound=true +// CHECK-NEXT: subgraph cluster { +// CHECK-NEXT: label=TDG_17723 +// CHECK-NEXT: 0[style=bold] +// CHECK-NEXT: 1[style=bold] +// CHECK-NEXT: 2[style=bold] +// CHECK-NEXT: 3[style=bold] +// CHECK-NEXT: } +// CHECK-NEXT: 0 -> 1 +// CHECK-NEXT: 1 -> 2 +// CHECK-NEXT: 1 -> 3 +// CHECK-NEXT: } diff --git a/openmp/runtime/test/tasking/omp_taskgraph_taskloop.cpp b/openmp/runtime/test/tasking/omp_taskgraph_taskloop.cpp new 
file mode 100644 index 0000000000000..bbea64a2e92af --- /dev/null +++ b/openmp/runtime/test/tasking/omp_taskgraph_taskloop.cpp @@ -0,0 +1,39 @@ +// REQUIRES: omp_taskgraph_experimental +// RUN: %libomp-cxx-compile-and-run +#include <iostream> +#include <cassert> + +#define NT 20 +#define N 128 * 128 + +typedef struct ident { + void *dummy; +} ident_t; + +int main() { + int num_tasks = 0; + + int array[N]; + for (int i = 0; i < N; ++i) + array[i] = 1; + + long sum = 0; +#pragma omp parallel +#pragma omp single + for (int iter = 0; iter < NT; ++iter) { +#pragma omp taskgraph + { + num_tasks++; +#pragma omp taskloop reduction(+ : sum) num_tasks(4096) + for (int i = 0; i < N; ++i) { + sum += array[i]; + } + } + } + assert(sum == N * NT); + assert(num_tasks == 1); + + std::cout << "Passed" << std::endl; + return 0; +} +// CHECK: Passed _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits