Hi, all, This is the fixes for several OpenMP bugs. Could gatekeeper help to review it? I attached the test case for the fix.
I have fixed the following bugs: 1. bug of driver (see test_has_openmp.c) The case works with -mp but fails with -fopenmp. Since the command line options should be compatible with gcc, -fopenmp should also work. I fixed this bug by change osprey/driver/OPTION. 2. bug of atomic directive (see test_atomic.c) Atomic directive did not work correctly sometimes. Without this fix, atomic directive was lowered to INTRN_COMPARE_AND_SWAP_XX. The lowered code checks whether its result is non-zero, to decide whether to try again. So INTRN_COMPARE_AND_SWAP_XX should be replaced by INTRN_BOOL_COMPARE_AND_SWAP in the lowered code. The change for omp_lower.cxx is for this bug. 3. bug of variable declared in nested block (see test_local_var.c) For code like this: int s; #pragma parallel { int t; .... } In the parallel region, the variable s should be shared by default, and the variable t should be private. However, in WHIRL, all the local variables are put in the symbol table of current function, so the declaring block for local variables such as s and t cannot be distinguished. Therefore, in the backend, both s and t are treated as shared variable, which is not correct. I fixed this bug by adding some code in wgen. By this fix, wgen will insert an explicit private pragma for local variables such as t. 4. bug of nested OpenMP directives (see test_nested_critical.c) Some OpenMP directives, like critical, master, can be nested with each other. Most OpenMP directives are lowered by lower_mp() (in osprey/be/com/wn_mp.cxx). lower_mp() is called by lower_block() (in osprey/be/com/wn_lower.cxx). lower_mp() returns the first statement of the replacing block, so lower_block() will call lower_mp() again for the nested directives. I believe that is the right way to deal with the nested OpenMP directives ( another way may be run lower_mp() recursively, however, since lower_mp() uses many global variables, I think it was not designed to be called recursively). However, there is a class Verify_MP_Lowered in wn_mp.cxx to check whether all the generated code in lower_mp() has not OpenMP directives. Since for nested OpenMP directives, the generated code in the first invocation of lower_mp() would certainly contain OpenMP directives (which will be lowered in the next invocation of lower_mp()), I think we should remove that verifier here. Most changes of wn_mp.cxx is for that. After applying the fixes, all the cases work. Thanks. Regards, Jiangzhou -- Jiangzhou HE (何江舟) Institute of High-Performance Computing Department of Computer Science and Technology Tsinghua University, Beijing, China
diff --git a/osprey/be/be/omp_lower.cxx b/osprey/be/be/omp_lower.cxx index 12e74e9..4e8f13a 100644 --- a/osprey/be/be/omp_lower.cxx +++ b/osprey/be/be/omp_lower.cxx @@ -2272,10 +2272,10 @@ Atomic_Using_Swap(WN *atomic, WN *store, WN *operation, WN *parent, WN *c_s; if (swap_type == MTYPE_I4) { c_s=WN_Create_Intrinsic(OPC_U4INTRINSIC_CALL, - INTRN_COMPARE_AND_SWAP_I4,3,kids); + INTRN_BOOL_COMPARE_AND_SWAP_I4,3,kids); } else { c_s=WN_Create_Intrinsic(OPC_U8INTRINSIC_CALL, - INTRN_COMPARE_AND_SWAP_I8,3,kids); + INTRN_BOOL_COMPARE_AND_SWAP_I8,3,kids); } WN_Set_Call_Parm_Mod(c_s); WN_Set_Call_Parm_Ref(c_s); @@ -2444,8 +2444,8 @@ Get_ATOMIC_Update_LDA(WN *wn) case INTRN_FETCH_AND_OR_I8: case INTRN_FETCH_AND_XOR_I8: // from Atomic_Using_Swap() - case INTRN_COMPARE_AND_SWAP_I4: - case INTRN_COMPARE_AND_SWAP_I8: + case INTRN_BOOL_COMPARE_AND_SWAP_I4: + case INTRN_BOOL_COMPARE_AND_SWAP_I8: break; default: return NULL; diff --git a/osprey/be/com/wn_mp.cxx b/osprey/be/com/wn_mp.cxx index 6a86fc4..feb93df 100644 --- a/osprey/be/com/wn_mp.cxx +++ b/osprey/be/com/wn_mp.cxx @@ -822,109 +822,6 @@ static ST_IDX mpr_sts [MPRUNTIME_LAST + 1] = { #define MPSP_STATUS_PREG_NAME "mpsp_status" #define IS_MASTER_PREG_NAME "mp_is_master" - -/* -After returning from lower_mp(), all MP constructs should have been lowered -in both the Whirl tree passed to lower_mp() and the nested PU (if one was -created). If all MP constructs have been lowered in a Whirl tree, the tree -will contain no MP pragmas, MP IF's, or non-POD finalization IF's. This -class verifies this post- condition just before returning from lower_mp(). - -This class could easily be extended to perform additional verification on -the lowered Whirl. -*/ - -class Verify_MP_Lowered { - BOOL replace_block_set; - WN *replace_block_start; - // i.e. WN_next() of last node in replace_block - WN *replace_block_sibling; - BOOL nested_pu_set; - WN *nested_pu; - - static void Verify_No_MP(WN *tree); - -public: - Verify_MP_Lowered() : replace_block_set(FALSE), nested_pu_set(FALSE) { } - - void Set_replace_block(WN *replace, WN *sibling) { - Is_True(!replace_block_set, ("Set_replace_block() called already")); - replace_block_set = TRUE; - replace_block_start = replace; - replace_block_sibling = sibling; - } - - void Set_nested_pu_tree(WN *pu) { - Is_True(!nested_pu_set, ("Set_nested_pu_tree() called already")); - nested_pu_set = TRUE; - nested_pu = pu; - } - - void Set_replace_block_and_nested_pu(WN *replace, WN *sibling, WN *pu) { - Is_True(!replace_block_set && !nested_pu_set, - ("replace_block_start and/or nested_pu set already")); - replace_block_set = TRUE; - replace_block_start = replace; - replace_block_sibling = sibling; - nested_pu_set = TRUE; - nested_pu = pu; - } - - ~Verify_MP_Lowered(); -}; - -/* -Destructor verifies that both the replace_block and nested PU have been set -(either may be NULL), then verifies that neither contains any MP pragmas, -MP IF's, or non-POD finalization IF's. If compiled without debugging -support, it does nothing. -*/ - -Verify_MP_Lowered::~Verify_MP_Lowered() -{ - Is_True(replace_block_set, ("replace_block_start not set")); - Is_True(nested_pu_set, ("nested_pu not set")); - -#ifdef Is_True_On - for (WN *wn = replace_block_start; wn && wn != replace_block_sibling; - wn = WN_next(wn)) - Verify_No_MP(wn); - - if (nested_pu) - Verify_No_MP(nested_pu); -#endif -} - - // Verify that tree contains no MP pragmas or IF's -void Verify_MP_Lowered::Verify_No_MP(WN *tree) -{ - WN_ITER *wni = WN_WALK_TreeIter(tree); - - for ( ; wni; wni = WN_WALK_TreeNext(wni)) { - WN *wn = WN_ITER_wn(wni); - OPERATOR opr = WN_operator(wn); - - if ((opr == OPR_PRAGMA || opr == OPR_XPRAGMA) && - WN_pragmas[WN_pragma(wn)].users & PUSER_MP) - Fail_FmtAssertion("Verify_MP_Lowered: unlowered MP pragma %d, " - "node %#lx, tree %#lx", WN_pragma(wn), (unsigned long) wn, - (unsigned long) tree); - - if (opr == OPR_IF && WN_Is_If_MpVersion(wn)) - Fail_FmtAssertion("Verify_MP_Lowered: unlowered MP IF, node %#lx, " - "tree %#lx", (unsigned long) wn, (unsigned long) tree); - - BOOL first_and_last; - if (Is_Nonpod_Finalization_IF(wn, &first_and_last)) - Fail_FmtAssertion("Verify_MP_Lowered: unlowered non-POD finalization " - "IF, node %#lx, tree %#lx", - (unsigned long) wn, (unsigned long) tree); - } -} - -Verify_MP_Lowered *verify_mp_lowered_ptr; // set upon entry to lower_mp() - - /* Forward function declarations. */ static WN * Gen_MP_Load_Store ( ST * from_st, WN_OFFSET from_offset, @@ -2823,7 +2720,6 @@ is inheriting pu_recursive OK? PU_Info *parallel_pu = TYPE_MEM_POOL_ALLOC ( PU_Info, Malloc_Mem_Pool ); PU_Info_init ( parallel_pu ); Set_PU_Info_tree_ptr (parallel_pu, func_entry ); - verify_mp_lowered_ptr->Set_nested_pu_tree(func_entry); PU_Info_proc_sym(parallel_pu) = ST_st_idx(parallel_proc); PU_Info_maptab(parallel_pu) = cmaptab = WN_MAP_TAB_Create(MEM_pu_pool_ptr); @@ -4216,7 +4112,6 @@ Create_Copyfunc(ST *struct_st) PU_Info *copy_pu = TYPE_MEM_POOL_ALLOC ( PU_Info, Malloc_Mem_Pool ); PU_Info_init ( copy_pu ); Set_PU_Info_tree_ptr (copy_pu, func_entry ); - //verify_mp_lowered_ptr->Set_nested_pu_tree(func_entry); PU_Info_proc_sym(copy_pu) = ST_st_idx(parallel_proc); PU_Info_maptab(copy_pu) = cp_cmaptab = WN_MAP_TAB_Create(MEM_pu_pool_ptr); @@ -11909,9 +11804,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) Is_True(PU_Info_proc_sym(Current_PU_Info) == last_pu_proc_sym, ("LowerMP_PU_Init() not called for this PU")); - Verify_MP_Lowered verify_mp_lowered; - verify_mp_lowered_ptr = &verify_mp_lowered; - /* Special case handling of PU-scope pragmas. */ if (block == NULL) { @@ -11927,7 +11819,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) } - verify_mp_lowered.Set_replace_block_and_nested_pu(NULL, NULL, NULL); return (NULL); } @@ -12047,18 +11938,14 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN *return_wn = WN_first( store_gtid ); WN_DELETE_Tree(node); WN_Delete( store_gtid ); - verify_mp_lowered.Set_replace_block_and_nested_pu( return_wn, - WN_next(call), NULL); return return_wn; } else if (WN_pragma(node) == WN_PRAGMA_CHUNKSIZE) { pu_chunk_node = node; - verify_mp_lowered.Set_replace_block_and_nested_pu(NULL, NULL, NULL); return (WN_next(node)); } else if (WN_pragma(node) == WN_PRAGMA_MPSCHEDTYPE) { pu_mpsched_node = node; - verify_mp_lowered.Set_replace_block_and_nested_pu(NULL, NULL, NULL); return (WN_next(node)); } else if (WN_pragma(node) == WN_PRAGMA_COPYIN) { @@ -12070,7 +11957,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) wn = WN_next(node); WN_DELETE_Tree ( node ); - verify_mp_lowered.Set_replace_block_and_nested_pu(NULL, NULL, NULL); return (wn); } else if (WN_pragma(node) == WN_PRAGMA_CRITICAL_SECTION_BEGIN) { @@ -12094,8 +11980,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN *return_wn = WN_first( store_gtid ); WN_DELETE_Tree(node); WN_Delete( store_gtid ); - verify_mp_lowered.Set_replace_block_and_nested_pu( return_wn, - WN_next(call), NULL); return return_wn; } else if (WN_pragma(node) == WN_PRAGMA_ORDERED_END) { @@ -12108,8 +11992,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN_next(call) = wn; if (wn) WN_prev(wn) = call; WN_DELETE_Tree(node); - verify_mp_lowered.Set_replace_block_and_nested_pu(call, - WN_next(call), NULL); return call; } else @@ -12142,8 +12024,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN_DELETE_Tree(WN_region_exits(node)); RID_Delete(Current_Map_Tab, node); WN_Delete(node); - verify_mp_lowered.Set_replace_block_and_nested_pu(wn, WN_next(wn), - NULL); return wn; case WN_PRAGMA_MASTER_BEGIN: @@ -12155,8 +12035,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN_DELETE_Tree(WN_region_exits(node)); RID_Delete(Current_Map_Tab, node); WN_Delete(node); - verify_mp_lowered.Set_replace_block_and_nested_pu(wn, WN_next(wn), - NULL); return wn; case WN_PRAGMA_DOACROSS: @@ -12166,7 +12044,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) case WN_PRAGMA_PDO_BEGIN: mpt = MPP_ORPHANED_PDO; - verify_mp_lowered.Set_nested_pu_tree(NULL); break; case WN_PRAGMA_PARALLEL_BEGIN: @@ -12497,7 +12374,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN_INSERT_BlockLast ( replace_block, Gen_MP_Copyin ( FALSE ) ); - verify_mp_lowered.Set_nested_pu_tree(NULL); } else if (mpt == MPP_CRITICAL_SECTION) { @@ -12540,8 +12416,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN_DELETE_Tree ( node ); WN_Delete ( cur_node ); - verify_mp_lowered.Set_nested_pu_tree(NULL); - } else if (mpt == MPP_PARALLEL_DO) { BOOL is_omp = WN_pragma_omp(WN_first(WN_region_pragmas(node))); @@ -12581,7 +12455,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN_Delete ( node ); local_count = 0; WN_INSERT_BlockLast ( replace_block, do_preamble_block ); - verify_mp_lowered_ptr->Set_nested_pu_tree(NULL); goto finish_processing; } else Fail_FmtAssertion @@ -12732,7 +12605,6 @@ lower_mp ( WN * block, WN * node, INT32 actions ) WN_Delete ( node ); local_count = 0; WN_INSERT_BlockLast ( replace_block, do_preamble_block ); - verify_mp_lowered_ptr->Set_nested_pu_tree(NULL); goto finish_processing; } else Fail_FmtAssertion @@ -13084,8 +12956,6 @@ finish_processing: /* For mp if's return the entire replacement block and the caller will handle it from there. */ - verify_mp_lowered.Set_replace_block(replace_block, - WN_next(WN_last(replace_block))); return_nodes = replace_block; } else { @@ -13102,7 +12972,6 @@ finish_processing: return_nodes = cont_nodes; WN_Delete ( replace_block ); - verify_mp_lowered.Set_replace_block(return_nodes, cont_nodes); } return (return_nodes); diff --git a/osprey/driver/OPTIONS b/osprey/driver/OPTIONS index ed4acce..79d3442 100644 --- a/osprey/driver/OPTIONS +++ b/osprey/driver/OPTIONS @@ -962,8 +962,8 @@ I-xall ; FTN ffe self -openmp ; FTN,Cc ffe -mp "enable the multiprocessing directives (same as -mp)" #ifdef KEY --fopenmp ; Cc cfe self - "" +-fopenmp toggle(&mpkind,NORMAL_MP); FTN,Cc ffe,cfe self + "enable the multiprocessing directives" #endif #ifndef KEY -dsm_clone ; ALL ipl,be,d self diff --git a/osprey/libhugetlbfs/version b/osprey/libhugetlbfs/version index 7e32cd5..fe1f25d 100644 --- a/osprey/libhugetlbfs/version +++ b/osprey/libhugetlbfs/version @@ -1 +1 @@ -1.3 +commit<4e83f0bb3949b0fad7bce693490628d95a7076e3> diff --git a/osprey/wgen/wgen_decl.cxx b/osprey/wgen/wgen_decl.cxx index 3b0f942..7fe1ab6 100644 --- a/osprey/wgen/wgen_decl.cxx +++ b/osprey/wgen/wgen_decl.cxx @@ -95,6 +95,7 @@ extern "C" { #include "wgen_dst.h" // DST_enter_member_function #include "dwarf_DST_dump.h" #include "targ_sim.h" // PUSH_RETURN_ADDRESS_ON_STACK +#include "wgen_omp_directives.h" #ifdef KEY #include <ext/hash_map> @@ -707,6 +708,7 @@ void WGEN_Expand_Decl(gs_t decl, BOOL can_skip) Init_Deferred_Function_Stack(); } */ + ST *var_st; switch (gs_tree_code(decl)) { case GS_CONST_DECL: @@ -885,7 +887,9 @@ void WGEN_Expand_Decl(gs_t decl, BOOL can_skip) expanded_decl(decl) = TRUE; #endif - (void) Get_ST(decl); + var_st = Get_ST(decl); + if (!gs_tree_static(decl) && !gs_decl_external(decl)) + WGEN_register_local_variable(var_st); if (gs_decl_initial(decl) && !gs_decl_external(decl)) { gs_t init = gs_decl_initial(decl); gs_code_t code = gs_tree_code(init); diff --git a/osprey/wgen/wgen_omp_directives.cxx b/osprey/wgen/wgen_omp_directives.cxx index 8afe645..c82a7fa 100644 --- a/osprey/wgen/wgen_omp_directives.cxx +++ b/osprey/wgen/wgen_omp_directives.cxx @@ -87,6 +87,8 @@ std::stack<WN *> lastlocal_node_stack; // vector for storing DO-loop side-effects, to be emitted before the loop. std::vector<WN *> doloop_side_effects; +static std::stack<WN *> omp_construct_stack; + BOOL Trace_Omp = FALSE; // Put in per-file OpenMP specific initializations here. @@ -840,6 +842,7 @@ void WGEN_expand_start_parallel (gs_t stmt) /* create a region on current block */ WN * region = WGEN_region(REGION_KIND_MP); + omp_construct_stack.push(region); WN *wn; @@ -892,6 +895,7 @@ void WGEN_expand_end_parallel () WGEN_maybe_do_eh_cleanups (); } + omp_construct_stack.pop(); WGEN_Stmt_Pop (wgen_stmk_scope); WGEN_CS_pop (wgen_omp_parallel); }; @@ -1255,6 +1259,7 @@ void WGEN_expand_start_parallel_for (gs_t stmt) /* create a region on current block */ WN * region = WGEN_region(REGION_KIND_MP); + omp_construct_stack.push(region); WN *wn; @@ -1298,6 +1303,7 @@ void WGEN_expand_end_parallel_for () { WN *wn = WGEN_Stmt_Top (); //WGEN_check_parallel_for (wn); + omp_construct_stack.pop(); WGEN_Stmt_Pop (wgen_stmk_scope); WGEN_CS_pop(wgen_omp_parallel_for); } @@ -1312,6 +1318,7 @@ void WGEN_expand_start_parallel_sections (gs_t stmt) /* create a region on current block */ WN * region = WGEN_region(REGION_KIND_MP); + omp_construct_stack.push(region); WN *wn; @@ -1357,6 +1364,7 @@ void WGEN_expand_end_parallel_sections () { WN *wn = WGEN_Stmt_Top (); // WGEN_check_parallel_sections (wn); + omp_construct_stack.pop(); WGEN_Stmt_Pop (wgen_stmk_scope); WGEN_CS_pop(wgen_omp_parallel_sections); @@ -1819,3 +1827,13 @@ void WGEN_expand_end_do_loop (void) WGEN_Stmt_Pop (wgen_stmk_for_cond); } +void WGEN_register_local_variable(ST * st) +{ + if (omp_construct_stack.empty()) + return; + WN * last_region = omp_construct_stack.top(); + WN * pragmas = WN_region_pragmas(last_region); + WN * private_pragma = WN_CreatePragma(WN_PRAGMA_LOCAL, st, 0, 0); + WN_set_pragma_omp(private_pragma); + WN_INSERT_BlockLast(pragmas, private_pragma); +} diff --git a/osprey/wgen/wgen_omp_directives.h b/osprey/wgen/wgen_omp_directives.h index 7077e33..8127b20 100644 --- a/osprey/wgen/wgen_omp_directives.h +++ b/osprey/wgen/wgen_omp_directives.h @@ -99,6 +99,8 @@ extern void WGEN_expand_start_do_loop (WN *, WN *, WN *, WN *); extern void WGEN_expand_end_do_loop (void); +extern void WGEN_register_local_variable(ST * st); + extern BOOL Trace_Omp; #endif
openmp-fix-cases.tgz
Description: GNU Zip compressed data
------------------------------------------------------------------------------ This SF.net Dev2Dev email is sponsored by: Show off your parallel programming skills. Enter the Intel(R) Threading Challenge 2010. http://p.sf.net/sfu/intel-thread-sfd
_______________________________________________ Open64-devel mailing list Open64-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/open64-devel