Author: pallavimathew Date: 2011-04-21 19:47:46 -0400 (Thu, 21 Apr 2011) New Revision: 3567
Modified: trunk/osprey/be/cg/cg_spill.cxx trunk/osprey/be/cg/whirl2ops.cxx trunk/osprey/be/com/data_layout.cxx Log: This patch 1. Fixes an alignment bug. The problem can be explained by the following f90 snippet. Statement 22 (which is a loop with 3 iterations) is vectorized. Neither input%field2%arr nor input%field1%arr is aligned properly. However, the compiler generates aligned load instructions for them, causing a segmentation fault at runtime. 1 module foobar 2 implicit none 3 4 type t1 5 integer :: pad1 6 integer :: pad2 7 integer :: pad3 8 real*8, dimension(3) :: arr 9 end type t1; 10 11 type t2 12 type(t1) :: field1 13 type(t1) :: field2 14 end type t2; 15 16 contains 17 18 subroutine foo(input) 19 type(t2) :: input 20 real*8, dimension(3) :: arr2; 21 22 arr2 = input%field2%arr - input%field1%arr 23 call bar (arr2); 24 end subroutine foo 25 26 end module foobar This fix updates: osprey/be/cg/whirl2ops.cxx 2. Fixes a problem illustrated with the snippet shown below built with -O3. <mydata> is accessed using aligned SIMD instructions regardless of whether this snippet is built with -m32 or -m64. With -m32, the SP is not guaranteed to land at a 16-byte boundary, therefore we are not able to *statically* allocate a local var at a 16-byte boundary. ------------------------------ void foo (void) { int i, mydata[4]; for (i= 0; i < 4; i++) mydata[i] = 1; extern void bar (int*); bar (&mydata); } ------------------------------- The root cause is that with -m32, the stack frame is 8-byte aligned, and therefore local variables cannot be statically aligned at a boundary more stringent than 8-byte alignment (unless one uses alloca() to allocate space for the local var). Using aligned SSE instructions to load/store local arrays will incur a segmentation fault. There are two problems fixed by the change: i) Register allocation doesn't respect a variable's alignment when it spills/restores to/from its home location. ii) Local variables should not have more stringent alignment than the stack frame. 
This fix updates: osprey/be/cg/whirl2ops.cxx osprey/be/cg/cg_spill.cxx osprey/be/com/data_layout.cxx Modified: trunk/osprey/be/cg/cg_spill.cxx =================================================================== --- trunk/osprey/be/cg/cg_spill.cxx 2011-04-21 23:43:17 UTC (rev 3566) +++ trunk/osprey/be/cg/cg_spill.cxx 2011-04-21 23:47:46 UTC (rev 3567) @@ -174,6 +174,8 @@ static LOCAL_SPILLS lra_int32_spills; #endif +extern VARIANT Memop_Variant(WN *); + /* * only rematerialize LDID's homeable by gra if spilling in service * to gra. @@ -743,7 +745,7 @@ { OPCODE opcode = WN_opcode(home); Exp_Load (OPCODE_rtype(opcode), OPCODE_desc(opcode), tn, WN_st(home), - WN_offset(home), &OPs, V_NONE); + WN_offset(home), &OPs, home ? Memop_Variant (home) : V_NONE); #ifdef TARG_IA64 ld_2_ld_fill (&OPs) ; #endif @@ -837,7 +839,7 @@ case OPR_LDID: /* homing load */ Exp_Load (OPCODE_rtype(opcode), OPCODE_desc(opcode), tn, - WN_st(home), WN_offset(home), ops, V_NONE); + WN_st(home), WN_offset(home), ops, home ? Memop_Variant (home) : V_NONE); if (Trace_Remat && !TN_is_gra_homeable(tn)) { #pragma mips_frequency_hint NEVER fprintf(TFile, "<Rematerialize> LDID for rematerializeable TN%d\n", @@ -961,7 +963,7 @@ WN *home = TN_home(src_tn); if (WN_operator(home) == OPR_LDID) { Exp_Store (OPCODE_desc(WN_opcode(home)), src_tn, WN_st(home), - WN_offset(home), ops, V_NONE); + WN_offset(home), ops, home ? Memop_Variant (home) : V_NONE); #ifdef TARG_IA64 st_2_st_spill (ops) ; #endif Modified: trunk/osprey/be/cg/whirl2ops.cxx =================================================================== --- trunk/osprey/be/cg/whirl2ops.cxx 2011-04-21 23:43:17 UTC (rev 3566) +++ trunk/osprey/be/cg/whirl2ops.cxx 2011-04-21 23:47:46 UTC (rev 3567) @@ -1785,7 +1785,7 @@ /* * Determine the Exp_OP variant for a memory operation. 
*/ -static VARIANT Memop_Variant(WN *memop) +VARIANT Memop_Variant(WN *memop) { VARIANT variant = V_NONE; #if defined(TARG_SL) @@ -1834,12 +1834,10 @@ if (TY_kind(ty) == KIND_POINTER) ty = TY_pointed(ty); ty_align = TY_align(ty); offset = WN_load_offset(memop); -#if defined(TARG_SL) if (offset) { INT offset_align = offset % required_alignment; if (offset_align) ty_align = MIN(ty_align, offset_align); } -#endif } break; case OPR_ISTORE: Modified: trunk/osprey/be/com/data_layout.cxx =================================================================== --- trunk/osprey/be/com/data_layout.cxx 2011-04-21 23:43:17 UTC (rev 3566) +++ trunk/osprey/be/com/data_layout.cxx 2011-04-21 23:47:46 UTC (rev 3567) @@ -2493,6 +2493,14 @@ base = SP_Sym; else base = FP_Sym; + + if (ST_class (st) == CLASS_VAR) { + INT16 align = Adjusted_Alignment (st); + TY_IDX ty = ST_type(st); + Set_TY_align (ty, align); + Set_ST_type (*st, ty); + } + ST_Block_Merge (base, st, 0, 0, MAX_FRAME_OFFSET); } ------------------------------------------------------------------------------ Fulfilling the Lean Software Promise Lean software platforms are now widely adopted and the benefits have been demonstrated beyond question. Learn why your peers are replacing JEE containers with lightweight application servers - and what you can gain from the move. http://p.sf.net/sfu/vmware-sfemails _______________________________________________ Open64-devel mailing list Open64-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/open64-devel