Author: pallavimathew
Date: 2011-04-21 19:47:46 -0400 (Thu, 21 Apr 2011)
New Revision: 3567
Modified:
trunk/osprey/be/cg/cg_spill.cxx
trunk/osprey/be/cg/whirl2ops.cxx
trunk/osprey/be/com/data_layout.cxx
Log:
This patch
1. Fixes an alignment bug. The problem can be explained by the following f90
snippet.
Statement 22 (which is a loop with 3 iterations) is vectorized. Neither
input%field2%arr nor
input%field1%arr is aligned properly. However, the compiler generates aligned load
instructions for
them, causing a segmentation fault at runtime.
1 module foobar
2 implicit none
3
4 type t1
5 integer :: pad1
6 integer :: pad2
7 integer :: pad3
8 real*8, dimension(3) :: arr
9 end type t1;
10
11 type t2
12 type(t1) :: field1
13 type(t1) :: field2
14 end type t2;
15
16 contains
17
18 subroutine foo(input)
19 type(t2) :: input
20 real*8, dimension(3) :: arr2;
21
22 arr2 = input%field2%arr - input%field1%arr
23 call bar (arr2);
24 end subroutine foo
25
26 end module foobar
This fix updates:
osprey/be/cg/whirl2ops.cxx
2. Fixes a problem illustrated with the snippet shown below built with -O3.
<mydata> is accessed using aligned SIMD instructions regardless of whether this
snippet is built with -m32 or -m64. With -m32, the SP is not guaranteed to land on
a 16-byte boundary; therefore we are not able to *statically* allocate a local
var on a 16-byte boundary.
------------------------------
void foo (void) {
int i, mydata[4];
for (i= 0; i < 4; i++)
mydata[i] = 1;
extern void bar (int*);
bar (&mydata);
}
-------------------------------
The root cause is that with -m32, the stack frame is 8-byte aligned, and
therefore local variables cannot be statically aligned at a boundary more
stringent than 8-byte alignment (unless one uses alloca() to allocate
space for the local var). Using aligned SSE instructions to load/store
local arrays will incur a segmentation fault.
There are two problems fixed by the change:
i) Register allocation doesn't respect a variable's alignment when
it spills/restores to/from its home location.
ii) Local variables should not have a more stringent alignment than the stack frame.
This fix updates:
osprey/be/cg/whirl2ops.cxx
osprey/be/cg/cg_spill.cxx
osprey/be/com/data_layout.cxx
Modified: trunk/osprey/be/cg/cg_spill.cxx
===================================================================
--- trunk/osprey/be/cg/cg_spill.cxx 2011-04-21 23:43:17 UTC (rev 3566)
+++ trunk/osprey/be/cg/cg_spill.cxx 2011-04-21 23:47:46 UTC (rev 3567)
@@ -174,6 +174,8 @@
static LOCAL_SPILLS lra_int32_spills;
#endif
+extern VARIANT Memop_Variant(WN *);
+
/*
* only rematerialize LDID's homeable by gra if spilling in service
* to gra.
@@ -743,7 +745,7 @@
{
OPCODE opcode = WN_opcode(home);
Exp_Load (OPCODE_rtype(opcode), OPCODE_desc(opcode), tn, WN_st(home),
- WN_offset(home), &OPs, V_NONE);
+ WN_offset(home), &OPs, home ? Memop_Variant (home) : V_NONE);
#ifdef TARG_IA64
ld_2_ld_fill (&OPs) ;
#endif
@@ -837,7 +839,7 @@
case OPR_LDID:
/* homing load */
Exp_Load (OPCODE_rtype(opcode), OPCODE_desc(opcode), tn,
- WN_st(home), WN_offset(home), ops, V_NONE);
+ WN_st(home), WN_offset(home), ops, home ? Memop_Variant (home)
: V_NONE);
if (Trace_Remat && !TN_is_gra_homeable(tn)) {
#pragma mips_frequency_hint NEVER
fprintf(TFile, "<Rematerialize> LDID for rematerializeable TN%d\n",
@@ -961,7 +963,7 @@
WN *home = TN_home(src_tn);
if (WN_operator(home) == OPR_LDID) {
Exp_Store (OPCODE_desc(WN_opcode(home)), src_tn, WN_st(home),
- WN_offset(home), ops, V_NONE);
+ WN_offset(home), ops, home ? Memop_Variant (home) : V_NONE);
#ifdef TARG_IA64
st_2_st_spill (ops) ;
#endif
Modified: trunk/osprey/be/cg/whirl2ops.cxx
===================================================================
--- trunk/osprey/be/cg/whirl2ops.cxx 2011-04-21 23:43:17 UTC (rev 3566)
+++ trunk/osprey/be/cg/whirl2ops.cxx 2011-04-21 23:47:46 UTC (rev 3567)
@@ -1785,7 +1785,7 @@
/*
* Determine the Exp_OP variant for a memory operation.
*/
-static VARIANT Memop_Variant(WN *memop)
+VARIANT Memop_Variant(WN *memop)
{
VARIANT variant = V_NONE;
#if defined(TARG_SL)
@@ -1834,12 +1834,10 @@
if (TY_kind(ty) == KIND_POINTER) ty = TY_pointed(ty);
ty_align = TY_align(ty);
offset = WN_load_offset(memop);
-#if defined(TARG_SL)
if (offset) {
INT offset_align = offset % required_alignment;
if (offset_align) ty_align = MIN(ty_align, offset_align);
}
-#endif
}
break;
case OPR_ISTORE:
Modified: trunk/osprey/be/com/data_layout.cxx
===================================================================
--- trunk/osprey/be/com/data_layout.cxx 2011-04-21 23:43:17 UTC (rev 3566)
+++ trunk/osprey/be/com/data_layout.cxx 2011-04-21 23:47:46 UTC (rev 3567)
@@ -2493,6 +2493,14 @@
base = SP_Sym;
else
base = FP_Sym;
+
+ if (ST_class (st) == CLASS_VAR) {
+ INT16 align = Adjusted_Alignment (st);
+ TY_IDX ty = ST_type(st);
+ Set_TY_align (ty, align);
+ Set_ST_type (*st, ty);
+ }
+
ST_Block_Merge (base, st, 0, 0, MAX_FRAME_OFFSET);
}
------------------------------------------------------------------------------
Fulfilling the Lean Software Promise
Lean software platforms are now widely adopted and the benefits have been
demonstrated beyond question. Learn why your peers are replacing JEE
containers with lightweight application servers - and what you can gain
from the move. http://p.sf.net/sfu/vmware-sfemails
_______________________________________________
Open64-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/open64-devel