http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444
--- Comment #12 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-16
15:25:06 UTC ---
Testcase w/o includes that fails with 4.6 and 4.7:
/* 16-byte vector of long long, additionally marked __may_alias__. */
typedef long long __m128i __attribute__ ((__vector_size__ (16),
__may_alias__));
/* 16-byte vector of int; used by ctor() to build an all-zero key. */
typedef int __v4si __attribute__ ((__vector_size__ (16)));
/* 16-byte vector of long long without __may_alias__; operand type used
   for the __builtin_ia32_pxor128 call in ssefunc(). */
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
/* Local stand-in for the <stdint.h> name so the testcase needs no includes. */
typedef unsigned int uint32_t;
/* Struct wrapping an array of four uint32_t values.  caster() requires it
   to have the same size as a1xm128i and copies bytes between the two. */
typedef struct {
uint32_t v[4];
} a4x32;
/* Add 1 to x->v[0] in place and return the same pointer X that was
   passed in.  The other three elements are left untouched. */
a4x32* incr(a4x32* x)
{
x->v[0] += 1;
return x;
}
/* Struct wrapping a single __m128i vector member.  */
typedef struct {
__m128i m;
} a1xm128i;
/* Return a struct whose vector member is the result of
   __builtin_ia32_pxor128 applied to IN.m and K.m.  Both operands are cast
   to __v2di for the builtin and the result is cast back to __m128i. */
static inline a1xm128i ssefunc( a1xm128i in, a1xm128i k)
{
a1xm128i ret;
ret.m = (__m128i)__builtin_ia32_pxor128 ((__v2di)in.m, (__v2di)k.m);
return ret;
}
/* Reinterpret the bytes of C4X32 as an a1xm128i, combine it with K via
   ssefunc(), and return the result bytes reinterpreted as an a4x32.
   Aborts if the two struct types differ in size.  */
static a4x32 caster( a4x32 c4x32, a1xm128i k)
{
a1xm128i c1x128;
/* The byte copies below are only meaningful when both types have equal size. */
if( sizeof(c4x32) != sizeof(c1x128) ) __builtin_abort();
/* Copy the representation across with memcpy rather than a pointer cast. */
__builtin_memcpy(&c1x128, &c4x32, sizeof(c1x128));
c1x128 = ssefunc(c1x128, k);
/* Copy the result back into the by-value parameter, which is then returned. */
__builtin_memcpy(&c4x32, &c1x128, sizeof(c4x32));
return c4x32;
}
/* State used by ctor() and method().  */
typedef struct {
/* Passed as the second argument to caster() by method(); zeroed by ctor(). */
a1xm128i key;
/* Counter block: method() increments c.v[0] before each refill of V. */
a4x32 c;
/* Number of elements of V not yet returned; 0 makes method() refill V. */
__SIZE_TYPE__ elem;
/* Result of the most recent caster() call; read from the highest index down. */
a4x32 v;
} Engine;
/* Initialize *E: ELEM is set to 0, KEY to an all-zero vector and all four
   elements of C to 0.  E->v is not written here; because ELEM is 0, the
   first method() call fills V before reading from it.  */
void ctor(Engine *e)
{
e->elem = 0;
e->key.m = (__m128i)(__v4si){ 0, 0, 0, 0 };
e->c.v[0] = 0;
e->c.v[1] = 0;
e->c.v[2] = 0;
e->c.v[3] = 0;
}
/* Return the next value from E->v.  When no unread values remain
   (ELEM == 0), first increment the counter block via incr(), store
   caster (counter, key) into E->v and reset ELEM to 4.  Values are then
   handed out from index ELEM-1 downwards.  */
uint32_t method( Engine *e)
{
if( e->elem == 0 )
{
/* incr() modifies e->c in place; caster() receives a by-value copy of it. */
e->v = caster(*incr(&e->c), e->key);
e->elem = 4;
}
/* Pre-decrement: the first read after a refill returns v.v[3]. */
return e->v.v[--e->elem];
}
/* Build two engines with ctor(), call method() once on each, and abort if
   the two results differ.  Both engines receive the same initialization
   and the same single call, so a mismatch indicates wrong code.  */
int main()
{
Engine e4; ctor(&e4);
Engine e5; ctor(&e5);
if(method(&e4)!=method(&e5))
__builtin_abort ();
return 0;
}
and the problematic SRA is indeed happening during ESRA in caster () which
looks like (before SRA):
<bb 2>:
MEM[(char * {ref-all})&c1x128] = MEM[(char * {ref-all})&c4x32];
in = c1x128;
k = k;
D.1785_7 = k.m;
D.1784_8 = in.m;
D.1783_9 = __builtin_ia32_pxor128 (D.1784_8, D.1785_7);
D.1782.m = D.1783_9;
D.1780 = D.1782;
c1x128 = D.1780;
MEM[(char * {ref-all})&c4x32] = MEM[(char * {ref-all})&c1x128];
D.1760 = c4x32;
c1x128 ={v} {CLOBBER};
return D.1760;
and after SRA:
<bb 2>:
c4x32$m_4 = MEM[(struct *)&c4x32].m;
c1x128$m_14 = c4x32$m_4;
in$m_13 = c1x128$m_14;
k$m_12 = MEM[(struct *)&k].m;
D.1785_7 = k$m_12;
D.1784_8 = in$m_13;
D.1783_9 = __builtin_ia32_pxor128 (D.1784_8, D.1785_7);
SR.6_11 = D.1783_9;
SR.7_10 = SR.6_11;
c1x128$m_2 = SR.7_10;
c4x32$m_15 = c1x128$m_2;
MEM[(struct *)&D.1760].m = c4x32$m_15;
c1x128$m_16 = { 0, 0 };
return D.1760;
Notice that D.1760 is of type a4x32 and thus has the alignment of an
integer. But SRA constructs in place an object of type a1xm128i (the type of c1x128) there.
SRA analysis should have seen the alignment breaking copy
MEM[(char * {ref-all})&c4x32] = MEM[(char * {ref-all})&c1x128];
which uses a properly aligned type for the store. Similarly the
prevailing store
D.1760 = c4x32;
has the alignment of D.1760.
D.1760 already has a bogus type in lacc->type. We can easily avoid
translating across aggregate copies that would transfer bogusly aligned types
to an access via
Index: tree-sra.c
===================================================================
--- tree-sra.c (revision 183205)
+++ tree-sra.c (working copy)
@@ -2290,7 +2290,9 @@ propagate_subaccesses_across_link (struc
if (is_gimple_reg_type (racc->type))
{
- if (!lacc->first_child && !racc->first_child)
+ if (!lacc->first_child && !racc->first_child
+ && (get_object_alignment (lacc->expr)
+ >= get_object_alignment (racc->expr)))
{
tree t = lacc->base;
or make sure to transfer the alignment to a constructed bare(!) MEM_REF
from lacc->expr before overwriting that (assuming it retains the original
form up until here).