This creates a get_pointer_alignment_1 function, similar to get_object_alignment_1, for use by SRA (and for consistency).
SRA needs to be conservative with its alignment assertions when building MEM_REFs. The following patch changes the single place where we build them. Together with the posted patch to fix expanding of stores to unaligned accesses wrapped inside component-refs this fixes this PR. Bootstrapped and tested on x86_64-unknown-linux-gnu. I'm queuing these patches to apply them in a useful order, but consider this one quite obvious. A followup (or rather a patch that I'd apply before) will be to reduce the number of non-scalarized loads/stores SRA rewrites (which it ideally should not do at all). Richard. 2012-01-24 Richard Guenther <rguent...@suse.de> PR tree-optimization/50444 * tree.h (get_pointer_alignment_1): Declare. * builtins.c (get_pointer_alignment_1): New function. (get_pointer_alignment): Use it. * tree-sra.c (build_ref_for_offset): Properly adjust the MEM_REF type for unaligned accesses. * gcc.dg/torture/pr50444.c: New testcase. Index: gcc/tree.h =================================================================== *** gcc/tree.h (revision 183470) --- gcc/tree.h (working copy) *************** extern bool is_builtin_fn (tree); *** 5450,5455 **** --- 5450,5456 ---- extern unsigned int get_object_alignment_1 (tree, unsigned HOST_WIDE_INT *); extern unsigned int get_object_alignment (tree); extern unsigned int get_object_or_type_alignment (tree); + extern unsigned int get_pointer_alignment_1 (tree, unsigned HOST_WIDE_INT *); extern unsigned int get_pointer_alignment (tree); extern tree fold_call_stmt (gimple, bool); extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum built_in_function); Index: gcc/builtins.c =================================================================== *** gcc/builtins.c (revision 183470) --- gcc/builtins.c (working copy) *************** get_object_or_type_alignment (tree exp) *** 477,513 **** return align; } ! /* Return the alignment in bits of EXP, a pointer valued expression. ! 
The alignment returned is, by default, the alignment of the thing that ! EXP points to. If it is not a POINTER_TYPE, 0 is returned. ! Otherwise, look at the expression to see if we can do better, i.e., if the ! expression is actually pointing at an object whose alignment is tighter. */ unsigned int ! get_pointer_alignment (tree exp) { STRIP_NOPS (exp); if (TREE_CODE (exp) == ADDR_EXPR) ! return get_object_alignment (TREE_OPERAND (exp, 0)); else if (TREE_CODE (exp) == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (exp))) { struct ptr_info_def *pi = SSA_NAME_PTR_INFO (exp); - unsigned align; if (!pi) ! return BITS_PER_UNIT; ! if (pi->misalign != 0) ! align = (pi->misalign & -pi->misalign); ! else ! align = pi->align; ! return align * BITS_PER_UNIT; } return POINTER_TYPE_P (TREE_TYPE (exp)) ? BITS_PER_UNIT : 0; } /* Compute the length of a C string. TREE_STRING_LENGTH is not the right way, because it could contain a zero byte in the middle. TREE_STRING_LENGTH is the size of the character array, not the string. --- 477,535 ---- return align; } ! /* For a pointer valued expression EXP compute values M and N such that ! M divides (EXP - N) and such that N < M. Store N in *BITPOSP and return M. ! If EXP is not a pointer, 0 is returned. */ unsigned int ! get_pointer_alignment_1 (tree exp, unsigned HOST_WIDE_INT *bitposp) { STRIP_NOPS (exp); if (TREE_CODE (exp) == ADDR_EXPR) ! return get_object_alignment_1 (TREE_OPERAND (exp, 0), bitposp); else if (TREE_CODE (exp) == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (exp))) { struct ptr_info_def *pi = SSA_NAME_PTR_INFO (exp); if (!pi) ! { ! *bitposp = 0; ! return BITS_PER_UNIT; ! } ! *bitposp = pi->misalign * BITS_PER_UNIT; ! return pi->align * BITS_PER_UNIT; } + *bitposp = 0; return POINTER_TYPE_P (TREE_TYPE (exp)) ? BITS_PER_UNIT : 0; } + /* Return the alignment in bits of EXP, a pointer valued expression. + The alignment returned is, by default, the alignment of the thing that + EXP points to. If it is not a POINTER_TYPE, 0 is returned. 
+ + Otherwise, look at the expression to see if we can do better, i.e., if the + expression is actually pointing at an object whose alignment is tighter. */ + + unsigned int + get_pointer_alignment (tree exp) + { + unsigned HOST_WIDE_INT bitpos = 0; + unsigned int align; + + align = get_pointer_alignment_1 (exp, &bitpos); + + /* align and bitpos now specify known low bits of the pointer. + ptr & (align - 1) == bitpos. */ + + if (bitpos != 0) + align = (bitpos & -bitpos); + + return align; + } + /* Compute the length of a C string. TREE_STRING_LENGTH is not the right way, because it could contain a zero byte in the middle. TREE_STRING_LENGTH is the size of the character array, not the string. Index: gcc/tree-sra.c =================================================================== *** gcc/tree-sra.c (revision 183470) --- gcc/tree-sra.c (working copy) *************** build_ref_for_offset (location_t loc, tr *** 1461,1466 **** --- 1461,1468 ---- tree prev_base = base; tree off; HOST_WIDE_INT base_offset; + unsigned HOST_WIDE_INT misalign; + unsigned int align; gcc_checking_assert (offset % BITS_PER_UNIT == 0); *************** build_ref_for_offset (location_t loc, tr *** 1506,1511 **** --- 1508,1530 ---- base = build_fold_addr_expr (unshare_expr (base)); } + /* If prev_base were always an originally performed access + we can extract more optimistic alignment information + by looking at the access mode. That would constrain the + alignment of base + base_offset which we would need to + adjust according to offset. + ??? But it is not at all clear that prev_base is an access + that was in the IL that way, so be conservative for now. 
*/ + align = get_pointer_alignment_1 (base, &misalign); + misalign += (double_int_sext (tree_to_double_int (off), + TYPE_PRECISION (TREE_TYPE (off))).low + * BITS_PER_UNIT); + misalign = misalign & (align - 1); + if (misalign != 0) + align = (misalign & -misalign); + if (align < TYPE_ALIGN (exp_type)) + exp_type = build_aligned_type (exp_type, align); + return fold_build2_loc (loc, MEM_REF, exp_type, base, off); } Index: gcc/testsuite/gcc.dg/torture/pr50444.c =================================================================== *** gcc/testsuite/gcc.dg/torture/pr50444.c (revision 0) --- gcc/testsuite/gcc.dg/torture/pr50444.c (revision 0) *************** *** 0 **** --- 1,76 ---- + /* { dg-require-effective-target sse2_runtime } */ + /* { dg-do run } */ + /* { dg-options "-msse2" } */ + + typedef long long __m128i __attribute__ ((__vector_size__ (16), + __may_alias__)); + typedef int __v4si __attribute__ ((__vector_size__ (16))); + typedef long long __v2di __attribute__ ((__vector_size__ (16))); + typedef unsigned int uint32_t; + + typedef struct { + uint32_t v[4]; + } a4x32; + + a4x32* incr(a4x32* x) + { + x->v[0] += 1; + return x; + } + + typedef struct { + __m128i m; + } a1xm128i; + + static inline a1xm128i ssefunc( a1xm128i in, a1xm128i k) + { + a1xm128i ret; + ret.m = (__m128i)__builtin_ia32_pxor128 ((__v2di)in.m, (__v2di)k.m); + return ret; + } + + static a4x32 caster( a4x32 c4x32, a1xm128i k) + { + a1xm128i c1x128; + if( sizeof(c4x32) != sizeof(c1x128) ) __builtin_abort(); + __builtin_memcpy(&c1x128, &c4x32, sizeof(c1x128)); + c1x128 = ssefunc(c1x128, k); + __builtin_memcpy(&c4x32, &c1x128, sizeof(c4x32)); + return c4x32; + } + + typedef struct { + a1xm128i key; + a4x32 c; + __SIZE_TYPE__ elem; + a4x32 v; + } Engine; + + void ctor(Engine *e) + { + e->elem = 0; + e->key.m = (__m128i)(__v4si){ 0, 0, 0, 0 }; + e->c.v[0] = 0; + e->c.v[1] = 0; + e->c.v[2] = 0; + e->c.v[3] = 0; + } + + uint32_t method( Engine *e) + { + if( e->elem == 0 ) + { + e->v = 
caster(*incr(&e->c), e->key); + e->elem = 4; + } + return e->v.v[--e->elem]; + } + + int main() + { + Engine e4; ctor(&e4); + Engine e5; ctor(&e5); + if(method(&e4)!=method(&e5)) + __builtin_abort (); + return 0; + }