This consolidates the alignment folding in fold_binary_loc with get_pointer_alignment_1, thereby also making it stronger, as get_pointer_alignment_1 can use SSA name alignment info when available.
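As a concrete illustration of the kind of folding at issue (an example made up for exposition, not a testcase from the patch), the low bits of an address with known alignment and residue fold to a constant:

  #include <stdint.h>

  /* buf is 8-byte aligned, so &buf[3] has residue 3 modulo 8 and the
     masked expression is a compile-time constant.  */
  char buf[16] __attribute__ ((aligned (8)));

  uintptr_t
  low_bits (void)
  {
    return (uintptr_t) &buf[3] & 7;   /* folds to 3 */
  }

With the patch the same fold also triggers when the pointer is only an SSA name whose alignment was computed by CCP, not just a bare ADDR_EXPR.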
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-07-02  Richard Biener  <rguent...@suse.de>

	* builtins.c (get_pointer_alignment_1): Handle POINTER_PLUS_EXPR.
	* fold-const.c (get_pointer_modulus_and_residue): Remove.
	(fold_binary_loc): Implement (T)ptr & CST in terms of
	get_pointer_alignment_1.
	* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
	Make sure to build the alignment test on a SSA name without
	final alignment info valid only after the prologue.

Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c	(revision 225309)
--- gcc/builtins.c	(working copy)
*************** get_pointer_alignment_1 (tree exp, unsig
*** 473,478 ****
--- 473,500 ----
    if (TREE_CODE (exp) == ADDR_EXPR)
      return get_object_alignment_2 (TREE_OPERAND (exp, 0),
  				   alignp, bitposp, true);
+   else if (TREE_CODE (exp) == POINTER_PLUS_EXPR)
+     {
+       unsigned int align;
+       unsigned HOST_WIDE_INT bitpos;
+       bool res = get_pointer_alignment_1 (TREE_OPERAND (exp, 0),
+ 					  &align, &bitpos);
+       if (TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
+ 	bitpos += TREE_INT_CST_LOW (TREE_OPERAND (exp, 1)) * BITS_PER_UNIT;
+       else
+ 	{
+ 	  unsigned int trailing_zeros = tree_ctz (TREE_OPERAND (exp, 1));
+ 	  if (trailing_zeros < HOST_BITS_PER_INT)
+ 	    {
+ 	      unsigned int inner = (1U << trailing_zeros) * BITS_PER_UNIT;
+ 	      if (inner)
+ 		align = MIN (align, inner);
+ 	    }
+ 	}
+       *alignp = align;
+       *bitposp = bitpos & (align - 1);
+       return res;
+     }
    else if (TREE_CODE (exp) == SSA_NAME
  	   && POINTER_TYPE_P (TREE_TYPE (exp)))
      {
Index: gcc/fold-const.c
===================================================================
*** gcc/fold-const.c	(revision 225309)
--- gcc/fold-const.c	(working copy)
*************** fold_mult_zconjz (location_t loc, tree t
*** 9350,9432 ****
  }
  
- /* Subroutine of fold_binary.  If P is the value of EXPR, computes
-    power-of-two M and (arbitrary) N such that M divides (P-N).  This condition
-    guarantees that P and N have the same least significant log2(M) bits.
-    N is not otherwise constrained.  In particular, N is not normalized to
-    0 <= N < M as is common.  In general, the precise value of P is unknown.
-    M is chosen as large as possible such that constant N can be determined.
- 
-    Returns M and sets *RESIDUE to N.
- 
-    If ALLOW_FUNC_ALIGN is true, do take functions' DECL_ALIGN_UNIT into
-    account.  This is not always possible due to PR 35705.
-  */
- 
- static unsigned HOST_WIDE_INT
- get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue,
- 				 bool allow_func_align)
- {
-   enum tree_code code;
- 
-   *residue = 0;
- 
-   code = TREE_CODE (expr);
-   if (code == ADDR_EXPR)
-     {
-       unsigned int bitalign;
-       get_object_alignment_1 (TREE_OPERAND (expr, 0), &bitalign, residue);
-       *residue /= BITS_PER_UNIT;
-       return bitalign / BITS_PER_UNIT;
-     }
-   else if (code == POINTER_PLUS_EXPR)
-     {
-       tree op0, op1;
-       unsigned HOST_WIDE_INT modulus;
-       enum tree_code inner_code;
- 
-       op0 = TREE_OPERAND (expr, 0);
-       STRIP_NOPS (op0);
-       modulus = get_pointer_modulus_and_residue (op0, residue,
- 						 allow_func_align);
- 
-       op1 = TREE_OPERAND (expr, 1);
-       STRIP_NOPS (op1);
-       inner_code = TREE_CODE (op1);
-       if (inner_code == INTEGER_CST)
- 	{
- 	  *residue += TREE_INT_CST_LOW (op1);
- 	  return modulus;
- 	}
-       else if (inner_code == MULT_EXPR)
- 	{
- 	  op1 = TREE_OPERAND (op1, 1);
- 	  if (TREE_CODE (op1) == INTEGER_CST)
- 	    {
- 	      unsigned HOST_WIDE_INT align;
- 
- 	      /* Compute the greatest power-of-2 divisor of op1.  */
- 	      align = TREE_INT_CST_LOW (op1);
- 	      align &= -align;
- 
- 	      /* If align is non-zero and less than *modulus, replace
- 		 *modulus with align., If align is 0, then either op1 is 0
- 		 or the greatest power-of-2 divisor of op1 doesn't fit in an
- 		 unsigned HOST_WIDE_INT.  In either case, no additional
- 		 constraint is imposed.  */
- 	      if (align)
- 		modulus = MIN (modulus, align);
- 
- 	      return modulus;
- 	    }
- 	}
-     }
- 
-   /* If we get here, we were unable to determine anything useful about the
-      expression.  */
-   return 1;
- }
- 
  /* Helper function for fold_vec_perm.  Store elements of VECTOR_CST or
     CONSTRUCTOR ARG into array ELTS and return true if successful.  */
--- 9350,9355 ----
*************** fold_binary_loc (location_t loc,
*** 11149,11167 ****
        /* If arg0 is derived from the address of an object or function, we may
  	 be able to fold this expression using the object or function's
  	 alignment.  */
!       if (POINTER_TYPE_P (TREE_TYPE (arg0)) && tree_fits_uhwi_p (arg1))
  	{
! 	  unsigned HOST_WIDE_INT modulus, residue;
! 	  unsigned HOST_WIDE_INT low = tree_to_uhwi (arg1);
! 	  modulus = get_pointer_modulus_and_residue (arg0, &residue,
! 						     integer_onep (arg1));
  	  /* This works because modulus is a power of 2.  If this weren't the
  	     case, we'd have to replace it by its greatest power-of-2
  	     divisor: modulus & -modulus.  */
! 	  if (low < modulus)
! 	    return build_int_cst (type, residue & low);
  	}
  
        goto associate;
--- 11072,11091 ----
        /* If arg0 is derived from the address of an object or function, we may
  	 be able to fold this expression using the object or function's
  	 alignment.  */
!       if (POINTER_TYPE_P (TREE_TYPE (arg0)) && TREE_CODE (arg1) == INTEGER_CST)
  	{
! 	  unsigned int align;
! 	  unsigned HOST_WIDE_INT bitpos;
! 	  get_pointer_alignment_1 (arg0, &align, &bitpos);
  	  /* This works because modulus is a power of 2.  If this weren't the
  	     case, we'd have to replace it by its greatest power-of-2
  	     divisor: modulus & -modulus.  */
! 	  if (wi::ltu_p (arg1, align / BITS_PER_UNIT))
! 	    return wide_int_to_tree (type,
! 				     wi::bit_and (arg1,
! 						  bitpos / BITS_PER_UNIT));
  	}
  
        goto associate;
Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c	(revision 225309)
--- gcc/tree-vect-loop-manip.c	(working copy)
*************** vect_gen_niters_for_prolog_loop (loop_ve
*** 1877,1883 ****
    gimple_seq new_stmts = NULL;
    bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
    tree offset = negative
! 	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
    tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
  						&new_stmts, offset, loop);
    tree type = unsigned_type_for (TREE_TYPE (start_addr));
--- 1877,1883 ----
    gimple_seq new_stmts = NULL;
    bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
    tree offset = negative
! 	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
    tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
  						&new_stmts, offset, loop);
    tree type = unsigned_type_for (TREE_TYPE (start_addr));
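To make the new fold_binary_loc arithmetic concrete, here is a standalone worked example with made-up numbers (plain C, not GCC internals): suppose get_pointer_alignment_1 reports align = 64 and bitpos = 24 bits for arg0, i.e. the pointer is 8-byte aligned plus 3 bytes.  For any mask below align / BITS_PER_UNIT the result then depends only on the residue:

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    unsigned int align = 64;   /* bits, as get_pointer_alignment_1 reports */
    uint64_t bitpos = 24;      /* bits; pointer is 8-byte aligned plus 3 */
    uint64_t mask = 7;         /* the CST in (T)ptr & CST */

    /* Mirrors the wi::ltu_p (arg1, align / BITS_PER_UNIT) guard.  */
    assert (mask < align / 8);

    /* Mirrors wi::bit_and (arg1, bitpos / BITS_PER_UNIT): for any p
       with p % 8 == 3, p & 7 == 3.  */
    assert ((mask & (bitpos / 8)) == 3);
    return 0;
  }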