Richard Guenther wrote:
> On Tue, Aug 7, 2012 at 4:56 PM, Ulrich Weigand <uweig...@de.ibm.com> wrote:
> > Would it be OK to backport this to 4.7 and possibly 4.6?
 
> I'll defer the decision to the target maintainers.  But please double-check
> for any changes in the vectorizer parts when backporting to 4.6.

And here is the change as committed to 4.6.  I didn't find any relevant changes
in the vectorizer code; however, I had to back-port a couple of testcase changes
(to the vect-peel-* tests) to avoid seeing regressions there.

Bye,
Ulrich

ChangeLog:

        Backport from mainline
        2012-07-30  Ulrich Weigand  <ulrich.weig...@linaro.org>
                    Richard Earnshaw  <rearn...@arm.com>

        * target.def (vector_alignment): New target hook.
        * doc/tm.texi.in (TARGET_VECTOR_ALIGNMENT): Document new hook.
        * doc/tm.texi: Regenerate.
        * targhooks.c (default_vector_alignment): New function.
        * targhooks.h (default_vector_alignment): Add prototype.
        * stor-layout.c (layout_type): Use targetm.vector_alignment.
        * config/arm/arm.c (arm_vector_alignment): New function.
        (TARGET_VECTOR_ALIGNMENT): Define.

        * tree-vect-data-refs.c (vect_update_misalignment_for_peel): Use
        vector type alignment instead of size.
        * tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound): Use
        element type size directly instead of computing it from alignment.
        Fix variable naming and comment.


testsuite/ChangeLog:

        Backport from mainline
        2012-07-30  Ulrich Weigand  <ulrich.weig...@linaro.org>

        * lib/target-supports.exp
        (check_effective_target_vect_natural_alignment): New function.
        * gcc.dg/align-2.c: Only run on targets with natural alignment
        of vector types.
        * gcc.dg/vect/slp-25.c: Adjust tests for targets without natural
        alignment of vector types.

        2011-12-21  Michael Zolotukhin  <michael.v.zolotuk...@intel.com>

        * gcc.dg/vect/vect-peel-1.c: Adjust test diag-scans to fix fail on AVX.
        * gcc.dg/vect/vect-peel-2.c: Ditto.

        2011-06-21  Ira Rosen  <ira.ro...@linaro.org>

        PR testsuite/49443
        * gcc.dg/vect/vect-peel-3.c: Expect to fail on vect_no_align
        targets.
        * gcc.dg/vect/vect-peel-4.c: Likewise.

        2011-06-14  Ira Rosen  <ira.ro...@linaro.org>

        * gcc.dg/vect/vect-peel-3.c: Adjust misalignment values
        for double-word vectors.
        * gcc.dg/vect/vect-peel-4.c: Likewise.


Index: gcc/doc/tm.texi
===================================================================
*** gcc/doc/tm.texi     (revision 190202)
--- gcc/doc/tm.texi     (working copy)
*************** make it all fit in fewer cache lines.
*** 1118,1123 ****
--- 1118,1131 ----
  If the value of this macro has a type, it should be an unsigned type.
  @end defmac
  
+ @deftypefn {Target Hook} HOST_WIDE_INT TARGET_VECTOR_ALIGNMENT (const_tree @var{type})
+ This hook can be used to define the alignment for a vector of type
+ @var{type}, in order to comply with a platform ABI.  The default is to
+ require natural alignment for vector types.  The alignment returned by
+ this hook must be a power-of-two multiple of the default alignment of
+ the vector element type.
+ @end deftypefn
+ 
  @defmac STACK_SLOT_ALIGNMENT (@var{type}, @var{mode}, @var{basic-align})
  If defined, a C expression to compute the alignment for stack slot.
  @var{type} is the data type, @var{mode} is the widest mode available,
Index: gcc/doc/tm.texi.in
===================================================================
*** gcc/doc/tm.texi.in  (revision 190202)
--- gcc/doc/tm.texi.in  (working copy)
*************** make it all fit in fewer cache lines.
*** 1108,1113 ****
--- 1108,1115 ----
  If the value of this macro has a type, it should be an unsigned type.
  @end defmac
  
+ @hook TARGET_VECTOR_ALIGNMENT
+ 
  @defmac STACK_SLOT_ALIGNMENT (@var{type}, @var{mode}, @var{basic-align})
  If defined, a C expression to compute the alignment for stack slot.
  @var{type} is the data type, @var{mode} is the widest mode available,
Index: gcc/targhooks.c
===================================================================
*** gcc/targhooks.c     (revision 190202)
--- gcc/targhooks.c     (working copy)
*************** tree default_mangle_decl_assembler_name 
*** 979,984 ****
--- 979,991 ----
     return id;
  }
  
+ /* Default to natural alignment for vector types.  */
+ HOST_WIDE_INT
+ default_vector_alignment (const_tree type)
+ {
+   return tree_low_cst (TYPE_SIZE (type), 0);
+ }
+ 
  bool
  default_builtin_vector_alignment_reachable (const_tree type, bool is_packed)
  {
Index: gcc/targhooks.h
===================================================================
*** gcc/targhooks.h     (revision 190202)
--- gcc/targhooks.h     (working copy)
*************** extern int default_builtin_vectorization
*** 85,90 ****
--- 85,92 ----
  
  extern tree default_builtin_reciprocal (unsigned int, bool, bool);
  
+ extern HOST_WIDE_INT default_vector_alignment (const_tree);
+ 
  extern bool default_builtin_vector_alignment_reachable (const_tree, bool);
  extern bool
  default_builtin_support_vector_misalignment (enum machine_mode mode,
Index: gcc/target.def
===================================================================
*** gcc/target.def      (revision 190202)
--- gcc/target.def      (working copy)
*************** DEFHOOK
*** 1611,1616 ****
--- 1611,1626 ----
   bool, (enum machine_mode mode),
   hook_bool_mode_false)
  
+ DEFHOOK
+ (vector_alignment,
+  "This hook can be used to define the alignment for a vector of type\n\
+ @var{type}, in order to comply with a platform ABI.  The default is to\n\
+ require natural alignment for vector types.  The alignment returned by\n\
+ this hook must be a power-of-two multiple of the default alignment of\n\
+ the vector element type.",
+  HOST_WIDE_INT, (const_tree type),
+  default_vector_alignment)
+ 
  /* Compute cost of moving data from a register of class FROM to one of
     TO, using MODE.  */
  DEFHOOK
Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c  (revision 190202)
--- gcc/tree-vect-loop-manip.c  (working copy)
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 2008,2014 ****
     If the misalignment of DR is known at compile time:
       addr_mis = int mis = DR_MISALIGNMENT (dr);
     Else, compute address misalignment in bytes:
!      addr_mis = addr & (vectype_size - 1)
  
     prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
  
--- 2008,2014 ----
     If the misalignment of DR is known at compile time:
       addr_mis = int mis = DR_MISALIGNMENT (dr);
     Else, compute address misalignment in bytes:
!      addr_mis = addr & (vectype_align - 1)
  
     prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
  
*************** vect_gen_niters_for_prolog_loop (loop_ve
*** 2065,2073 ****
        tree ptr_type = TREE_TYPE (start_addr);
        tree size = TYPE_SIZE (ptr_type);
        tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
!       tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
!       tree elem_size_log =
!         build_int_cst (type, exact_log2 (vectype_align/nelements));
        tree nelements_minus_1 = build_int_cst (type, nelements - 1);
        tree nelements_tree = build_int_cst (type, nelements);
        tree byte_misalign;
--- 2065,2074 ----
        tree ptr_type = TREE_TYPE (start_addr);
        tree size = TYPE_SIZE (ptr_type);
        tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
!       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
!       HOST_WIDE_INT elem_size =
!               int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
!       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
        tree nelements_minus_1 = build_int_cst (type, nelements - 1);
        tree nelements_tree = build_int_cst (type, nelements);
        tree byte_misalign;
*************** vect_gen_niters_for_prolog_loop (loop_ve
*** 2076,2085 ****
        new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
        gcc_assert (!new_bb);
  
!       /* Create:  byte_misalign = addr & (vectype_size - 1)  */
        byte_misalign =
          fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), 
!                      vectype_size_minus_1);
  
        /* Create:  elem_misalign = byte_misalign / element_size  */
        elem_misalign =
--- 2077,2086 ----
        new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
        gcc_assert (!new_bb);
  
!       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
        byte_misalign =
          fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), 
!                      vectype_align_minus_1);
  
        /* Create:  elem_misalign = byte_misalign / element_size  */
        elem_misalign =
Index: gcc/testsuite/lib/target-supports.exp
===================================================================
*** gcc/testsuite/lib/target-supports.exp       (revision 190202)
--- gcc/testsuite/lib/target-supports.exp       (working copy)
*************** proc check_effective_target_natural_alig
*** 2976,2981 ****
--- 2976,3001 ----
      return $et_natural_alignment_64_saved
  }
  
+ # Return 1 if all vector types are naturally aligned (aligned to their
+ # type-size), 0 otherwise.
+ #
+ # This won't change for different subtargets so cache the result.
+ 
+ proc check_effective_target_vect_natural_alignment { } {
+     global et_vect_natural_alignment
+ 
+     if [info exists et_vect_natural_alignment_saved] {
+         verbose "check_effective_target_vect_natural_alignment: using cached result" 2
+     } else {
+         set et_vect_natural_alignment_saved 1
+         if { [check_effective_target_arm_eabi] } {
+             set et_vect_natural_alignment_saved 0
+         }
+     }
+     verbose "check_effective_target_vect_natural_alignment: returning $et_vect_natural_alignment_saved" 2
+     return $et_vect_natural_alignment_saved
+ }
+ 
  # Return 1 if vector alignment (for types of size 32 bit or less) is reachable, 0 otherwise.
  #
  # This won't change for different subtargets so cache the result.
Index: gcc/testsuite/gcc.dg/align-2.c
===================================================================
*** gcc/testsuite/gcc.dg/align-2.c      (revision 190202)
--- gcc/testsuite/gcc.dg/align-2.c      (working copy)
***************
*** 1,5 ****
  /* PR 17962 */
! /* { dg-do compile } */
  /* { dg-options "" } */
  
  typedef float v4 __attribute__((vector_size(sizeof(float)*4)));
--- 1,5 ----
  /* PR 17962 */
! /* { dg-do compile { target vect_natural_alignment } } */
  /* { dg-options "" } */
  
  typedef float v4 __attribute__((vector_size(sizeof(float)*4)));
Index: gcc/testsuite/gcc.dg/vect/vect-peel-1.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-peel-1.c     (revision 190202)
--- gcc/testsuite/gcc.dg/vect/vect-peel-1.c     (working copy)
*************** int main (void)
*** 49,54 ****
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
! /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_element_align } } } */
  /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 49,54 ----
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
! /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */
  /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-peel-2.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-peel-2.c     (revision 190202)
--- gcc/testsuite/gcc.dg/vect/vect-peel-2.c     (working copy)
*************** int main (void)
*** 50,55 ****
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
! /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_element_align } } } */
! /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target vect_element_align } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 50,55 ----
  }
  
  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
! /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */
! /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-peel-3.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-peel-3.c     (revision 190202)
--- gcc/testsuite/gcc.dg/vect/vect-peel-3.c     (working copy)
***************
*** 4,12 ****
  #include "tree-vect.h"
  
  #define N 128
! #define RES 21888 
! 
! /* unaligned store.  */
  
  int ib[N+10];
  int ia[N+10];
--- 4,10 ----
  #include "tree-vect.h"
  
  #define N 128
! #define RES 21640 
  
  int ib[N+10];
  int ia[N+10];
*************** int main1 ()
*** 18,28 ****
    int i, suma = 0, sumb = 0, sumc = 0;
  
    /* ib and ic have same misalignment, we peel to align them.  */
!   for (i = 1; i <= N; i++)
      {
        suma += ia[i];
!       sumb += ib[i+6];
!       sumc += ic[i+2];
      }
  
    /* check results:  */
--- 16,26 ----
    int i, suma = 0, sumb = 0, sumc = 0;
  
    /* ib and ic have same misalignment, we peel to align them.  */
!   for (i = 0; i <= N; i++)
      {
        suma += ia[i];
!       sumb += ib[i+5];
!       sumc += ic[i+1];
      }
  
    /* check results:  */
*************** int main (void)
*** 49,55 ****
    return main1 ();
  }
  
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
  /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect"  { xfail vect_no_align } } } */
! /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 47,53 ----
    return main1 ();
  }
  
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
  /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect"  { xfail vect_no_align } } } */
! /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-peel-4.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-peel-4.c     (revision 190202)
--- gcc/testsuite/gcc.dg/vect/vect-peel-4.c     (working copy)
*************** int main1 ()
*** 16,28 ****
    /* Don't peel keeping one load and the store aligned.  */
    for (i = 0; i <= N; i++)
      {
!       ia[i] = ib[i] + ib[i+6];
      }
  
    /* check results:  */
    for (i = 1; i <= N; i++)
      {
!       if (ia[i] != ib[i] + ib[i+6])
          abort ();
      }
  
--- 16,28 ----
    /* Don't peel keeping one load and the store aligned.  */
    for (i = 0; i <= N; i++)
      {
!       ia[i] = ib[i] + ib[i+5];
      }
  
    /* check results:  */
    for (i = 1; i <= N; i++)
      {
!       if (ia[i] != ib[i] + ib[i+5])
          abort ();
      }
  
*************** int main (void)
*** 44,50 ****
    return main1 ();
  }
  
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
  /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect"  { xfail vect_no_align } } } */
  /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 44,50 ----
    return main1 ();
  }
  
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
  /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect"  { xfail vect_no_align } } } */
  /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-25.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-25.c  (revision 190202)
--- gcc/testsuite/gcc.dg/vect/slp-25.c  (working copy)
*************** int main (void)
*** 57,61 ****
  
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  } } */
  /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
! /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align } } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 57,61 ----
  
  /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  } } */
  /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
! /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align || { ! vect_natural_alignment } } } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/stor-layout.c
===================================================================
*** gcc/stor-layout.c   (revision 190202)
--- gcc/stor-layout.c   (working copy)
*************** layout_type (tree type)
*** 1927,1935 ****
        TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE (innertype),
                                            bitsize_int (nunits), 0);
  
!       /* Always naturally align vectors.  This prevents ABI changes
!          depending on whether or not native vector modes are supported.  */
!       TYPE_ALIGN (type) = tree_low_cst (TYPE_SIZE (type), 0);
          break;
        }
  
--- 1927,1943 ----
        TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE (innertype),
                                            bitsize_int (nunits), 0);
  
!       /* For vector types, we do not default to the mode's alignment.
!          Instead, query a target hook, defaulting to natural alignment.
!          This prevents ABI changes depending on whether or not native
!          vector modes are supported.  */
!       TYPE_ALIGN (type) = targetm.vector_alignment (type);
! 
!       /* However, if the underlying mode requires a bigger alignment than
!          what the target hook provides, we cannot use the mode.  For now,
!          simply reject that case.  */
!       gcc_assert (TYPE_ALIGN (type)
!                   >= GET_MODE_ALIGNMENT (TYPE_MODE (type)));
          break;
        }
  
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c   (revision 190202)
--- gcc/tree-vect-data-refs.c   (working copy)
*************** vect_update_misalignment_for_peel (struc
*** 1019,1025 ****
        int misal = DR_MISALIGNMENT (dr);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
        misal += negative ? -npeel * dr_size : npeel * dr_size;
!       misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
        SET_DR_MISALIGNMENT (dr, misal);
        return;
      }
--- 1019,1025 ----
        int misal = DR_MISALIGNMENT (dr);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
        misal += negative ? -npeel * dr_size : npeel * dr_size;
!       misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
        SET_DR_MISALIGNMENT (dr, misal);
        return;
      }
Index: gcc/config/arm/arm.c
===================================================================
*** gcc/config/arm/arm.c        (revision 190202)
--- gcc/config/arm/arm.c        (working copy)
*************** static bool xscale_sched_adjust_cost (rt
*** 243,248 ****
--- 243,249 ----
  static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
  static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
  static bool arm_class_likely_spilled_p (reg_class_t);
+ static HOST_WIDE_INT arm_vector_alignment (const_tree type);
  static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
  static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
*************** static const struct default_options arm_
*** 579,584 ****
--- 580,588 ----
  #undef TARGET_CLASS_LIKELY_SPILLED_P
  #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
  
+ #undef TARGET_VECTOR_ALIGNMENT
+ #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
+ 
  #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
  #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
    arm_vector_alignment_reachable
*************** arm_function_arg (CUMULATIVE_ARGS *pcum,
*** 4693,4698 ****
--- 4697,4714 ----
    return gen_rtx_REG (mode, pcum->nregs);
  }
  
+ /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
+ static HOST_WIDE_INT
+ arm_vector_alignment (const_tree type)
+ {
+   HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
+ 
+   if (TARGET_AAPCS_BASED)
+     align = MIN (align, 64);
+ 
+   return align;
+ }
+ 
  static unsigned int
  arm_function_arg_boundary (enum machine_mode mode, const_tree type)
  {


-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com

Reply via email to