Richard Biener <richard.guent...@gmail.com> writes:
> On Fri, Jul 20, 2018 at 12:57 PM Richard Sandiford
> <richard.sandif...@arm.com> wrote:
>>
>> We could vectorise:
>>
>>      for (...)
>>        {
>>          a[0] = ...;
>>          a[1] = ...;
>>          a[2] = ...;
>>          a[3] = ...;
>>          a += stride;
>>        }
>>
>> (including the case when stride == 8) but not:
>>
>>      for (...)
>>        {
>>          a[0] = ...;
>>          a[1] = ...;
>>          a[2] = ...;
>>          a[3] = ...;
>>          a += 8;
>>        }
>>
>> (where the stride is always 8).  The former was treated as a "grouped
>> and strided" store, while the latter was treated as grouped store with
>> gaps, which we don't support.
>>
>> This patch makes us treat groups of stores with gaps at the end as
>> strided groups too.  I tried to go through all uses of STMT_VINFO_STRIDED_P
>> and all vector uses of DR_STEP to see whether there were any hard-baked
>> assumptions, but couldn't see any.  I wondered whether we should relax:
>>
>>   /* We do not have to consider dependences between accesses that belong
>>      to the same group, unless the stride could be smaller than the
>>      group size.  */
>>   if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
>>       && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
>>           == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
>>       && !STMT_VINFO_STRIDED_P (stmtinfo_a))
>>     return false;
>>
>> for cases in which the step is constant and the absolute step is known
>> to be greater than the group size, but data dependence analysis should
>> already return chrec_known for those cases.
>>
>> The new test is a version of vect-avg-15.c with the variable step
>> replaced by a constant one.
>>
>> A natural follow-on would be to do the same for groups with gaps in
>> the middle:
>>
>>           /* Check that the distance between two accesses is equal to the 
>> type
>>              size. Otherwise, we have gaps.  */
>>           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
>>                   - TREE_INT_CST_LOW (prev_init)) / type_size;
>>           if (diff != 1)
>>             {
>>               [...]
>>               if (DR_IS_WRITE (data_ref))
>>                 {
>>                   if (dump_enabled_p ())
>>                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>                                      "interleaved store with gaps\n");
>>                   return false;
>>                 }
>>
>> But I think we should do that separately and see what the fallout
>> from this change is first.
>
> Agreed.
>
>> Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
>> and x86_64-linux-gnu.  OK to install?
>
> Do you need to set STMT_VINFO_STRIDED_P on all stmts?  I think
> it is enough to set it on the first group element.

get_load_store_type tests STMT_VINFO_STRIDED_P on the stmt it's
given rather than the first element, but I guess that's my bug...

I'm hoping this weekend to finally finish off the series to get
rid of vinfo_for_stmt, which will make it slightly easier to do this.

Thanks,
Richard

> OK otherwise.
> Thanks,
> Richard.
>
>> Richard
>>
>>
>> 2018-07-20  Richard Sandiford  <richard.sandif...@arm.com>
>>
>> gcc/
>>         * tree-vect-data-refs.c (vect_analyze_group_access_1): Convert
>>         grouped stores with gaps to a strided group.
>>
>> gcc/testsuite/
>>         * gcc.dg/vect/vect-avg-16.c: New test.
>>         * gcc.dg/vect/slp-37.c: Expect the loop to be vectorized.
>>         * gcc.dg/vect/vect-strided-u8-i8-gap4.c,
>>         * gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c: Likewise for
>>         the second loop in main1.
>>
>> Index: gcc/tree-vect-data-refs.c
>> ===================================================================
>> --- gcc/tree-vect-data-refs.c   2018-06-30 13:44:38.567611988 +0100
>> +++ gcc/tree-vect-data-refs.c   2018-07-20 11:55:22.570911497 +0100
>> @@ -2632,10 +2632,14 @@ vect_analyze_group_access_1 (struct data
>>        if (groupsize != count
>>           && !DR_IS_READ (dr))
>>          {
>> -         if (dump_enabled_p ())
>> -           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> -                            "interleaved store with gaps\n");
>> -         return false;
>> +         groupsize = count;
>> +         STMT_VINFO_STRIDED_P (stmt_info) = true;
>> +         stmt_vec_info next_info = stmt_info;
>> +         while (DR_GROUP_NEXT_ELEMENT (next_info))
>> +           {
>> +             next_info = vinfo_for_stmt (DR_GROUP_NEXT_ELEMENT (next_info));
>> +             STMT_VINFO_STRIDED_P (next_info) = 1;
>> +           }
>>         }
>>
>>        /* If there is a gap after the last load in the group it is the
>> @@ -2651,6 +2655,8 @@ vect_analyze_group_access_1 (struct data
>>                            "Detected interleaving ");
>>           if (DR_IS_READ (dr))
>>             dump_printf (MSG_NOTE, "load ");
>> +         else if (STMT_VINFO_STRIDED_P (stmt_info))
>> +           dump_printf (MSG_NOTE, "strided store ");
>>           else
>>             dump_printf (MSG_NOTE, "store ");
>>           dump_printf (MSG_NOTE, "of size %u starting with ",
>> Index: gcc/testsuite/gcc.dg/vect/vect-avg-16.c
>> ===================================================================
>> --- /dev/null   2018-07-09 14:52:09.234750850 +0100
>> +++ gcc/testsuite/gcc.dg/vect/vect-avg-16.c     2018-07-20 
>> 11:55:22.570911497 +0100
>> @@ -0,0 +1,52 @@
>> +/* { dg-additional-options "-O3" } */
>> +/* { dg-require-effective-target vect_int } */
>> +
>> +#include "tree-vect.h"
>> +
>> +#define N 80
>> +
>> +void __attribute__ ((noipa))
>> +f (signed char *restrict a, signed char *restrict b,
>> +   signed char *restrict c, int n)
>> +{
>> +  for (int j = 0; j < n; ++j)
>> +    {
>> +      for (int i = 0; i < 16; ++i)
>> +       a[i] = (b[i] + c[i]) >> 1;
>> +      a += 20;
>> +      b += 20;
>> +      c += 20;
>> +    }
>> +}
>> +
>> +#define BASE1 -126
>> +#define BASE2 -42
>> +
>> +signed char a[N], b[N], c[N];
>> +
>> +int
>> +main (void)
>> +{
>> +  check_vect ();
>> +
>> +  for (int i = 0; i < N; ++i)
>> +    {
>> +      a[i] = i;
>> +      b[i] = BASE1 + i * 3;
>> +      c[i] = BASE2 + i * 2;
>> +      asm volatile ("" ::: "memory");
>> +    }
>> +  f (a, b, c, N / 20);
>> +  for (int i = 0; i < N; ++i)
>> +    {
>> +      int d = (BASE1 + BASE2 + i * 5) >> 1;
>> +      if (a[i] != (i % 20 < 16 ? d : i))
>> +       __builtin_abort ();
>> +    }
>> +  return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" 
>> "vect" } } */
>> +/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } 
>> } } */
>> +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { 
>> target vect_avg_qi } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target 
>> vect_avg_qi } } } */
>> Index: gcc/testsuite/gcc.dg/vect/slp-37.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/vect/slp-37.c  2018-05-02 08:37:49.033604262 +0100
>> +++ gcc/testsuite/gcc.dg/vect/slp-37.c  2018-07-20 11:55:22.570911497 +0100
>> @@ -17,8 +17,8 @@ foo1 (s1 *arr)
>>    int i;
>>    s1 *ptr = arr;
>>
>> -  /* Different constant types - not SLPable.  The group size is not power 
>> of 2,
>> -     interleaving is not supported either.  */
>> +  /* Vectorized as a strided SLP pair of accesses to <a, b> and a single
>> +     strided access to c.  */
>>    for (i = 0; i < N; i++)
>>      {
>>        ptr->a = 6;
>> @@ -58,6 +58,5 @@ int main (void)
>>    return 0;
>>  }
>>
>> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect"  } } */
>> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" 
>>  } } */
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" 
>>  } } */
>> Index: gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c 2018-05-02 
>> 08:37:49.021604375 +0100
>> +++ gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c 2018-07-20 
>> 11:55:22.570911497 +0100
>> @@ -53,7 +53,7 @@ main1 (s *arr)
>>     }
>>
>>    ptr = arr;
>> -  /* Not vectorizable: gap in store. */
>> +  /* Vectorized as a strided SLP pair.  */
>>    for (i = 0; i < N; i++)
>>      {
>>        res[i].a = ptr->b;
>> @@ -97,5 +97,4 @@ int main (void)
>>    return 0;
>>  }
>>
>> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target 
>> vect_strided8 } } } */
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target 
>> vect_strided8 } } } */
>> Index: gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c       
>> 2018-05-02 08:37:49.013604450 +0100
>> +++ gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c       
>> 2018-07-20 11:55:22.570911497 +0100
>> @@ -55,7 +55,7 @@ main1 (s *arr)
>>     }
>>
>>    ptr = arr;
>> -  /* Not vectorizable: gap in store.  */
>> +  /* Vectorized as a strided SLP pair.  */
>>    for (i = 0; i < N; i++)
>>      {
>>        res[i].a = ptr->b;
>> @@ -110,5 +110,4 @@ int main (void)
>>    return 0;
>>  }
>>
>> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target 
>> vect_strided8 } } } */
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target 
>> vect_strided8 } } } */

Reply via email to