Ping. Please review the patch.

On 12/01/23 10:21 pm, Surya Kumari Jangala via Gcc-patches wrote:
> Ping
> 
> On 04/01/23 1:58 pm, Surya Kumari Jangala via Gcc-patches wrote:
>> swap: Fix incorrect lane extraction by vec_extract() [PR106770]
>>
>> In the routine rs6000_analyze_swaps(), special handling of swappable
>> instructions is done even if the webs that contain the swappable
>> instructions are not optimized, i.e., the webs do not contain any
>> permuting load/store instructions along with the associated register
>> swap instructions. Doing special handling in such webs will result in
>> the extracted lane being adjusted unnecessarily for vec_extract.
>>
>> Modifying swappable instructions is also incorrect in webs where
>> loads/stores on quad word aligned addresses are changed to lvx/stvx.
>> Similarly, in webs where swap(load(vector constant)) instructions are
>> replaced with load(swapped vector constant), the swappable
>> instructions should not be modified.
>>
>> 2023-01-04  Surya Kumari Jangala  <jskum...@linux.ibm.com>
>>
>> gcc/
>>      PR rtl-optimization/106770
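>>      * rs6000-p8swap.cc (swap_web_entry): Add web_is_optimized field.
>>      (rs6000_analyze_swaps): Set web_is_optimized for webs whose
>>      swaps are removed or whose non-permuting loads/stores are fixed
>>      up, and perform the remaining special handling of swappable
>>      insns only in such webs.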
>>
>> gcc/testsuite/
>>      PR rtl-optimization/106770
>>      * gcc.target/powerpc/pr106770.c: New test.
>> ---
>>
>> diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
>> index 19fbbfb67dc..7ed39251df9 100644
>> --- a/gcc/config/rs6000/rs6000-p8swap.cc
>> +++ b/gcc/config/rs6000/rs6000-p8swap.cc
>> @@ -179,6 +179,9 @@ class swap_web_entry : public web_entry_base
>>    unsigned int special_handling : 4;
>>    /* Set if the web represented by this entry cannot be optimized.  */
>>    unsigned int web_not_optimizable : 1;
>> +  /* Set if the web represented by this entry has been optimized, i.e.,
>> +     register swaps of permuting loads/stores have been removed.  */
>> +  unsigned int web_is_optimized : 1;
>>    /* Set if this insn should be deleted.  */
>>    unsigned int will_delete : 1;
>>  };
>> @@ -2627,22 +2630,43 @@ rs6000_analyze_swaps (function *fun)
>>    /* For each load and store in an optimizable web (which implies
>>       the loads and stores are permuting), find the associated
>>       register swaps and mark them for removal.  Due to various
>> -     optimizations we may mark the same swap more than once.  Also
>> -     perform special handling for swappable insns that require it.  */
>> +     optimizations we may mark the same swap more than once.  Fix up
>> +     the non-permuting loads and stores by converting them into
>> +     permuting ones.  */
>>    for (i = 0; i < e; ++i)
>>      if ((insn_entry[i].is_load || insn_entry[i].is_store)
>>      && insn_entry[i].is_swap)
>>        {
>>      swap_web_entry* root_entry
>>        = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>> -    if (!root_entry->web_not_optimizable)
>> +    if (!root_entry->web_not_optimizable) {
>>        mark_swaps_for_removal (insn_entry, i);
>> +          root_entry->web_is_optimized = true;
>> +        }
>>        }
>> -    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
>> +    else if (insn_entry[i].is_swappable
>> +             && (insn_entry[i].special_handling == SH_NOSWAP_LD ||
>> +                 insn_entry[i].special_handling == SH_NOSWAP_ST))
>> +      {
>> +        swap_web_entry* root_entry
>> +          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>> +        if (!root_entry->web_not_optimizable) {
>> +          handle_special_swappables (insn_entry, i);
>> +          root_entry->web_is_optimized = true;
>> +        }
>> +      }
>> +
>> +  /* Perform special handling for swappable insns that require it. 
>> +     Note that special handling should be done only for those 
>> +     swappable insns that are present in webs optimized above.  */
>> +  for (i = 0; i < e; ++i)
>> +    if (insn_entry[i].is_swappable && insn_entry[i].special_handling &&
>> +        !(insn_entry[i].special_handling == SH_NOSWAP_LD || 
>> +          insn_entry[i].special_handling == SH_NOSWAP_ST))
>>        {
>>      swap_web_entry* root_entry
>>        = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>> -    if (!root_entry->web_not_optimizable)
>> +    if (root_entry->web_is_optimized)
>>        handle_special_swappables (insn_entry, i);
>>        }
>>  
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106770.c b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>> new file mode 100644
>> index 00000000000..84e9aead975
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power8 -O3 " } */
>> +/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */
>> +
>> +/* Test case for PR106770.  */
>> +
>> +#include <altivec.h>
>> +
>> +int cmp2(double a, double b)
>> +{
>> +    vector double va = vec_promote(a, 1);
>> +    vector double vb = vec_promote(b, 1);
>> +    vector long long vlt = (vector long long)vec_cmplt(va, vb);
>> +    vector long long vgt = (vector long long)vec_cmplt(vb, va);
>> +    vector signed long long vr = vec_sub(vlt, vgt);
>> +
>> +    return vec_extract(vr, 1);
>> +}
>> +

Reply via email to