Ping

On 03/10/23 3:53 pm, Surya Kumari Jangala wrote:
> Ping
> 
> On 20/09/23 7:31 am, Surya Kumari Jangala wrote:
>> Ping
>>
>> On 10/09/23 10:58 pm, Surya Kumari Jangala wrote:
>>> swap: Fix incorrect lane extraction by vec_extract() [PR106770]
>>>
>>> In the routine rs6000_analyze_swaps(), special handling of swappable
>>> instructions is done even if the webs that contain the swappable
>>> instructions are not optimized, i.e., the webs do not contain any
>>> permuting load/store instructions along with the associated register
>>> swap instructions. Doing special handling in such webs will result in
>>> the extracted lane being adjusted unnecessarily for vec_extract.
>>>
>>> Another issue is that existing code treats non-permuting loads/stores
>>> as special swappables. Non-permuting loads/stores (that have not yet
>>> been split into a permuting load/store and a swap) are handled by
>>> converting them into a permuting load/store (which effectively removes
>>> the swap). As a result, if special swappables are handled only in webs
>>> containing permuting loads/stores, then non-optimal code is generated
>>> for non-permuting loads/stores.
>>>
>>> Hence, in this patch, all webs containing either permuting loads/
>>> stores or non-permuting loads/stores are marked as requiring special
>>> handling of swappables. Swaps associated with permuting loads/stores
>>> are marked for removal, and non-permuting loads/stores are converted to
>>> permuting loads/stores. Then the special swappables in the webs are
>>> fixed up.
>>>
>>> Another issue with always handling swappable instructions is that it is
>>> incorrect to do so in webs where loads/stores on quad word aligned
>>> addresses are changed to lvx/stvx. Similarly, in webs where
>>> swap(load(vector constant)) instructions are replaced with
>>> load(swapped vector constant), the swappable instructions should not be
>>> modified.
>>>
>>> 2023-09-10  Surya Kumari Jangala  <jskum...@linux.ibm.com>
>>>
>>> gcc/
>>>     PR rtl-optimization/PR106770
>>>     * config/rs6000/rs6000-p8swap.cc (non_permuting_mem_insn): New
>>>     function.
>>>     (handle_non_permuting_mem_insn): New function.
>>>     (rs6000_analyze_swaps): Handle swappable instructions only in
>>>     certain webs.
>>>     (web_requires_special_handling): New instance variable.
>>>     (handle_special_swappables): Remove handling of non-permuting
>>>     load/store instructions.
>>>
>>> gcc/testsuite/
>>>     PR rtl-optimization/PR106770
>>>     * gcc.target/powerpc/pr106770.c: New test.
>>> ---
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
>>> b/gcc/config/rs6000/rs6000-p8swap.cc
>>> index 0388b9bd736..3a695aa1318 100644
>>> --- a/gcc/config/rs6000/rs6000-p8swap.cc
>>> +++ b/gcc/config/rs6000/rs6000-p8swap.cc
>>> @@ -179,6 +179,13 @@ class swap_web_entry : public web_entry_base
>>>    unsigned int special_handling : 4;
>>>    /* Set if the web represented by this entry cannot be optimized.  */
>>>    unsigned int web_not_optimizable : 1;
>>> +  /* Set if the swappable insns in the web represented by this entry
>>> +     have to be fixed. Swappable insns have to be fixed in :
>>> +       - webs containing permuting loads/stores and the swap insns
>>> +    in such webs have been marked for removal
>>> +       - webs where non-permuting loads/stores have been converted
>>> +    to permuting loads/stores  */
>>> +  unsigned int web_requires_special_handling : 1;
>>>    /* Set if this insn should be deleted.  */
>>>    unsigned int will_delete : 1;
>>>  };
>>> @@ -1468,14 +1475,6 @@ handle_special_swappables (swap_web_entry 
>>> *insn_entry, unsigned i)
>>>        if (dump_file)
>>>     fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
>>>        break;
>>> -    case SH_NOSWAP_LD:
>>> -      /* Convert a non-permuting load to a permuting one.  */
>>> -      permute_load (insn);
>>> -      break;
>>> -    case SH_NOSWAP_ST:
>>> -      /* Convert a non-permuting store to a permuting one.  */
>>> -      permute_store (insn);
>>> -      break;
>>>      case SH_EXTRACT:
>>>        /* Change the lane on an extract operation.  */
>>>        adjust_extract (insn);
>>> @@ -2401,6 +2400,25 @@ recombine_lvx_stvx_patterns (function *fun)
>>>    free (to_delete);
>>>  }
>>>  
>>> +/* Return true if insn is a non-permuting load/store.  */
>>> +static bool
>>> +non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i)
>>> +{
>>> +  return (insn_entry[i].special_handling == SH_NOSWAP_LD ||
>>> +     insn_entry[i].special_handling == SH_NOSWAP_ST);
>>> +}
>>> +
>>> +/* Convert a non-permuting load/store insn to a permuting one.  */
>>> +static void
>>> +handle_non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i)
>>> +{
>>> +  rtx_insn *insn = insn_entry[i].insn;
>>> +  if (insn_entry[i].special_handling == SH_NOSWAP_LD)
>>> +    permute_load (insn);
>>> +  else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
>>> +    permute_store (insn);
>>> +}
>>> +
>>>  /* Main entry point for this pass.  */
>>>  unsigned int
>>>  rs6000_analyze_swaps (function *fun)
>>> @@ -2624,25 +2642,56 @@ rs6000_analyze_swaps (function *fun)
>>>        dump_swap_insn_table (insn_entry);
>>>      }
>>>  
>>> -  /* For each load and store in an optimizable web (which implies
>>> -     the loads and stores are permuting), find the associated
>>> -     register swaps and mark them for removal.  Due to various
>>> -     optimizations we may mark the same swap more than once.  Also
>>> -     perform special handling for swappable insns that require it.  */
>>> +  /* There are two kinds of optimizations that can be performed on an
>>> +     optimizable web:
>>> +     1. Remove the register swaps associated with permuting load/store
>>> +   in an optimizable web
>>> +     2. Convert the vanilla loads/stores (that have not yet been split
>>> +   into a permuting load/store and a swap) into a permuting
>>> +   load/store (which effectively removes the swap)
>>> +     In both the cases, swappable instructions in the webs need
>>> +     special handling to fix them up.  */
>>>    for (i = 0; i < e; ++i)
>>> +    /* For each permuting load/store in an optimizable web, find
>>> +       the associated register swaps and mark them for removal.
>>> +       Due to various optimizations we may mark the same swap more
>>> +       than once.  */
>>>      if ((insn_entry[i].is_load || insn_entry[i].is_store)
>>>     && insn_entry[i].is_swap)
>>>        {
>>>     swap_web_entry* root_entry
>>>       = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>>>     if (!root_entry->web_not_optimizable)
>>> -     mark_swaps_for_removal (insn_entry, i);
>>> +     {
>>> +       mark_swaps_for_removal (insn_entry, i);
>>> +       root_entry->web_requires_special_handling = true;
>>> +     }
>>>        }
>>> -    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
>>> +    /* Convert the non-permuting loads/stores into a permuting
>>> +       load/store.  */
>>> +    else if (insn_entry[i].is_swappable
>>> +        && non_permuting_mem_insn (insn_entry, i))
>>>        {
>>>     swap_web_entry* root_entry
>>>       = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>>>     if (!root_entry->web_not_optimizable)
>>> +     {
>>> +       handle_non_permuting_mem_insn (insn_entry, i);
>>> +       root_entry->web_requires_special_handling = true;
>>> +     }
>>> +      }
>>> +
>>> +  /* Perform special handling for swappable insns that require it.
>>> +     Note that special handling should be done only for those
>>> +     swappable insns that are present in webs marked as requiring
>>> +     special handling.  */
>>> +  for (i = 0; i < e; ++i)
>>> +    if (insn_entry[i].is_swappable && insn_entry[i].special_handling &&
>>> +   !non_permuting_mem_insn (insn_entry, i))
>>> +      {
>>> +   swap_web_entry* root_entry
>>> +     = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>>> +   if (root_entry->web_requires_special_handling)
>>>       handle_special_swappables (insn_entry, i);
>>>        }
>>>  
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106770.c 
>>> b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>>> new file mode 100644
>>> index 00000000000..11efe39abc5
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>>> @@ -0,0 +1,21 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>>> +/* { dg-options "-mdejagnu-cpu=power8 -O2 " } */
>>> +/* The 2 xxpermdi instructions are generated by the two
>>> +   calls to vec_promote() */
>>> +/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */
>>> +
>>> +/* Test case to resolve PR106770  */
>>> +
>>> +#include <altivec.h>
>>> +
>>> +int cmp2(double a, double b)
>>> +{
>>> +    vector double va = vec_promote(a, 1);
>>> +    vector double vb = vec_promote(b, 1);
>>> +    vector long long vlt = (vector long long)vec_cmplt(va, vb);
>>> +    vector long long vgt = (vector long long)vec_cmplt(vb, va);
>>> +    vector signed long long vr = vec_sub(vlt, vgt);
>>> +
>>> +    return vec_extract(vr, 1);
>>> +}
>>>

Reply via email to