[PATCH, rs6000 v2] Make load cost more in vectorization cost for P8/P9

Kewen.Lin Wed, 06 Nov 2019 19:23:31 -0800

Hi Segher,

on 2019/11/7 上午1:38, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Nov 05, 2019 at 10:14:46AM +0800, Kewen.Lin wrote:
>>>> +     benefits were observed on Power8 and up, we can unify it if similar
>>>> +     profits are measured on Power6 and Power7.  */
>>>> +  if (TARGET_P8_VECTOR)
>>>> +    return 2;
>>>> +  else
>>>> +    return 1;
>>>
>>> Hrm, but you showed benchmark improvements for p9 as well?
>>>
>>
>> No significant gains but no degradation as well, so I thought it's fine to 
>> align
>> it together.  Does it make sense?
> 
> It's a bit strange at this point to do tunings for p8 that do we do not
> do for later cpus.
> 
>>> What happens if you enable this for everything as well?
>>
>> My concern was that if we enable it for everything, it's possible to 
>> introduce
>> degradation for some benchmarks on P6 or P7 where we didn't evaluate the
>> performance impact.
> 
> No one cares about p6.


OK.  :)

> 
> We reasonably expect it will work just as well on p7 as on p8 and later.
> That you haven't tested on p7 yet says something about how important that
> platform is now ;-)
> 

Yes, exactly.

>> Although it's reasonable from the point view of load latency,
>> it's possible to get worse result in the actual benchmarks based on my fine 
>> grain
>> cost adjustment experiment before.  
>>
>> Or do you suggest enabling it everywhere and solve the degradation issue if 
>> exposed?
>> I'm also fine with that.  :)
> 
> Yeah, let's just enable it everywhere.

One updated patch to enable it everywhere attached.


BR,
Kewen

-------------------------------------------
gcc/ChangeLog

2019-11-07  Kewen Lin  <li...@gcc.gnu.org>

        * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Make
        scalar_load, vector_load, unaligned_load and vector_gather_load cost
        more to conform hardware latency and insn cost settings.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5876714..1094fbd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4763,15 +4763,17 @@ rs6000_builtin_vectorization_cost (enum 
vect_cost_for_stmt type_of_cost,
   switch (type_of_cost)
     {
       case scalar_stmt:
-      case scalar_load:
       case scalar_store:
       case vector_stmt:
-      case vector_load:
       case vector_store:
       case vec_to_scalar:
       case scalar_to_vec:
       case cond_branch_not_taken:
         return 1;
+      case scalar_load:
+      case vector_load:
+       /* Like rs6000_insn_cost, make load insns cost a bit more.  */
+         return 2;
 
       case vec_perm:
        /* Power7 has only one permute unit, make it a bit expensive.  */
@@ -4792,42 +4794,44 @@ rs6000_builtin_vectorization_cost (enum 
vect_cost_for_stmt type_of_cost,
 
       case unaligned_load:
       case vector_gather_load:
+       /* Like rs6000_insn_cost, make load insns cost a bit more.  */
        if (TARGET_EFFICIENT_UNALIGNED_VSX)
-         return 1;
-
-        if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
-          {
-            elements = TYPE_VECTOR_SUBPARTS (vectype);
-            if (elements == 2)
-              /* Double word aligned.  */
-              return 2;
-
-            if (elements == 4)
-              {
-                switch (misalign)
-                  {
-                    case 8:
-                      /* Double word aligned.  */
-                      return 2;
+         return 2;
 
-                    case -1:
-                      /* Unknown misalignment.  */
-                    case 4:
-                    case 12:
-                      /* Word aligned.  */
-                      return 22;
+       if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
+         {
+           elements = TYPE_VECTOR_SUBPARTS (vectype);
+           if (elements == 2)
+             /* Double word aligned.  */
+             return 4;
 
-                    default:
-                      gcc_unreachable ();
-                  }
-              }
-          }
+           if (elements == 4)
+             {
+               switch (misalign)
+                 {
+                 case 8:
+                   /* Double word aligned.  */
+                   return 4;
+
+                 case -1:
+                   /* Unknown misalignment.  */
+                 case 4:
+                 case 12:
+                   /* Word aligned.  */
+                   return 44;
+
+                 default:
+                   gcc_unreachable ();
+                 }
+             }
+         }
 
-        if (TARGET_ALTIVEC)
-          /* Misaligned loads are not supported.  */
-          gcc_unreachable ();
+       if (TARGET_ALTIVEC)
+         /* Misaligned loads are not supported.  */
+         gcc_unreachable ();
 
-        return 2;
+       /* Like rs6000_insn_cost, make load insns cost a bit more.  */
+       return 4;
 
       case unaligned_store:
       case vector_scatter_store:

[PATCH, rs6000 v2] Make load cost more in vectorization cost for P8/P9

Reply via email to