Hi,
  Gently ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647533.html

Thanks
Gui Haochen

在 2024/3/18 17:10, HAO CHEN GUI 写道:
> Hi,
>   Gently ping this:
> https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647533.html
> 
> Thanks
> Gui Haochen
> 
> 在 2024/3/11 13:41, HAO CHEN GUI 写道:
>> Hi,
>>   This patch tries to fix the problem when a canonical form doesn't benefit
>> on a specific target. The const operand of AND is and with the nonzero
>> bits of another operand in combine pass. It's a canonical form, but it's no
>> benefits for the target which has rotate and mask insns. As the mask is
>> truncated, it can't match the insn conditions which it originally matches.
>> For example, the following insn condition checks the sum of two AND masks.
>> When one of the mask is truncated, the condition breaks.
>>
>> (define_insn "*rotlsi3_insert_5"
>>   [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
>>      (ior:SI (and:SI (match_operand:SI 1 "gpc_reg_operand" "0,r")
>>                      (match_operand:SI 2 "const_int_operand" "n,n"))
>>              (and:SI (match_operand:SI 3 "gpc_reg_operand" "r,0")
>>                      (match_operand:SI 4 "const_int_operand" "n,n"))))]
>>   "rs6000_is_valid_mask (operands[2], NULL, NULL, SImode)
>>    && UINTVAL (operands[2]) != 0 && UINTVAL (operands[4]) != 0
>>    && UINTVAL (operands[2]) + UINTVAL (operands[4]) + 1 == 0"
>> ...
>>
>>   This patch tries to fix the problem by comparing the rtx cost. If another
>> operand (varop) is not changed and rtx cost with new mask is not less than
>> the original one, the mask is restored to original one.
>>
>>   I'm not sure if comparison of rtx cost here is proper. The outer code is
>> unknown and I suppose it as "SET". Also the rtx cost might not be accurate.
>> From my understanding, the canonical forms should always benefit as it can't
>> be undo in combine pass. Do we have a perfect solution for this kind of
>> issues? Looking forward for your advice.
>>
>>   Another similar issues for canonical forms. Whether the widen mode for
>> lshiftrt is always good?
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624852.html
>>
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> Combine: Don't truncate const operand of AND if it's no benefits
>>
>> In combine pass, the canonical form is to turn off all bits in the constant
>> that are know to already be zero for AND.
>>
>>   /* Turn off all bits in the constant that are known to already be zero.
>>      Thus, if the AND isn't needed at all, we will have CONSTOP == 
>> NONZERO_BITS
>>      which is tested below.  */
>>
>>   constop &= nonzero;
>>
>> But it doesn't benefit when the target has rotate and mask insert insns.
>> The AND mask is truncated and lost its information.  Thus it can't match
>> the insn conditions.  For example, the following insn condition checks
>> the sum of two AND masks.
>>
>> (define_insn "*rotlsi3_insert_5"
>>   [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
>>      (ior:SI (and:SI (match_operand:SI 1 "gpc_reg_operand" "0,r")
>>                      (match_operand:SI 2 "const_int_operand" "n,n"))
>>              (and:SI (match_operand:SI 3 "gpc_reg_operand" "r,0")
>>                      (match_operand:SI 4 "const_int_operand" "n,n"))))]
>>   "rs6000_is_valid_mask (operands[2], NULL, NULL, SImode)
>>    && UINTVAL (operands[2]) != 0 && UINTVAL (operands[4]) != 0
>>    && UINTVAL (operands[2]) + UINTVAL (operands[4]) + 1 == 0"
>> ...
>>
>> This patch restores the const operand of AND if the another operand is
>> not optimized and the truncated const operand doesn't save the rtx cost.
>>
>> gcc/
>>      * combine.cc (simplify_and_const_int_1): Restore the const operand
>>      of AND if varop is not optimized and the rtx cost of the new const
>>      operand is not reduced.
>>
>> gcc/testsuite/
>>      * gcc.target/powerpc/rlwimi-0.c: Reduced total number of insns and
>>      adjust the number of rotate and mask insns.
>>      * gcc.target/powerpc/rlwimi-1.c: Likewise.
>>      * gcc.target/powerpc/rlwimi-2.c: Likewise.
>>
>> patch.diff
>> diff --git a/gcc/combine.cc b/gcc/combine.cc
>> index a4479f8d836..16ff09ea854 100644
>> --- a/gcc/combine.cc
>> +++ b/gcc/combine.cc
>> @@ -10161,8 +10161,23 @@ simplify_and_const_int_1 (scalar_int_mode mode, rtx 
>> varop,
>>    if (constop == nonzero)
>>      return varop;
>>
>> -  if (varop == orig_varop && constop == orig_constop)
>> -    return NULL_RTX;
>> +  if (varop == orig_varop)
>> +    {
>> +      if (constop == orig_constop)
>> +    return NULL_RTX;
>> +      else
>> +    {
>> +      rtx tmp = simplify_gen_binary (AND, mode, varop,
>> +                                     gen_int_mode (constop, mode));
>> +      rtx orig = simplify_gen_binary (AND, mode, varop,
>> +                                      gen_int_mode (orig_constop, mode));
>> +      if (set_src_cost (tmp, mode, optimize_this_for_speed_p)
>> +          < set_src_cost (orig, mode, optimize_this_for_speed_p))
>> +        return tmp;
>> +      else
>> +        return NULL_RTX;
>> +    }
>> +    }
>>
>>    /* Otherwise, return an AND.  */
>>    return simplify_gen_binary (AND, mode, varop, gen_int_mode (constop, 
>> mode));
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-0.c 
>> b/gcc/testsuite/gcc.target/powerpc/rlwimi-0.c
>> index 961be199901..d9dd4419f1d 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-0.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-0.c
>> @@ -2,15 +2,15 @@
>>  /* { dg-options "-O2" } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 16485 { target ilp32 } 
>> } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 19909 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 19780 { target lp64 } 
>> } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 3007 { target ilp32 } } } 
>> */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 6420 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+mr} 140 { target lp64 } } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 6162 { target lp64 } 
>> } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 6420 { target ilp32 } 
>> } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rldimi} 310 { target lp64 } } 
>> } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 6110 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rldimi} 366 { target lp64 } } 
>> } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 6054 { target lp64 } 
>> } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+rotlwi} 308 } } */
>>
>>
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-1.c 
>> b/gcc/testsuite/gcc.target/powerpc/rlwimi-1.c
>> index e2c607b1d45..d955fc792d9 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-1.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-1.c
>> @@ -2,15 +2,15 @@
>>  /* { dg-options "-O2" } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 13977 { target ilp32 } 
>> } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20189 { target lp64 } 
>> } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 499 { target ilp32 } } } 
>> */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 6728 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+mr} 39 { target lp64 } } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 6672 { target lp64 } 
>> } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1404 { target ilp32 } 
>> } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rldimi} 134 { target lp64 } } 
>> } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1270 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rldimi} 190 { target lp64 } } 
>> } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1214 { target lp64 } 
>> } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5324 } } */
>>
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c 
>> b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> index bafa371db73..9abfd7abf08 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> @@ -2,14 +2,14 @@
>>  /* { dg-options "-O2" } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } 
>> } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 19622 { target lp64 } 
>> } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } 
>> */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+mr} 658 { target lp64 } } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 5460 { target lp64 } 
>> } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } 
>> } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } 
>> } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1610 { target lp64 } 
>> } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */
>>

Reply via email to