Re: [PATCH, rs6000] Optimization for vec_xl_sext

2021-11-16 Thread HAO CHEN GUI via Gcc-patches
Bill,

    Sorry, I mixed up the patches. There is one vec_reve patch which hasn't 
gotten approval for a long time. I will re-send it.  Thanks a lot.

On 16/11/2021 下午 9:10, Bill Schmidt wrote:
> Hi Hao Chen,
>
> I don't understand.  This patch was already approved and you committed it. 
> :-)  I know
> because I needed to make corresponding adjustments to the new builtins code.
>
> Thanks,
> Bill
>
> On 11/15/21 8:16 PM, HAO CHEN GUI wrote:
>> Hi,
>>
>>    The patch optimizes the code generation for vec_xl_sext builtin. Now all 
>> the sign extensions are done on VSX registers directly.
>>
>>    Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
>> okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>>
>> 2021-11-16 Haochen Gui 
>>
>> gcc/
>>     * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify
>>     the expansion for sign extension. All extensions are done on VSX
>>     registers.
>>
>> gcc/testsuite/
>>     * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>>
>> patch.diff
>>
>> diff --git a/gcc/config/rs6000/rs6000-call.c 
>> b/gcc/config/rs6000/rs6000-call.c
>> index b4e13af4dc6..587e9fa2a2a 100644
>> --- a/gcc/config/rs6000/rs6000-call.c
>> +++ b/gcc/config/rs6000/rs6000-call.c
>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, 
>> tree exp, rtx target, bool bl
>>
>>    if (sign_extend)
>>  {
>> -  rtx discratch = gen_reg_rtx (DImode);
>> +  rtx discratch = gen_reg_rtx (V2DImode);
>>    rtx tiscratch = gen_reg_rtx (TImode);
>>
>>    /* Emit the lxvr*x insn.  */
>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, 
>> tree exp, rtx target, bool bl
>>     return 0;
>>    emit_insn (pat);
>>
>> -  /* Emit a sign extension from QI,HI,WI to double (DI).  */
>> -  rtx scratch = gen_lowpart (smode, tiscratch);
>> +  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
>> +  rtx temp1, temp2;
>>    if (icode == CODE_FOR_vsx_lxvrbx)
>> -   emit_insn (gen_extendqidi2 (discratch, scratch));
>> +   {
>> + temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
>> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
>> +   }
>>    else if (icode == CODE_FOR_vsx_lxvrhx)
>> -   emit_insn (gen_extendhidi2 (discratch, scratch));
>> +   {
>> + temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
>> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
>> +   }
>>    else if (icode == CODE_FOR_vsx_lxvrwx)
>> -   emit_insn (gen_extendsidi2 (discratch, scratch));
>> -  /*  Assign discratch directly if scratch is already DI.  */
>> -  if (icode == CODE_FOR_vsx_lxvrdx)
>> -   discratch = scratch;
>> +   {
>> + temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
>> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
>> +   }
>> +  else if (icode == CODE_FOR_vsx_lxvrdx)
>> +   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
>> +  else
>> +   gcc_unreachable ();
>>
>> -  /* Emit the sign extension from DI (double) to TI (quad).  */
>> -  emit_insn (gen_extendditi2 (target, discratch));
>> +  /* Emit the sign extension from V2DI (double) to TI (quad).  */
>> +  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
>> +  emit_insn (gen_extendditi2_vector (target, temp2));
>>
>>    return target;
>>  }
>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
>> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>> new file mode 100644
>> index 000..78e72ac5425
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>> @@ -0,0 +1,35 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target int128 } */
>> +/* { dg-require-effective-target power10_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>> +
>> +#include 
>> +
>> +vector signed __int128
>> +foo1 (signed long a, signed char *b)
>> +{
>> +  return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo2 (signed long a, signed short *b)
>> +{
>> +  return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo3 (signed long a, signed int *b)
>> +{
>> +  return vec_xl_sext (a, b);
>> +}
>> +
>> +vector signed __int128
>> +foo4 (signed long a, signed long *b)
>> +{
>> +  return vec_xl_sext (a, b);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
>> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
>> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
>> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
>>


Re: [PATCH, rs6000] Optimization for vec_xl_sext

2021-11-16 Thread Bill Schmidt via Gcc-patches
Hi Hao Chen,

I don't understand.  This patch was already approved and you committed it. :-)  
I know
because I needed to make corresponding adjustments to the new builtins code.

Thanks,
Bill

On 11/15/21 8:16 PM, HAO CHEN GUI wrote:
> Hi,
>
>    The patch optimizes the code generation for vec_xl_sext builtin. Now all 
> the sign extensions are done on VSX registers directly.
>
>    Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
> okay for trunk? Any recommendations? Thanks a lot.
>
> ChangeLog
>
> 2021-11-16 Haochen Gui 
>
> gcc/
>     * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify
>     the expansion for sign extension. All extensions are done on VSX
>     registers.
>
> gcc/testsuite/
>     * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>
> patch.diff
>
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index b4e13af4dc6..587e9fa2a2a 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
> exp, rtx target, bool bl
>
>    if (sign_extend)
>  {
> -  rtx discratch = gen_reg_rtx (DImode);
> +  rtx discratch = gen_reg_rtx (V2DImode);
>    rtx tiscratch = gen_reg_rtx (TImode);
>
>    /* Emit the lxvr*x insn.  */
> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, 
> tree exp, rtx target, bool bl
>     return 0;
>    emit_insn (pat);
>
> -  /* Emit a sign extension from QI,HI,WI to double (DI).  */
> -  rtx scratch = gen_lowpart (smode, tiscratch);
> +  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
> +  rtx temp1, temp2;
>    if (icode == CODE_FOR_vsx_lxvrbx)
> -   emit_insn (gen_extendqidi2 (discratch, scratch));
> +   {
> + temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
> +   }
>    else if (icode == CODE_FOR_vsx_lxvrhx)
> -   emit_insn (gen_extendhidi2 (discratch, scratch));
> +   {
> + temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
> +   }
>    else if (icode == CODE_FOR_vsx_lxvrwx)
> -   emit_insn (gen_extendsidi2 (discratch, scratch));
> -  /*  Assign discratch directly if scratch is already DI.  */
> -  if (icode == CODE_FOR_vsx_lxvrdx)
> -   discratch = scratch;
> +   {
> + temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
> +   }
> +  else if (icode == CODE_FOR_vsx_lxvrdx)
> +   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
> +  else
> +   gcc_unreachable ();
>
> -  /* Emit the sign extension from DI (double) to TI (quad).  */
> -  emit_insn (gen_extendditi2 (target, discratch));
> +  /* Emit the sign extension from V2DI (double) to TI (quad).  */
> +  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
> +  emit_insn (gen_extendditi2_vector (target, temp2));
>
>    return target;
>  }
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> new file mode 100644
> index 000..78e72ac5425
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include 
> +
> +vector signed __int128
> +foo1 (signed long a, signed char *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo2 (signed long a, signed short *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo3 (signed long a, signed int *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo4 (signed long a, signed long *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
>


[PATCH, rs6000] Optimization for vec_xl_sext

2021-11-15 Thread HAO CHEN GUI via Gcc-patches
Hi,

   The patch optimizes the code generation for vec_xl_sext builtin. Now all the 
sign extensions are done on VSX registers directly.

   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
okay for trunk? Any recommendations? Thanks a lot.

ChangeLog

2021-11-16 Haochen Gui 

gcc/
    * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): Modify
    the expansion for sign extension. All extensions are done on VSX
    registers.

gcc/testsuite/
    * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl

   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);

   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl
    return 0;
   emit_insn (pat);

-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
   if (icode == CODE_FOR_vsx_lxvrbx)
-   emit_insn (gen_extendqidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrhx)
-   emit_insn (gen_extendhidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrwx)
-   emit_insn (gen_extendsidi2 (discratch, scratch));
-  /*  Assign discratch directly if scratch is already DI.  */
-  if (icode == CODE_FOR_vsx_lxvrdx)
-   discratch = scratch;
+   {
+ temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+   }
+  else if (icode == CODE_FOR_vsx_lxvrdx)
+   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+  else
+   gcc_unreachable ();

-  /* Emit the sign extension from DI (double) to TI (quad).  */
-  emit_insn (gen_extendditi2 (target, discratch));
+  /* Emit the sign extension from V2DI (double) to TI (quad).  */
+  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+  emit_insn (gen_extendditi2_vector (target, temp2));

   return target;
 }
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl
 
   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);
 
   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl
return 0;
   emit_insn (pat);
 
-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
   if (icode == CODE_FOR_vsx_lxvrbx)
-   emit_insn (gen_extendqidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V16QImode, tiscratch, 

Re: PATCH, rs6000] Optimization for vec_xl_sext

2021-10-19 Thread HAO CHEN GUI via Gcc-patches

Committed as r12-4494. Thanks to all of you.

Gui Haochen

On 15/10/2021 上午 2:53, David Edelsohn wrote:

On Thu, Oct 14, 2021 at 2:17 AM HAO CHEN GUI  wrote:

Hi,

The patch optimizes the code generation for vec_xl_sext builtin. Now all 
the sign extensions are done on VSX registers directly.

Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
okay for trunk? Any recommendations? Thanks a lot.

I refined the patch according to Bill and David's advice. I put the 
patch.diff and ChangeLog in attachment also in case the indentation doesn't 
show correctly in email body.


ChangeLog

2021-10-11 Haochen Gui 


gcc/

* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):

Modify the expansion for sign extension. All extensions are done

within VSX registers.


gcc/testsuite/

* gcc.target/powerpc/p10_vec_xl_sext.c: New test.

This is okay.

Thanks, David


Re: PATCH, rs6000] Optimization for vec_xl_sext

2021-10-14 Thread David Edelsohn via Gcc-patches
On Thu, Oct 14, 2021 at 2:17 AM HAO CHEN GUI  wrote:
>
> Hi,
>
>The patch optimizes the code generation for vec_xl_sext builtin. Now all 
> the sign extensions are done on VSX registers directly.
>
>Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
> okay for trunk? Any recommendations? Thanks a lot.
>
>I refined the patch according to Bill and David's advice. I put the 
> patch.diff and ChangeLog in attachment also in case the indentation doesn't 
> show correctly in email body.
>
>
> ChangeLog
>
> 2021-10-11 Haochen Gui 
>
>
> gcc/
>
> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>
> Modify the expansion for sign extension. All extensions are done
>
> within VSX registers.
>
>
> gcc/testsuite/
>
> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

This is okay.

Thanks, David


Re: PATCH, rs6000] Optimization for vec_xl_sext

2021-10-14 Thread Bill Schmidt via Gcc-patches
Hi Haochen,

The patch looks okay to me now.  Will defer to David for final call. :-)

Thanks!
Bill

On 10/14/21 1:17 AM, HAO CHEN GUI wrote:
> Hi,
>
>   The patch optimizes the code generation for vec_xl_sext builtin. Now all 
> the sign extensions are done on VSX registers directly.
>
>   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
> okay for trunk? Any recommendations? Thanks a lot.
>
>   I refined the patch according to Bill and David's advice. I put the 
> patch.diff and ChangeLog in attachment also in case the indentation doesn't 
> show correctly in email body.
>
>
> ChangeLog
>
> 2021-10-11 Haochen Gui 
>
>
> gcc/
>
> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>
> Modify the expansion for sign extension. All extensions are done
>
> within VSX registers.
>
>
> gcc/testsuite/
>
> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>
> patch.diff
>
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index b4e13af4dc6..587e9fa2a2a 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
> exp, rtx target, bool bl
>
>    if (sign_extend)
>  {
> -  rtx discratch = gen_reg_rtx (DImode);
> +  rtx discratch = gen_reg_rtx (V2DImode);
>    rtx tiscratch = gen_reg_rtx (TImode);
>
>    /* Emit the lxvr*x insn.  */
> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, 
> tree exp, rtx target, bool bl
>     return 0;
>    emit_insn (pat);
>
> -  /* Emit a sign extension from QI,HI,WI to double (DI).  */
> -  rtx scratch = gen_lowpart (smode, tiscratch);
> +  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
> +  rtx temp1, temp2;
>    if (icode == CODE_FOR_vsx_lxvrbx)
> -   emit_insn (gen_extendqidi2 (discratch, scratch));
> +   {
> + temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
> +   }
>    else if (icode == CODE_FOR_vsx_lxvrhx)
> -   emit_insn (gen_extendhidi2 (discratch, scratch));
> +   {
> + temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
> +   }
>    else if (icode == CODE_FOR_vsx_lxvrwx)
> -   emit_insn (gen_extendsidi2 (discratch, scratch));
> -  /*  Assign discratch directly if scratch is already DI.  */
> -  if (icode == CODE_FOR_vsx_lxvrdx)
> -   discratch = scratch;
> +   {
> + temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
> +   }
> +  else if (icode == CODE_FOR_vsx_lxvrdx)
> +   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
> +  else
> +   gcc_unreachable ();
>
> -  /* Emit the sign extension from DI (double) to TI (quad). */
> -  emit_insn (gen_extendditi2 (target, discratch));
> +  /* Emit the sign extension from V2DI (double) to TI (quad).  */
> +  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
> +  emit_insn (gen_extendditi2_vector (target, temp2));
>
>    return target;
>  }
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> new file mode 100644
> index 000..78e72ac5425
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include 
> +
> +vector signed __int128
> +foo1 (signed long a, signed char *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo2 (signed long a, signed short *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo3 (signed long a, signed int *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo4 (signed long a, signed long *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */


PATCH, rs6000] Optimization for vec_xl_sext

2021-10-14 Thread HAO CHEN GUI via Gcc-patches

Hi,

  The patch optimizes the code generation for vec_xl_sext builtin. Now all the 
sign extensions are done on VSX registers directly.

  Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
okay for trunk? Any recommendations? Thanks a lot.

  I refined the patch according to Bill and David's advice. I put the 
patch.diff and ChangeLog in attachment also in case the indentation doesn't 
show correctly in email body.


ChangeLog

2021-10-11 Haochen Gui 


gcc/

* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):

Modify the expansion for sign extension. All extensions are done

within VSX registers.


gcc/testsuite/

* gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl

   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);

   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl
    return 0;
   emit_insn (pat);

-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
   if (icode == CODE_FOR_vsx_lxvrbx)
-   emit_insn (gen_extendqidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrhx)
-   emit_insn (gen_extendhidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrwx)
-   emit_insn (gen_extendsidi2 (discratch, scratch));
-  /*  Assign discratch directly if scratch is already DI.  */
-  if (icode == CODE_FOR_vsx_lxvrdx)
-   discratch = scratch;
+   {
+ temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+   }
+  else if (icode == CODE_FOR_vsx_lxvrdx)
+   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+  else
+   gcc_unreachable ();

-  /* Emit the sign extension from DI (double) to TI (quad). */
-  emit_insn (gen_extendditi2 (target, discratch));
+  /* Emit the sign extension from V2DI (double) to TI (quad).  */
+  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+  emit_insn (gen_extendditi2_vector (target, temp2));

   return target;
 }
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
2021-10-11 Haochen Gui 

gcc/
* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
Modify the expansion for sign extension. All extensions are done
within VSX registers.

gcc/testsuite/
* gcc.target/powerpc/p10_vec_xl_sext.c: New test.

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl
 
   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);
 
   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, 

Re: [PATCH, rs6000] Optimization for vec_xl_sext

2021-10-13 Thread David Edelsohn via Gcc-patches
>> gcc/
>> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>> Modify the expansion for sign extension. All extentions are done
>> within VSX resgisters.
>
> Two typos here:  extentions => extensions, resgisters => registers.

This is okay with Bill's comments addressed.

Thanks, David


Re: [PATCH, rs6000] Optimization for vec_xl_sext

2021-10-12 Thread Bill Schmidt via Gcc-patches
Hi Hao Chen,

On 9/15/21 2:35 AM, HAO CHEN GUI wrote:
> Bill,
>
>     Yes, I built the gcc with p10 binutils. Then power10_ok tests can pass. 
> Thanks again for your kindly explanation.
>
>     I finally realized that the line wrap settings on my thunderbird didn't 
> take any effect. I have to set a very large line size,  just for a workaround.
>
> ChangeLog
>
> 2021-09-15 Haochen Gui 
>
> gcc/
>     * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>     Modify the expansion for sign extension. All extentions are done
>     within VSX resgisters.

Two typos here:  extentions => extensions, resgisters => registers.

The patch itself looks good to me.  I recommend the maintainers approve with 
the ChangeLog fixed.

Thanks!
Bill
>     * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
>
> gcc/testsuite/
>     * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>
> patch.diff
>
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index b4e13af4dc6..587e9fa2a2a 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
> exp, rtx target, bool bl
>
>    if (sign_extend)
>  {
> -  rtx discratch = gen_reg_rtx (DImode);
> +  rtx discratch = gen_reg_rtx (V2DImode);
>    rtx tiscratch = gen_reg_rtx (TImode);
>
>    /* Emit the lxvr*x insn.  */
> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, 
> tree exp, rtx target, bool bl
>     return 0;
>    emit_insn (pat);
>
> -  /* Emit a sign extension from QI,HI,WI to double (DI).  */
> -  rtx scratch = gen_lowpart (smode, tiscratch);
> +  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
> +  rtx temp1, temp2;
>    if (icode == CODE_FOR_vsx_lxvrbx)
> -   emit_insn (gen_extendqidi2 (discratch, scratch));
> +   {
> + temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
> +   }
>    else if (icode == CODE_FOR_vsx_lxvrhx)
> -   emit_insn (gen_extendhidi2 (discratch, scratch));
> +   {
> + temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
> +   }
>    else if (icode == CODE_FOR_vsx_lxvrwx)
> -   emit_insn (gen_extendsidi2 (discratch, scratch));
> -  /*  Assign discratch directly if scratch is already DI.  */
> -  if (icode == CODE_FOR_vsx_lxvrdx)
> -   discratch = scratch;
> +   {
> + temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
> +   }
> +  else if (icode == CODE_FOR_vsx_lxvrdx)
> +   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
> +  else
> +   gcc_unreachable ();
>
> -  /* Emit the sign extension from DI (double) to TI (quad). */
> -  emit_insn (gen_extendditi2 (target, discratch));
> +  /* Emit the sign extension from V2DI (double) to TI (quad).  */
> +  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
> +  emit_insn (gen_extendditi2_vector (target, temp2));
>
>    return target;
>  }
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index bcb92be2f5c..987f21bbc22 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_"
>    "vextsh2 %0,%1"
>    [(set_attr "type" "vecexts")])
>
> -(define_insn "*vsx_sign_extend_si_v2di"
> +(define_insn "vsx_sign_extend_si_v2di"
>    [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
>     (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
>  UNSPEC_VSX_SIGN_EXTEND))]
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> new file mode 100644
> index 000..78e72ac5425
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include 
> +
> +vector signed __int128
> +foo1 (signed long a, signed char *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo2 (signed long a, signed short *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo3 (signed long a, signed int *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo4 (signed long a, signed long *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times 

[PATCH, rs6000] Optimization for vec_xl_sext

2021-10-11 Thread HAO CHEN GUI via Gcc-patches

Hi,

   The patch optimized the code generation for vec_xl_sext builtin. Now all the 
sign extensions are done on VSX registers directly.

   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
okay for trunk? Any recommendations? Thanks a lot.


ChangeLog

2021-10-11 Haochen Gui 

gcc/
    * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
    Modify the expansion for sign extension. All extentions are done
    within VSX resgisters.

gcc/testsuite/
    * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl

   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);

   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl
    return 0;
   emit_insn (pat);

-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
   if (icode == CODE_FOR_vsx_lxvrbx)
-   emit_insn (gen_extendqidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrhx)
-   emit_insn (gen_extendhidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrwx)
-   emit_insn (gen_extendsidi2 (discratch, scratch));
-  /*  Assign discratch directly if scratch is already DI.  */
-  if (icode == CODE_FOR_vsx_lxvrdx)
-   discratch = scratch;
+   {
+ temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+   }
+  else if (icode == CODE_FOR_vsx_lxvrdx)
+   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+  else
+   gcc_unreachable ();

-  /* Emit the sign extension from DI (double) to TI (quad). */
-  emit_insn (gen_extendditi2 (target, discratch));
+  /* Emit the sign extension from V2DI (double) to TI (quad).  */
+  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+  emit_insn (gen_extendditi2_vector (target, temp2));

   return target;
 }
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */



PING^1 [PATCH, rs6000] Optimization for vec_xl_sext

2021-09-22 Thread HAO CHEN GUI via Gcc-patches

Hi,

  Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579434.html

Thanks

On 15/9/2021 下午 3:35, HAO CHEN GUI wrote:

Bill,

    Yes, I built the gcc with p10 binutils. Then power10_ok tests can pass. 
Thanks again for your kindly explanation.

    I finally realized that the line wrap settings on my thunderbird didn't 
take any effect. I have to set a very large line size,  just for a workaround.

ChangeLog

2021-09-15 Haochen Gui 

gcc/
    * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
    Modify the expansion for sign extension. All extentions are done
    within VSX resgisters.
    * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.

gcc/testsuite/
    * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl

   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);

   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl
    return 0;
   emit_insn (pat);

-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
   if (icode == CODE_FOR_vsx_lxvrbx)
-   emit_insn (gen_extendqidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrhx)
-   emit_insn (gen_extendhidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrwx)
-   emit_insn (gen_extendsidi2 (discratch, scratch));
-  /*  Assign discratch directly if scratch is already DI.  */
-  if (icode == CODE_FOR_vsx_lxvrdx)
-   discratch = scratch;
+   {
+ temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+   }
+  else if (icode == CODE_FOR_vsx_lxvrdx)
+   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+  else
+   gcc_unreachable ();

-  /* Emit the sign extension from DI (double) to TI (quad). */
-  emit_insn (gen_extendditi2 (target, discratch));
+  /* Emit the sign extension from V2DI (double) to TI (quad).  */
+  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+  emit_insn (gen_extendditi2_vector (target, temp2));

   return target;
 }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_"
   "vextsh2 %0,%1"
   [(set_attr "type" "vecexts")])

-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
    (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
 UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */


On 10/9/2021 下午 8:18, Bill Schmidt wrote:



On 9/10/21 12:45 AM, HAO CHEN GUI wrote:

Bill,

    Thanks so much for your advice.

    I refined the patch and passed the bootstrap and regression test.
Just one thing, the test case becomes unsupported on P9 

Re: [PATCH, rs6000] Optimization for vec_xl_sext

2021-09-15 Thread HAO CHEN GUI via Gcc-patches

Bill,

    Yes, I built the gcc with p10 binutils. Then power10_ok tests can pass. 
Thanks again for your kindly explanation.

    I finally realized that the line wrap settings on my thunderbird didn't 
take any effect. I have to set a very large line size,  just for a workaround.

ChangeLog

2021-09-15 Haochen Gui 

gcc/
    * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
    Modify the expansion for sign extension. All extentions are done
    within VSX resgisters.
    * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.

gcc/testsuite/
    * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl

   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);

   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
exp, rtx target, bool bl
    return 0;
   emit_insn (pat);

-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
   if (icode == CODE_FOR_vsx_lxvrbx)
-   emit_insn (gen_extendqidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrhx)
-   emit_insn (gen_extendhidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrwx)
-   emit_insn (gen_extendsidi2 (discratch, scratch));
-  /*  Assign discratch directly if scratch is already DI.  */
-  if (icode == CODE_FOR_vsx_lxvrdx)
-   discratch = scratch;
+   {
+ temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+   }
+  else if (icode == CODE_FOR_vsx_lxvrdx)
+   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+  else
+   gcc_unreachable ();

-  /* Emit the sign extension from DI (double) to TI (quad). */
-  emit_insn (gen_extendditi2 (target, discratch));
+  /* Emit the sign extension from V2DI (double) to TI (quad).  */
+  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+  emit_insn (gen_extendditi2_vector (target, temp2));

   return target;
 }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_"
   "vextsh2 %0,%1"
   [(set_attr "type" "vecexts")])

-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
    (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
 UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 000..78e72ac5425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */


On 10/9/2021 下午 8:18, Bill Schmidt wrote:



On 9/10/21 12:45 AM, HAO CHEN GUI wrote:

Bill,

    Thanks so much for your advice.

    I refined the patch and passed the bootstrap and regression test.
Just one thing, the test case becomes unsupported on P9 if I set "{
dg-require-effective-target power10_ok }". I just want the test case to
be compiled and check its assembly. Do we need set "power10_ok"?

Re: [PATCH, rs6000] Optimization for vec_xl_sext

2021-09-10 Thread Bill Schmidt via Gcc-patches



On 9/10/21 12:45 AM, HAO CHEN GUI wrote:

Bill,

    Thanks so much for your advice.

    I refined the patch and passed the bootstrap and regression test.
Just one thing, the test case becomes unsupported on P9 if I set "{
dg-require-effective-target power10_ok }". I just want the test case to
be compiled and check its assembly. Do we need set "power10_ok"?


Yes.  power10_ok tests whether your toolchain supports *assembly* of p10 
instructions.  It isn't unsupported on P9 per se; it's unsupported if 
the toolchain you're using on P9 doesn't have binutils support for P10.


You can look at gcc/testsuite/lib/target-supports.exp to see how this 
stuff works.  In this case we have:


proc check_effective_target_power10_ok { } {
if { ([istarget powerpc64*-*-linux*]) } {
return [check_no_compiler_messages power10_ok object {
int main (void) {
long e;
asm ("pli %0,%1" : "=r" (e) : "n" (0x12345));
return e;
}
} "-mcpu=power10"]
} else {
return 0
}
}



    I tried to disable line wrap in my email editor. Please let me know
if you still see line wrap. Thanks.


Thanks, but unfortunately it's still broken as I still see:

@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
tree exp, rtx target, bool bl

This should all be on one line.  The headers show:

Content-Type: text/plain; charset=utf-8; format=flowed

Not sure what mailer you're using, but often you can avoid problems by 
marking the patch portion as untouchable in some way. In Thunderbird, 
you can set it to Preformat, as an example.  Then you can get line wrap 
on the non-patch portions if you prefer that.




ChangeLog

2021-09-10 Haochen Gui 

gcc/
      * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
      Modify the expansion for sign extension. All extentions are done
      within VSX resgisters.
      * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.

gcc/testsuite/
      * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c
b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
tree exp, rtx target, bool bl

     if (sign_extend)
   {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
     rtx tiscratch = gen_reg_rtx (TImode);

     /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
icode, tree exp, rtx target, bool bl
      return 0;
     emit_insn (pat);

-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
     if (icode == CODE_FOR_vsx_lxvrbx)
-   emit_insn (gen_extendqidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+   }
     else if (icode == CODE_FOR_vsx_lxvrhx)
-   emit_insn (gen_extendhidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+   }
     else if (icode == CODE_FOR_vsx_lxvrwx)
-   emit_insn (gen_extendsidi2 (discratch, scratch));
-  /*  Assign discratch directly if scratch is already DI.  */
-  if (icode == CODE_FOR_vsx_lxvrdx)
-   discratch = scratch;
+   {
+ temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+   }
+  else if (icode == CODE_FOR_vsx_lxvrdx)
+   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+  else
+   gcc_unreachable ();

-  /* Emit the sign extension from DI (double) to TI (quad). */
-  emit_insn (gen_extendditi2 (target, discratch));
+  /* Emit the sign extension from V2DI (double) to TI (quad).  */
+  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+  emit_insn (gen_extendditi2_vector (target, temp2));

     return target;
   }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_"
     "vextsh2 %0,%1"
     [(set_attr "type" "vecexts")])

-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
     [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
      (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
   UNSPEC_VSX_SIGN_EXTEND))]
diff --git 

Re: [PATCH, rs6000] Optimization for vec_xl_sext

2021-09-09 Thread HAO CHEN GUI via Gcc-patches

Bill,

  Thanks so much for your advice.

  I refined the patch and passed the bootstrap and regression test. 
Just one thing, the test case becomes unsupported on P9 if I set "{ 
dg-require-effective-target power10_ok }". I just want the test case to 
be compiled and check its assembly. Do we need set "power10_ok"?


  I tried to disable line wrap in my email editor. Please let me know 
if you still see line wrap. Thanks.


ChangeLog

2021-09-10 Haochen Gui 

gcc/
    * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
    Modify the expansion for sign extension. All extentions are done
    within VSX resgisters.
    * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.

gcc/testsuite/
    * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c 
b/gcc/config/rs6000/rs6000-call.c

index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, 
tree exp, rtx target, bool bl


   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);

   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code 
icode, tree exp, rtx target, bool bl

    return 0;
   emit_insn (pat);

-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
   if (icode == CODE_FOR_vsx_lxvrbx)
-   emit_insn (gen_extendqidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrhx)
-   emit_insn (gen_extendhidi2 (discratch, scratch));
+   {
+ temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+   }
   else if (icode == CODE_FOR_vsx_lxvrwx)
-   emit_insn (gen_extendsidi2 (discratch, scratch));
-  /*  Assign discratch directly if scratch is already DI.  */
-  if (icode == CODE_FOR_vsx_lxvrdx)
-   discratch = scratch;
+   {
+ temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+   }
+  else if (icode == CODE_FOR_vsx_lxvrdx)
+   discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+  else
+   gcc_unreachable ();

-  /* Emit the sign extension from DI (double) to TI (quad). */
-  emit_insn (gen_extendditi2 (target, discratch));
+  /* Emit the sign extension from V2DI (double) to TI (quad).  */
+  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+  emit_insn (gen_extendditi2_vector (target, temp2));

   return target;
 }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_"
   "vextsh2 %0,%1"
   [(set_attr "type" "vecexts")])

-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
    (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
 UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c

new file mode 100644
index 000..fcecc542d07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */


On 10/9/2021 上午 4:49, Bill Schmidt wrote:

Hi Haochen,

This patch was sent with "format=flowed", so it doesn't apply. That 
makes it harder to review.  Can you please make sure you disable line 
wrap from your patch submissions, at least in the patch part?


On 9/6/21 1:18 AM, HAO CHEN GUI wrote:

Hi,

 

[PATCH, rs6000] Optimization for vec_xl_sext

2021-09-06 Thread HAO CHEN GUI via Gcc-patches

Hi,

   The patch optimized the code generation for vec_xl_sext builtin. Now 
all the sign extensions are done on VSX registers directly.


   Bootstrapped and tested on powerpc64le-linux with no regressions. Is 
this okay for trunk? Any recommendations? Thanks a lot.


ChangeLog

2021-09-06 Haochen Gui 

gcc/
    * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
    Modify the expansion for sign extension.
    * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.

gcc/testsuite/
    * gcc.target/powerpc/p10_vec_xl_sext.c: New test.


patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c 
b/gcc/config/rs6000/rs6000-call.c

index b4e13af4dc6..54fdaf47be3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, 
tree exp, rtx target, bool bl


   if (sign_extend)
 {
-  rtx discratch = gen_reg_rtx (DImode);
+  rtx discratch = gen_reg_rtx (V2DImode);
   rtx tiscratch = gen_reg_rtx (TImode);

   /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code 
icode, tree exp, rtx target, bool bl

 return 0;
   emit_insn (pat);

-  /* Emit a sign extension from QI,HI,WI to double (DI).  */
-  rtx scratch = gen_lowpart (smode, tiscratch);
+  /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+  rtx temp1, temp2;
   if (icode == CODE_FOR_vsx_lxvrbx)
-    emit_insn (gen_extendqidi2 (discratch, scratch));
+    {
+      temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+      emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+    }
   else if (icode == CODE_FOR_vsx_lxvrhx)
-    emit_insn (gen_extendhidi2 (discratch, scratch));
+    {
+      temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+      emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+    }
   else if (icode == CODE_FOR_vsx_lxvrwx)
-    emit_insn (gen_extendsidi2 (discratch, scratch));
-  /*  Assign discratch directly if scratch is already DI.  */
-  if (icode == CODE_FOR_vsx_lxvrdx)
-    discratch = scratch;
+    {
+      temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+      emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+    }
+  else if (icode == CODE_FOR_vsx_lxvrdx)
+    discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+  else
+    return 0;

-  /* Emit the sign extension from DI (double) to TI (quad). */
-  emit_insn (gen_extendditi2 (target, discratch));
+  /* Emit the sign extension from V2DI (double) to TI (quad).  */
+  temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+  emit_insn (gen_extendditi2_vector (target, temp2));

   return target;
 }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_"
   "vextsh2 %0,%1"
   [(set_attr "type" "vecexts")])

-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
      UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c

new file mode 100644
index 000..e5b84e6167a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times 
{\mvextsh2d\M|\mvextsw2d\M|\mvextsb2d\M} 3 } } */

+/* { dg-final { scan-assembler-not "mtvsrdd"   } } */