Re: [PATCH][armeb] PR 91060 gcc.c-torture/execute/scal-to-vec1.c fails since r272843

2019-07-10 Thread Ramana Radhakrishnan
On Wed, Jul 10, 2019 at 11:07 AM Richard Sandiford
 wrote:
>
> Christophe Lyon  writes:
> > On Mon, 8 Jul 2019 at 11:04, Richard Sandiford
> >  wrote:
> >>
> >> Christophe Lyon  writes:
> >> > Hi,
> >> >
> >> > This patch fixes PR 91060 where the lane ordering was no longer the
> >> > right one (GCC's vs architecture's).
> >>
> >> Sorry, we clashed :-)
> >>
> >> I'd prefer to go with the version I attached to bugzilla just now.
> >
> > Yes just saw that, thanks!
>
> The bugzilla version didn't properly adjust vec_setv2di_internal.
> Fixed with the version below, tested on armeb-eabi.
>
> Besides gcc.c-torture/execute/scal-to-vec*.c, the patch also fixes:
>
>   c-c++-common/torture/vector-compare-1.c
>   gcc.target/arm/pr69614.c
>   g++.dg/ext/vector37.C
>
> OK for trunk?

OK.

Ramana
>
> Richard
>
>
> 2019-07-10  Richard Sandiford  
>
> gcc/
> PR target/91060
> * config/arm/iterators.md (V2DI_ONLY): New mode iterator.
> * config/arm/neon.md (vec_set_internal): Add a '@' prefix.
> (vec_setv2di_internal): Reexpress as...
> (@vec_set_internal): ...this.
> * config/arm/arm.c (neon_expand_vector_init): Use gen_vec_set_internal
> rather than gen_neon_vset_lane.
>
> Index: gcc/config/arm/iterators.md
> ===
> --- gcc/config/arm/iterators.md 2019-06-18 09:35:55.377865698 +0100
> +++ gcc/config/arm/iterators.md 2019-07-10 11:01:57.990749932 +0100
> @@ -186,6 +186,9 @@ (define_mode_iterator VX [V8QI V4HI V16Q
>  ;; Modes with 8-bit elements.
>  (define_mode_iterator VE [V8QI V16QI])
>
> +;; V2DI only (for use with @ patterns).
> +(define_mode_iterator V2DI_ONLY [V2DI])
> +
>  ;; Modes with 64-bit elements only.
>  (define_mode_iterator V64 [DI V2DI])
>
> Index: gcc/config/arm/neon.md
> ===
> --- gcc/config/arm/neon.md  2019-07-01 09:37:07.220524486 +0100
> +++ gcc/config/arm/neon.md  2019-07-10 11:01:57.990749932 +0100
> @@ -319,7 +319,7 @@ (define_insn "*movmisalign_neon_lo
>"vld1.\t{%q0}, %A1"
>[(set_attr "type" "neon_load1_1reg")])
>
> -(define_insn "vec_set_internal"
> +(define_insn "@vec_set_internal"
>[(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
>  (vec_merge:VD_LANE
>(vec_duplicate:VD_LANE
> @@ -340,7 +340,7 @@ (define_insn "vec_set_internal"
>  }
>[(set_attr "type" "neon_load1_all_lanes,neon_from_gp")])
>
> -(define_insn "vec_set_internal"
> +(define_insn "@vec_set_internal"
>[(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
>  (vec_merge:VQ2
>(vec_duplicate:VQ2
> @@ -369,12 +369,12 @@ (define_insn "vec_set_internal"
>[(set_attr "type" "neon_load1_all_lanes,neon_from_gp")]
>  )
>
> -(define_insn "vec_setv2di_internal"
> -  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
> -(vec_merge:V2DI
> -  (vec_duplicate:V2DI
> +(define_insn "@vec_set_internal"
> +  [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
> +(vec_merge:V2DI_ONLY
> +  (vec_duplicate:V2DI_ONLY
>  (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
> -  (match_operand:V2DI 3 "s_register_operand" "0,0")
> +  (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
>(match_operand:SI 2 "immediate_operand" "i,i")))]
>"TARGET_NEON"
>  {
> Index: gcc/config/arm/arm.c
> ===
> --- gcc/config/arm/arm.c2019-07-01 09:37:07.220524486 +0100
> +++ gcc/config/arm/arm.c2019-07-10 11:01:57.990749932 +0100
> @@ -12471,7 +12471,7 @@ neon_expand_vector_init (rtx target, rtx
>if (n_var == 1)
>  {
>rtx copy = copy_rtx (vals);
> -  rtx index = GEN_INT (one_var);
> +  rtx merge_mask = GEN_INT (1 << one_var);
>
>/* Load constant part of vector, substitute neighboring value for
>  varying element.  */
> @@ -12480,38 +12480,7 @@ neon_expand_vector_init (rtx target, rtx
>
>/* Insert variable.  */
>x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
> -  switch (mode)
> -   {
> -   case E_V8QImode:
> - emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
> - break;
> -   case E_V16QImode:
> - emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
> - break;
> -   case E_V4HImode:
> - emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
> - break;
> -   case E_V8HImode:
> - emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
> - break;
> -   case E_V2SImode:
> - emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
> - break;
> -   case E_V4SImode:
> - emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
> - break;
> -   case E_V2SFmode:
> - emit_insn 

Re: [PATCH][armeb] PR 91060 gcc.c-torture/execute/scal-to-vec1.c fails since r272843

2019-07-10 Thread Richard Sandiford
Christophe Lyon  writes:
> On Mon, 8 Jul 2019 at 11:04, Richard Sandiford
>  wrote:
>>
>> Christophe Lyon  writes:
>> > Hi,
>> >
>> > This patch fixes PR 91060 where the lane ordering was no longer the
>> > right one (GCC's vs architecture's).
>>
>> Sorry, we clashed :-)
>>
>> I'd prefer to go with the version I attached to bugzilla just now.
>
> Yes just saw that, thanks!

The bugzilla version didn't properly adjust vec_setv2di_internal.
Fixed with the version below, tested on armeb-eabi.

Besides gcc.c-torture/execute/scal-to-vec*.c, the patch also fixes:

  c-c++-common/torture/vector-compare-1.c
  gcc.target/arm/pr69614.c
  g++.dg/ext/vector37.C

OK for trunk?

Richard


2019-07-10  Richard Sandiford  

gcc/
PR target/91060
* config/arm/iterators.md (V2DI_ONLY): New mode iterator.
* config/arm/neon.md (vec_set_internal): Add a '@' prefix.
(vec_setv2di_internal): Reexpress as...
(@vec_set_internal): ...this.
* config/arm/arm.c (neon_expand_vector_init): Use gen_vec_set_internal
rather than gen_neon_vset_lane.

Index: gcc/config/arm/iterators.md
===
--- gcc/config/arm/iterators.md 2019-06-18 09:35:55.377865698 +0100
+++ gcc/config/arm/iterators.md 2019-07-10 11:01:57.990749932 +0100
@@ -186,6 +186,9 @@ (define_mode_iterator VX [V8QI V4HI V16Q
 ;; Modes with 8-bit elements.
 (define_mode_iterator VE [V8QI V16QI])
 
+;; V2DI only (for use with @ patterns).
+(define_mode_iterator V2DI_ONLY [V2DI])
+
 ;; Modes with 64-bit elements only.
 (define_mode_iterator V64 [DI V2DI])
 
Index: gcc/config/arm/neon.md
===
--- gcc/config/arm/neon.md  2019-07-01 09:37:07.220524486 +0100
+++ gcc/config/arm/neon.md  2019-07-10 11:01:57.990749932 +0100
@@ -319,7 +319,7 @@ (define_insn "*movmisalign_neon_lo
   "vld1.\t{%q0}, %A1"
   [(set_attr "type" "neon_load1_1reg")])
 
-(define_insn "vec_set_internal"
+(define_insn "@vec_set_internal"
   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
 (vec_merge:VD_LANE
   (vec_duplicate:VD_LANE
@@ -340,7 +340,7 @@ (define_insn "vec_set_internal"
 }
   [(set_attr "type" "neon_load1_all_lanes,neon_from_gp")])
 
-(define_insn "vec_set_internal"
+(define_insn "@vec_set_internal"
   [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
 (vec_merge:VQ2
   (vec_duplicate:VQ2
@@ -369,12 +369,12 @@ (define_insn "vec_set_internal"
   [(set_attr "type" "neon_load1_all_lanes,neon_from_gp")]
 )
 
-(define_insn "vec_setv2di_internal"
-  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
-(vec_merge:V2DI
-  (vec_duplicate:V2DI
+(define_insn "@vec_set_internal"
+  [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
+(vec_merge:V2DI_ONLY
+  (vec_duplicate:V2DI_ONLY
 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
-  (match_operand:V2DI 3 "s_register_operand" "0,0")
+  (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
   (match_operand:SI 2 "immediate_operand" "i,i")))]
   "TARGET_NEON"
 {
Index: gcc/config/arm/arm.c
===
--- gcc/config/arm/arm.c2019-07-01 09:37:07.220524486 +0100
+++ gcc/config/arm/arm.c2019-07-10 11:01:57.990749932 +0100
@@ -12471,7 +12471,7 @@ neon_expand_vector_init (rtx target, rtx
   if (n_var == 1)
 {
   rtx copy = copy_rtx (vals);
-  rtx index = GEN_INT (one_var);
+  rtx merge_mask = GEN_INT (1 << one_var);
 
   /* Load constant part of vector, substitute neighboring value for
 varying element.  */
@@ -12480,38 +12480,7 @@ neon_expand_vector_init (rtx target, rtx
 
   /* Insert variable.  */
   x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
-  switch (mode)
-   {
-   case E_V8QImode:
- emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
- break;
-   case E_V16QImode:
- emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
- break;
-   case E_V4HImode:
- emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
- break;
-   case E_V8HImode:
- emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
- break;
-   case E_V2SImode:
- emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
- break;
-   case E_V4SImode:
- emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
- break;
-   case E_V2SFmode:
- emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
- break;
-   case E_V4SFmode:
- emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
- break;
-   case E_V2DImode:
- emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
- break;
-   default:
- 

Re: [PATCH][armeb] PR 91060 gcc.c-torture/execute/scal-to-vec1.c fails since r272843

2019-07-08 Thread Christophe Lyon
On Mon, 8 Jul 2019 at 11:06, Kyrill Tkachov  wrote:
>
> Hi Christophe
>
> On 7/8/19 10:01 AM, Christophe Lyon wrote:
> > Hi,
> >
> > This patch fixes PR 91060 where the lane ordering was no longer the
> > right one (GCC's vs architecture's).
> >
> > OK?
> >
> > Thanks to both Richards for most of the debugging!
>
> Thank you to all for tracking this down.
>
> >
> > Christophe
>
>
> pr91060.patch.txt
>
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 820502a..4c7b5a8 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -12471,7 +12471,7 @@ neon_expand_vector_init (rtx target, rtx vals)
> if (n_var == 1)
>   {
> rtx copy = copy_rtx (vals);
> -  rtx index = GEN_INT (one_var);
> +  rtx index = GEN_INT (1 << one_var);
>
> /* Load constant part of vector, substitute neighboring value for
>  varying element.  */
> @@ -12483,31 +12483,31 @@ neon_expand_vector_init (rtx target, rtx vals)
> switch (mode)
> {
> case E_V8QImode:
> - emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
> + emit_insn (gen_vec_setv8qi_internal (target, x, index, target));
>   break;
> case E_V16QImode:
> - emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
> + emit_insn (gen_vec_setv16qi_internal (target, x, index, target));
>   break;
> case E_V4HImode:
> - emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
> + emit_insn (gen_vec_setv4hi_internal (target, x, index, target));
>   break;
> case E_V8HImode:
> - emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
> + emit_insn (gen_vec_setv8hi_internal (target, x, index, target));
>   break;
> case E_V2SImode:
> - emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
> + emit_insn (gen_vec_setv2si_internal (target, x, index, target));
>   break;
> case E_V4SImode:
> - emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
> + emit_insn (gen_vec_setv4si_internal (target, x, index, target));
>   break;
> case E_V2SFmode:
> - emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
> + emit_insn (gen_vec_setv2sf_internal (target, x, index, target));
>   break;
> case E_V4SFmode:
> - emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
> + emit_insn (gen_vec_setv4sf_internal (target, x, index, target));
>   break;
> case E_V2DImode:
> - emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
> + emit_insn (gen_vec_setv2di_internal (target, x, index, target));
>   break;
> default:
>   gcc_unreachable ();
>
>
> Can we take the opportunity here to remove that switch statement and use the 
> parametrised names machinery:
> https://gcc.gnu.org/onlinedocs/gccint/Parameterized-Names.html#Parameterized-Names
>
> so that we can instead have one call to gen_vec_setv8hi_internal (mode, 
> target, x, merge_mask, target) or something.

Yes, that's what Richard posted in bugzilla, it's much nicer indeed.

> Thanks,
> Kyrill
>


Re: [PATCH][armeb] PR 91060 gcc.c-torture/execute/scal-to-vec1.c fails since r272843

2019-07-08 Thread Christophe Lyon
On Mon, 8 Jul 2019 at 11:04, Richard Sandiford
 wrote:
>
> Christophe Lyon  writes:
> > Hi,
> >
> > This patch fixes PR 91060 where the lane ordering was no longer the
> > right one (GCC's vs architecture's).
>
> Sorry, we clashed :-)
>
> I'd prefer to go with the version I attached to bugzilla just now.

Yes just saw that, thanks!

>
> Thanks,
> Richard


Re: [PATCH][armeb] PR 91060 gcc.c-torture/execute/scal-to-vec1.c fails since r272843

2019-07-08 Thread Kyrill Tkachov

Hi Christophe

On 7/8/19 10:01 AM, Christophe Lyon wrote:

Hi,

This patch fixes PR 91060 where the lane ordering was no longer the
right one (GCC's vs architecture's).

OK?

Thanks to both Richards for most of the debugging!


Thank you to all for tracking this down.



Christophe



pr91060.patch.txt

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 820502a..4c7b5a8 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12471,7 +12471,7 @@ neon_expand_vector_init (rtx target, rtx vals)
   if (n_var == 1)
 {
   rtx copy = copy_rtx (vals);
-  rtx index = GEN_INT (one_var);
+  rtx index = GEN_INT (1 << one_var);
 
   /* Load constant part of vector, substitute neighboring value for

 varying element.  */
@@ -12483,31 +12483,31 @@ neon_expand_vector_init (rtx target, rtx vals)
   switch (mode)
{
case E_V8QImode:
- emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
+ emit_insn (gen_vec_setv8qi_internal (target, x, index, target));
  break;
case E_V16QImode:
- emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
+ emit_insn (gen_vec_setv16qi_internal (target, x, index, target));
  break;
case E_V4HImode:
- emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
+ emit_insn (gen_vec_setv4hi_internal (target, x, index, target));
  break;
case E_V8HImode:
- emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
+ emit_insn (gen_vec_setv8hi_internal (target, x, index, target));
  break;
case E_V2SImode:
- emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
+ emit_insn (gen_vec_setv2si_internal (target, x, index, target));
  break;
case E_V4SImode:
- emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
+ emit_insn (gen_vec_setv4si_internal (target, x, index, target));
  break;
case E_V2SFmode:
- emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
+ emit_insn (gen_vec_setv2sf_internal (target, x, index, target));
  break;
case E_V4SFmode:
- emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
+ emit_insn (gen_vec_setv4sf_internal (target, x, index, target));
  break;
case E_V2DImode:
- emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
+ emit_insn (gen_vec_setv2di_internal (target, x, index, target));
  break;
default:
  gcc_unreachable ();


Can we take the opportunity here to remove that switch statement and use the 
parametrised names machinery:
https://gcc.gnu.org/onlinedocs/gccint/Parameterized-Names.html#Parameterized-Names

so that we can instead have one call to gen_vec_setv8hi_internal (mode, target, 
x, merge_mask, target) or something.
Thanks,
Kyrill



Re: [PATCH][armeb] PR 91060 gcc.c-torture/execute/scal-to-vec1.c fails since r272843

2019-07-08 Thread Richard Sandiford
Christophe Lyon  writes:
> Hi,
>
> This patch fixes PR 91060 where the lane ordering was no longer the
> right one (GCC's vs architecture's).

Sorry, we clashed :-)

I'd prefer to go with the version I attached to bugzilla just now.

Thanks,
Richard


[PATCH][armeb] PR 91060 gcc.c-torture/execute/scal-to-vec1.c fails since r272843

2019-07-08 Thread Christophe Lyon
Hi,

This patch fixes PR 91060 where the lane ordering was no longer the
right one (GCC's vs architecture's).

OK?

Thanks to both Richards for most of the debugging!

Christophe
gcc/ChangeLog:

2019-07-08  Christophe Lyon  

PR target/91060
* config/arm/arm.c (neon_expand_vector_init): Use
vec_set*_internal instead of neon_vset_lane*.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 820502a..4c7b5a8 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12471,7 +12471,7 @@ neon_expand_vector_init (rtx target, rtx vals)
   if (n_var == 1)
 {
   rtx copy = copy_rtx (vals);
-  rtx index = GEN_INT (one_var);
+  rtx index = GEN_INT (1 << one_var);
 
   /* Load constant part of vector, substitute neighboring value for
 varying element.  */
@@ -12483,31 +12483,31 @@ neon_expand_vector_init (rtx target, rtx vals)
   switch (mode)
{
case E_V8QImode:
- emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
+ emit_insn (gen_vec_setv8qi_internal (target, x, index, target));
  break;
case E_V16QImode:
- emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
+ emit_insn (gen_vec_setv16qi_internal (target, x, index, target));
  break;
case E_V4HImode:
- emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
+ emit_insn (gen_vec_setv4hi_internal (target, x, index, target));
  break;
case E_V8HImode:
- emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
+ emit_insn (gen_vec_setv8hi_internal (target, x, index, target));
  break;
case E_V2SImode:
- emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
+ emit_insn (gen_vec_setv2si_internal (target, x, index, target));
  break;
case E_V4SImode:
- emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
+ emit_insn (gen_vec_setv4si_internal (target, x, index, target));
  break;
case E_V2SFmode:
- emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
+ emit_insn (gen_vec_setv2sf_internal (target, x, index, target));
  break;
case E_V4SFmode:
- emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
+ emit_insn (gen_vec_setv4sf_internal (target, x, index, target));
  break;
case E_V2DImode:
- emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
+ emit_insn (gen_vec_setv2di_internal (target, x, index, target));
  break;
default:
  gcc_unreachable ();