Re: [PATCH] arc: Improve/add instruction patterns to better use MAC instructions.
On 10/9/20 8:24 AM, Claudiu Zissulescu wrote: > From: Claudiu Zissulescu > > ARC MPY7+ instructions add MAC instructions for vector and scalar data > types. This patch adds a madd pattern for 16bit datum that is using the > 32bit MAC instruction, and dot_prod patterns for v4hi vector > types. The 64bit moves are also upgraded by using vadd2 instruction. > > gcc/ > -xx-xx Claudiu Zissulescu > > * config/arc/arc.c (arc_split_move): Recognize vadd2 instructions. > * config/arc/arc.md (movdi_insn): Update pattern to use vadd2 > instructions. > (movdf_insn): Likewise. > (maddhisi4): New pattern. > (umaddhisi4): Likewise. > * config/arc/simdext.md (mov_int): Update pattern to use > vadd2. > (sdot_prodv4hi): New pattern. > (udot_prodv4hi): Likewise. > (arc_vec_mac_hi_v4hi): Update/renamed to > arc_vec_mac_v2hiv2si. > (arc_vec_mac_v2hiv2si_zero): New pattern. OK for the trunk. Sorry for the delay. jeff
Re: [PATCH] arc: Improve/add instruction patterns to better use MAC instructions.
Gentle PING. On Fri, Oct 9, 2020 at 5:24 PM Claudiu Zissulescu wrote: > > From: Claudiu Zissulescu > > ARC MYP7+ instructions add MAC instructions for vector and scalar data > types. This patch adds a madd pattern for 16it datum that is using the > 32bit MAC instruction, and dot_prod patterns for v4hi vector > types. The 64bit moves are also upgraded by using vadd2 instuction. > > gcc/ > -xx-xx Claudiu Zissulescu > > * config/arc/arc.c (arc_split_move): Recognize vadd2 instructions. > * config/arc/arc.md (movdi_insn): Update pattern to use vadd2 > instructions. > (movdf_insn): Likewise. > (maddhisi4): New pattern. > (umaddhisi4): Likewise. > * config/arc/simdext.md (mov_int): Update pattern to use > vadd2. > (sdot_prodv4hi): New pattern. > (udot_prodv4hi): Likewise. > (arc_vec_mac_hi_v4hi): Update/renamed to > arc_vec_mac_v2hiv2si. > (arc_vec_mac_v2hiv2si_zero): New pattern. > > Signed-off-by: Claudiu Zissulescu > --- > gcc/config/arc/arc.c | 8 > gcc/config/arc/arc.md | 71 --- > gcc/config/arc/constraints.md | 5 ++ > gcc/config/arc/simdext.md | 90 +++ > 4 files changed, 147 insertions(+), 27 deletions(-) > > diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c > index ec55cfde87a9..d5b521e75e67 100644 > --- a/gcc/config/arc/arc.c > +++ b/gcc/config/arc/arc.c > @@ -10202,6 +10202,14 @@ arc_split_move (rtx *operands) >return; > } > > + if (TARGET_PLUS_QMACW > + && even_register_operand (operands[0], mode) > + && even_register_operand (operands[1], mode)) > +{ > + emit_move_insn (operands[0], operands[1]); > + return; > +} > + >if (TARGET_PLUS_QMACW >&& GET_CODE (operands[1]) == CONST_VECTOR) > { > diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md > index f9fc11e51a85..1720e8cd2f6f 100644 > --- a/gcc/config/arc/arc.md > +++ b/gcc/config/arc/arc.md > @@ -1345,8 +1345,8 @@ archs4x, archs4xd" >") > > (define_insn_and_split "*movdi_insn" > - [(set (match_operand:DI 0 "move_dest_operand" "=w, w,r, m") > - (match_operand:DI 1 "move_double_src_operand" 
"c,Hi,m,cCm3"))] > + [(set (match_operand:DI 0 "move_dest_operand" "=r, r,r, m") > + (match_operand:DI 1 "move_double_src_operand" "r,Hi,m,rCm3"))] >"register_operand (operands[0], DImode) > || register_operand (operands[1], DImode) > || (satisfies_constraint_Cm3 (operands[1]) > @@ -1358,6 +1358,13 @@ archs4x, archs4xd" > default: >return \"#\"; > > +case 0: > +if (TARGET_PLUS_QMACW > + && even_register_operand (operands[0], DImode) > + && even_register_operand (operands[1], DImode)) > + return \"vadd2\\t%0,%1,0\"; > +return \"#\"; > + > case 2: > if (TARGET_LL64 > && memory_operand (operands[1], DImode) > @@ -1374,7 +1381,7 @@ archs4x, archs4xd" > return \"#\"; > } > }" > - "reload_completed" > + "&& reload_completed" >[(const_int 0)] >{ > arc_split_move (operands); > @@ -1420,15 +1427,24 @@ archs4x, archs4xd" >"if (prepare_move_operands (operands, DFmode)) DONE;") > > (define_insn_and_split "*movdf_insn" > - [(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m") > - (match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))] > - "register_operand (operands[0], DFmode) || register_operand (operands[1], > DFmode)" > + [(set (match_operand:DF 0 "move_dest_operand" "=D,r,r,r,r,m") > + (match_operand:DF 1 "move_double_src_operand" "r,D,r,E,m,r"))] > + "register_operand (operands[0], DFmode) > + || register_operand (operands[1], DFmode)" >"* > { > switch (which_alternative) > { > default: >return \"#\"; > + > +case 2: > +if (TARGET_PLUS_QMACW > + && even_register_operand (operands[0], DFmode) > + && even_register_operand (operands[1], DFmode)) > + return \"vadd2\\t%0,%1,0\"; > +return \"#\"; > + > case 4: > if (TARGET_LL64 > && ((even_register_operand (operands[0], DFmode) > @@ -6177,6 +6193,49 @@ archs4x, archs4xd" >[(set_attr "length" "0")]) > > ;; MAC and DMPY instructions > + > +; Use MAC instruction to emulate 16bit mac. 
> +(define_expand "maddhisi4" > + [(match_operand:SI 0 "register_operand" "") > + (match_operand:HI 1 "register_operand" "") > + (match_operand:HI 2 "extend_operand" "") > + (match_operand:SI 3 "register_operand" "")] > + "TARGET_PLUS_DMPY" > + "{ > + rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST); > + rtx tmp1 = gen_reg_rtx (SImode); > + rtx tmp2 = gen_reg_rtx (SImode); > + rtx accl = gen_lowpart (SImode, acc_reg); > + > + emit_move_insn (accl, operands[3]); > + emit_insn (gen_rtx_SET (tmp1, gen_rtx_SIGN_EXTEND (SImode, operands[1]))); > + emit_insn (gen_rtx_SET (tmp2, gen_rtx_SIGN_EXTEND (SImode,
[PATCH] arc: Improve/add instruction patterns to better use MAC instructions.
From: Claudiu Zissulescu ARC MPY7+ instructions add MAC instructions for vector and scalar data types. This patch adds a madd pattern for 16bit datum that is using the 32bit MAC instruction, and dot_prod patterns for v4hi vector types. The 64bit moves are also upgraded by using vadd2 instruction. gcc/ -xx-xx Claudiu Zissulescu * config/arc/arc.c (arc_split_move): Recognize vadd2 instructions. * config/arc/arc.md (movdi_insn): Update pattern to use vadd2 instructions. (movdf_insn): Likewise. (maddhisi4): New pattern. (umaddhisi4): Likewise. * config/arc/simdext.md (mov_int): Update pattern to use vadd2. (sdot_prodv4hi): New pattern. (udot_prodv4hi): Likewise. (arc_vec_mac_hi_v4hi): Update/renamed to arc_vec_mac_v2hiv2si. (arc_vec_mac_v2hiv2si_zero): New pattern. Signed-off-by: Claudiu Zissulescu --- gcc/config/arc/arc.c | 8 gcc/config/arc/arc.md | 71 --- gcc/config/arc/constraints.md | 5 ++ gcc/config/arc/simdext.md | 90 +++ 4 files changed, 147 insertions(+), 27 deletions(-) diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index ec55cfde87a9..d5b521e75e67 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -10202,6 +10202,14 @@ arc_split_move (rtx *operands) return; } + if (TARGET_PLUS_QMACW + && even_register_operand (operands[0], mode) + && even_register_operand (operands[1], mode)) +{ + emit_move_insn (operands[0], operands[1]); + return; +} + if (TARGET_PLUS_QMACW && GET_CODE (operands[1]) == CONST_VECTOR) { diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index f9fc11e51a85..1720e8cd2f6f 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -1345,8 +1345,8 @@ archs4x, archs4xd" ") (define_insn_and_split "*movdi_insn" - [(set (match_operand:DI 0 "move_dest_operand" "=w, w,r, m") - (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,cCm3"))] + [(set (match_operand:DI 0 "move_dest_operand" "=r, r,r, m") + (match_operand:DI 1 "move_double_src_operand" "r,Hi,m,rCm3"))] "register_operand (operands[0], DImode) || 
register_operand (operands[1], DImode) || (satisfies_constraint_Cm3 (operands[1]) @@ -1358,6 +1358,13 @@ archs4x, archs4xd" default: return \"#\"; +case 0: +if (TARGET_PLUS_QMACW + && even_register_operand (operands[0], DImode) + && even_register_operand (operands[1], DImode)) + return \"vadd2\\t%0,%1,0\"; +return \"#\"; + case 2: if (TARGET_LL64 && memory_operand (operands[1], DImode) @@ -1374,7 +1381,7 @@ archs4x, archs4xd" return \"#\"; } }" - "reload_completed" + "&& reload_completed" [(const_int 0)] { arc_split_move (operands); @@ -1420,15 +1427,24 @@ archs4x, archs4xd" "if (prepare_move_operands (operands, DFmode)) DONE;") (define_insn_and_split "*movdf_insn" - [(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m") - (match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))] - "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)" + [(set (match_operand:DF 0 "move_dest_operand" "=D,r,r,r,r,m") + (match_operand:DF 1 "move_double_src_operand" "r,D,r,E,m,r"))] + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" "* { switch (which_alternative) { default: return \"#\"; + +case 2: +if (TARGET_PLUS_QMACW + && even_register_operand (operands[0], DFmode) + && even_register_operand (operands[1], DFmode)) + return \"vadd2\\t%0,%1,0\"; +return \"#\"; + case 4: if (TARGET_LL64 && ((even_register_operand (operands[0], DFmode) @@ -6177,6 +6193,49 @@ archs4x, archs4xd" [(set_attr "length" "0")]) ;; MAC and DMPY instructions + +; Use MAC instruction to emulate 16bit mac. 
+(define_expand "maddhisi4" + [(match_operand:SI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "extend_operand" "") + (match_operand:SI 3 "register_operand" "")] + "TARGET_PLUS_DMPY" + "{ + rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST); + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx accl = gen_lowpart (SImode, acc_reg); + + emit_move_insn (accl, operands[3]); + emit_insn (gen_rtx_SET (tmp1, gen_rtx_SIGN_EXTEND (SImode, operands[1]))); + emit_insn (gen_rtx_SET (tmp2, gen_rtx_SIGN_EXTEND (SImode, operands[2]))); + emit_insn (gen_mac (tmp1, tmp2)); + emit_move_insn (operands[0], accl); + DONE; + }") + +; The same for the unsigned variant, but using MACU instruction. +(define_expand "umaddhisi4" + [(match_operand:SI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "extend_operand"