The logic of the vector dot product operation, where the destination
elements are four times as wide as the source elements, is as
follows (taking sdot_prodv4siv16qi as an example):
v16i8 src1, src2;
v4i32 src3, dest;
dest[0] = src1[0] * src2[0] + src1[1] * src2[1]
+ src1[2] * src2[2] + src1[3] * src2[3]
+ src3[0]
dest[1] = src1[4] * src2[4] + src1[5] * src2[5]
+ src1[6] * src2[6] + src1[7] * src2[7]
+ src3[1]
dest[2] = src1[8] * src2[8] + src1[9] * src2[9]
+ src1[10] * src2[10] + src1[11] * src2[11]
+ src3[2]
dest[3] = src1[12] * src2[12] + src1[13] * src2[13]
+ src1[14] * src2[14] + src1[15] * src2[15]
+ src3[3]
gcc/ChangeLog:
* config/loongarch/lasx.md (ILASX_HB): Move to ...
* config/loongarch/lsx.md (ILSX_HB): Move to ...
* config/loongarch/simd.md (ILSX_HB): ... here.
(ILASX_HB): ... here.
(IVEC_HB): New iterator.
(WVEC_QUARTER): New attr.
(wvec_quarter): Likewise.
(simdfmt_qw): Likewise.
(<su>dot_prod<wvec_quarter><mode>): New template.
Change-Id: Icaa7101cca38495c0dd05009466555a47eda9ae5
---
gcc/config/loongarch/lasx.md | 3 --
gcc/config/loongarch/lsx.md | 3 --
gcc/config/loongarch/simd.md | 53 +++++++++++++++++++++++++++++++++++-
3 files changed, 52 insertions(+), 7 deletions(-)
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 72b490727ee..8156f84da46 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -146,9 +146,6 @@ (define_mode_iterator ILASX_WHB [V8SI V16HI V32QI])
;; Only integer modes equal or larger than a word.
(define_mode_iterator ILASX_DW [V4DI V8SI])
-;; Only integer modes smaller than a word.
-(define_mode_iterator ILASX_HB [V16HI V32QI])
-
;; Only used for immediate set shuffle elements instruction.
(define_mode_iterator LASX_WHB_W [V8SI V16HI V32QI V8SF])
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index be3a99b6d12..1786cc7dcb2 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -145,9 +145,6 @@ (define_mode_iterator ILSX_WHB [V4SI V8HI V16QI])
;; Only integer modes equal or larger than a word.
(define_mode_iterator ILSX_DW [V2DI V4SI])
-;; Only integer modes smaller than a word.
-(define_mode_iterator ILSX_HB [V8HI V16QI])
-
;;;; Only integer modes for fixed-point madd_q/maddr_q.
;;(define_mode_iterator ILSX_WH [V4SI V8HI])
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 12eda2ee5ca..59ecb55cd3a 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -23,6 +23,10 @@ (define_mode_iterator ILSX [V2DI V4SI V8HI V16QI])
;; Integer modes supported by LASX.
(define_mode_iterator ILASX [V4DI V8SI V16HI V32QI])
+;; Only integer modes whose elements are smaller than a word (HI and QI).
+(define_mode_iterator ILSX_HB [V8HI V16QI])
+(define_mode_iterator ILASX_HB [V16HI V32QI])
+
;; FP modes supported by LSX
(define_mode_iterator FLSX [V2DF V4SF])
@@ -38,6 +42,10 @@ (define_mode_iterator LASX [ILASX FLASX])
;; All integer modes available
(define_mode_iterator IVEC [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")])
+;; All available integer modes whose elements are smaller than a word.
+(define_mode_iterator IVEC_HB [(ILSX_HB "ISA_HAS_LSX")
+ (ILASX_HB "ISA_HAS_LASX")])
+
;; All FP modes available
(define_mode_iterator FVEC [(FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")])
@@ -90,12 +98,18 @@ (define_mode_attr WVEC_HALF [(V2DI "V1TI") (V4DI "V2TI")
(V8HI "V4SI") (V16HI "V8SI")
(V16QI "V8HI") (V32QI "V16HI")])
+(define_mode_attr WVEC_QUARTER [(V8HI "V2DI") (V16HI "V4DI")
+ (V16QI "V4SI") (V32QI "V8SI")]) ; Same vector size, elements 4x wider.
+
;; Lower-case version.
(define_mode_attr wvec_half [(V2DI "v1ti") (V4DI "v2ti")
(V4SI "v2di") (V8SI "v4di")
(V8HI "v4si") (V16HI "v8si")
(V16QI "v8hi") (V32QI "v16hi")])
+(define_mode_attr wvec_quarter [(V8HI "v2di") (V16HI "v4di")
+ (V16QI "v4si") (V32QI "v8si")]) ; Lower-case version of WVEC_QUARTER.
+
;; Integer vector modes with the same length and unit size as a mode.
(define_mode_attr VIMODE [(V2DI "V2DI") (V4SI "V4SI")
(V8HI "V8HI") (V16QI "V16QI")
@@ -124,12 +138,16 @@ (define_mode_attr simdfmt [(V2DF "d") (V4DF "d")
(V8HI "h") (V16HI "h")
(V16QI "b") (V32QI "b")])
-;; Suffix for widening LSX or LASX instructions.
+;; Suffix for double widening LSX or LASX instructions.
(define_mode_attr simdfmt_w [(V2DI "q") (V4DI "q")
(V4SI "d") (V8SI "d")
(V8HI "w") (V16HI "w")
(V16QI "h") (V32QI "h")])
+;; Suffix naming the 4x wider element in quadruple widening LSX or LASX instructions.
+(define_mode_attr simdfmt_qw [(V8HI "d") (V16HI "d")
+ (V16QI "w") (V32QI "w")])
+
;; Suffix for integer mode in LSX or LASX instructions with FP input but
;; integer output.
(define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l")
@@ -828,6 +846,39 @@ (define_expand "<su>dot_prod<wvec_half><mode>"
DONE;
})
+(define_expand "<su>dot_prod<wvec_quarter><mode>" ; Dot product into 4x wider elements: op0 = dot (op1, op2) + op3.
+ [(match_operand:<WVEC_QUARTER> 0 "register_operand" "=f,f") ; Result, in the 4x wider element mode.
+ (match_operand:IVEC_HB 1 "register_operand" "f,f") ; First narrow (HI/QI element) source vector.
+ (match_operand:IVEC_HB 2 "register_operand" "f,f") ; Second narrow source vector.
+ (match_operand:<WVEC_QUARTER> 3 "reg_or_0_operand" "f, YG") ; Accumulator; may be the zero constant.
+ (any_extend (const_int 0))] ; NOTE(review): placeholder that presumably only binds <su>/<u> -- confirm.
+ ""
+{
+ rtx *op = operands; /* Short alias for the operand array.  */
+ rtx res_mulev = gen_reg_rtx (<WVEC_HALF>mode); /* Temporary in the WVEC_HALF mode.  */
+ rtx res_mulod = gen_reg_rtx (<WVEC_HALF>mode); /* Temporary in the WVEC_HALF mode.  */
+ rtx res_addev = gen_reg_rtx (<WVEC_QUARTER>mode); /* Partial sum in the result mode.  */
+ rtx res_addod = gen_reg_rtx (<WVEC_QUARTER>mode); /* Partial sum in the result mode.  */
+ emit_insn (gen_<simd_isa>_<x>vmulwev_<simdfmt_w>_<simdfmt><u>
+ (res_mulev, op[1], op[2])); /* NOTE(review): presumably widening multiply of even elements -- confirm.  */
+ emit_insn (gen_<simd_isa>_<x>vmulwod_<simdfmt_w>_<simdfmt><u>
+ (res_mulod, op[1], op[2])); /* NOTE(review): presumably widening multiply of odd elements -- confirm.  */
+ emit_insn (gen_<simd_isa>_<x>vhaddw_<simdfmt_qw><u>_<simdfmt_w><u>
+ (res_addev, res_mulev, res_mulev)); /* NOTE(review): same input twice; presumably sums adjacent products while widening -- confirm.  */
+ emit_insn (gen_<simd_isa>_<x>vhaddw_<simdfmt_qw><u>_<simdfmt_w><u>
+ (res_addod, res_mulod, res_mulod)); /* Same step for the res_mulod products.  */
+ if (op[3] == CONST0_RTX (<WVEC_QUARTER>mode)) /* Zero accumulator: skip the final add.  */
+ emit_insn (gen_add<wvec_quarter>3 (op[0], res_addev,
+ res_addod));
+ else
+ {
+ emit_insn (gen_add<wvec_quarter>3 (res_addev, res_addev,
+ res_addod)); /* Combine both partial sums in place.  */
+ emit_insn (gen_add<wvec_quarter>3 (op[0], res_addev, op[3])); /* Then add the accumulator.  */
+ }
+ DONE;
+})
+
(define_insn "simd_maddw_evod_<mode>_hetero"
[(set (match_operand:<WVEC_HALF> 0 "register_operand" "=f")
(plus:<WVEC_HALF>
--
2.34.1