Hi Karl,
This looks good. A few trailing spaces were removed; they all seem close to code
that was changed, but it is still worth mentioning "Fixed trailing spaces." in the
ChangeLog for e.g. lib/target-supports.exp, since there were quite a few.
A few comments:
+;; BFSCALE (multiple vectors)
+;; svbfloat16x2_t svscale[_bf16_x2] (svbfloat16x2_t zdn, svint16x2_t zm);
+;; svbfloat16x4_t svscale[_bf16_x4] (svbfloat16x4_t zdn, svint16x4_t zm);
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_BFx24 0 "register_operand")
This should presumably have a "Uw<vector_count>" constraint on operand 0? Not
sure why it didn't complain...
+ (unspec:SVE_BFx24
+ [(match_operand:SVE_BFx24 1 "register_operand" "0")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand"
"Uw<vector_count>")]
+ SVE_COND_FP_BINARY_INT))]
+ "TARGET_SME2 && TARGET_SVE_BFSCALE"
+ "bfscale %0, %1, %2"
+)
+
+;; BFSCALE (multiple and single vector)
+;; svbfloat16x2_t svscale[_single_bf16_x2](svbfloat16x2_t zn, svint16_t zm);
+;; svbfloat16x4_t svscale[_single_bf16_x4](svbfloat16x4_t zn, svint16_t zm);
+(define_insn "@aarch64_sve_<optab><mode>_single"
+ [(set (match_operand:SVE_BFx24 0 "register_operand")
Same here.
+ (unspec:SVE_BFx24
+ [(match_operand:SVE_BFx24 1 "register_operand" "0")
+ (match_operand:<SVSCALE_SINGLE_INTARG> 2 "register_operand" "x")]
+ SVE_COND_FP_BINARY_INT))]
+ "TARGET_SME2 && TARGET_SVE_BFSCALE"
+ "bfscale %0, %1, %2.h"
+)
;; BFMUL (multiple vectors)
+;; svbfloat16x2_t svmul[_bf16_x2](svbfloat16x2_t zd, svbfloat16x2_t zm)
+;; __arm_streaming;
+;; svbfloat16x4_t svmul[_bf16_x4](svbfloat16x4_t zd, svbfloat16x4_t zm)
+;; __arm_streaming;
+;; BFMUL { <Zd1>.H-<Zd2>.H }, { <Zn1>.H-<Zn2>.H }, { <Zm1>.H-<Zm2>.H }
+;; BFMUL { <Zd1>.H-<Zd4>.H }, { <Zn1>.H-<Zn4>.H }, { <Zm1>.H-<Zm4>.H }
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_BFx24 0 "register_operand")
And here.
+ (unspec:SVE_BFx24
+ [(match_operand:SVE_BFx24 1 "register_operand" "Uw<vector_count>")
+ (match_operand:SVE_BFx24 2 "register_operand" "Uw<vector_count>")]
+ SVE_FP_MUL))]
+ "TARGET_SME2 && TARGET_SVE_BFSCALE"
+ "bfmul %0, %1, %2"
+)
+
+;; BFMUL (multiple and single vector)
+;; svbfloat16x2_t svmul[_single_bf16_x2](svbfloat16x2_t zd, svbfloat16_t zm)
+;; __arm_streaming;
+;; svbfloat16x4_t svmul[_single_bf16_x4](svbfloat16x4_t zd, svbfloat16_t zm)
+;; __arm_streaming;
+;; BFMUL { <Zd1>.H-<Zd2>.H }, { <Zn1>.H-<Zn2>.H }, <Zm>.H
+;; BFMUL { <Zd1>.H-<Zd4>.H }, { <Zn1>.H-<Zn4>.H }, <Zm>.H
+(define_insn "@aarch64_sve_<optab><mode>_single"
+ [(set (match_operand:SVE_BFx24 0 "register_operand")
And here.
+ (unspec:SVE_BFx24
+ [(match_operand:SVE_BFx24 1 "register_operand" "Uw<vector_count>")
+ (match_operand:<VSINGLE> 2 "register_operand" "x")]
+ SVE_FP_MUL))]
+ "TARGET_SME2 && TARGET_SVE_BFSCALE"
+ "bfmul %0, %1, %2.h"
+)
;; their second operand, with the values of inactive lanes being distinct
;; from the other inputs.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_FULL_F_BFSCALE 0 "register_operand")
+ (unspec:SVE_FULL_F_BFSCALE
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_FULL_F_BFSCALE
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
+ (match_operand:SVE_FULL_F_BFSCALE 2 "register_operand")
(match_operand:<V_INT_EQUIV> 3 "register_operand")]
SVE_COND_FP_BINARY_INT)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_FULL_F_BFSCALE 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
- [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z,
%2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z,
%2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m,
%2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z,
%2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z,
%2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m,
%2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w , w ] #
Is there any extra action required in the "[ ?&w , Upl , w , w , w ] #"
case or will that just work for bfscale?
Cheers,
Wilco