Hi Karl,
This looks good. A few trailing spaces were removed; they all seem close to code
that was changed, but it is still worth mentioning "Fixed trailing spaces." in the
ChangeLog for e.g. lib/target-supports.exp, since there were quite a few.
A few comments:
+;; BFSCALE (multiple vectors)
+;; svbfloat16x2_t svscale[_bf16_x2] (svbfloat16x2_t zdn, svint16x2_t zm);
+;; svbfloat16x4_t svscale[_bf16_x4] (svbfloat16x4_t zdn, svint16x4_t zm);
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_BFx24 0 "register_operand")
This should presumably have a "Uw<vector_count>" constraint on operand 0? Not
sure why it didn't complain...
+ (unspec:SVE_BFx24
+ [(match_operand:SVE_BFx24 1 "register_operand" "0")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand"
"Uw<vector_count>")]
+ SVE_COND_FP_BINARY_INT))]
+ "TARGET_SME2 && TARGET_SVE_BFSCALE"
+ "bfscale %0, %1, %2"
+)
+
+;; BFSCALE (multiple and single vector)
+;; svbfloat16x2_t svscale[_single_bf16_x2](svbfloat16x2_t zn, svint16_t zm);
+;; svbfloat16x4_t svscale[_single_bf16_x4](svbfloat16x4_t zn, svint16_t zm);
+(define_insn "@aarch64_sve_<optab><mode>_single"
+ [(set (match_operand:SVE_BFx24 0 "register_operand")
Same here.
+ (unspec:SVE_BFx24
+ [(match_operand:SVE_BFx24 1 "register_operand" "0")
+ (match_operand:<SVSCALE_SINGLE_INTARG> 2 "register_operand" "x")]
+ SVE_COND_FP_BINARY_INT))]
+ "TARGET_SME2 && TARGET_SVE_BFSCALE"
+ "bfscale %0, %1, %2.h"
+)
;; BFMUL (multiple vectors)
+;; svbfloat16x2_t svmul[_bf16_x2](svbfloat16x2_t zd, svbfloat16x2_t zm)
+;; __arm_streaming;
+;; svbfloat16x4_t svmul[_bf16_x4](svbfloat16x4_t zd, svbfloat16x4_t zm)
+;; __arm_streaming;
+;; BFMUL { <Zd1>.H-<Zd2>.H }, { <Zn1>.H-<Zn2>.H }, { <Zm1>.H-<Zm2>.H }
+;; BFMUL { <Zd1>.H-<Zd4>.H }, { <Zn1>.H-<Zn4>.H }, { <Zm1>.H-<Zm4>.H }
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_BFx24 0 "register_operand")
And here.
+ (unspec:SVE_BFx24
+ [(match_operand:SVE_BFx24 1 "register_operand" "Uw<vector_count>")
+ (match_operand:SVE_BFx24 2 "register_operand" "Uw<vector_count>")]
+ SVE_FP_MUL))]
+ "TARGET_SME2 && TARGET_SVE_BFSCALE"
+ "bfmul %0, %1, %2"
+)
+
+;; BFMUL (multiple and single vector)
+;; svbfloat16x2_t svmul[_single_bf16_x2](svbfloat16x2_t zd, svbfloat16_t zm)
+;; __arm_streaming;
+;; svbfloat16x4_t svmul[_single_bf16_x4](svbfloat16x4_t zd, svbfloat16_t zm)
+;; __arm_streaming;
+;; BFMUL { <Zd1>.H-<Zd2>.H }, { <Zn1>.H-<Zn2>.H }, <Zm>.H
+;; BFMUL { <Zd1>.H-<Zd4>.H }, { <Zn1>.H-<Zn4>.H }, <Zm>.H
+(define_insn "@aarch64_sve_<optab><mode>_single"
+ [(set (match_operand:SVE_BFx24 0 "register_operand")
And here.
+ (unspec:SVE_BFx24
+ [(match_operand:SVE_BFx24 1 "register_operand" "Uw<vector_count>")
+ (match_operand:<VSINGLE> 2 "register_operand" "x")]
+ SVE_FP_MUL))]
+ "TARGET_SME2 && TARGET_SVE_BFSCALE"
+ "bfmul %0, %1, %2.h"
+)
;; their second operand, with the values of inactive lanes being distinct
;; from the other inputs.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_FULL_F_BFSCALE 0 "register_operand")
+ (unspec:SVE_FULL_F_BFSCALE
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_FULL_F_BFSCALE
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
+ (match_operand:SVE_FULL_F_BFSCALE 2 "register_operand")
(match_operand:<V_INT_EQUIV> 3 "register_operand")]
SVE_COND_FP_BINARY_INT)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_FULL_F_BFSCALE 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
- [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z,
%2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z,
%2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m,
%2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z,
%2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z,
%2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m,
%2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w , w ] #
Is there any extra action required in the "[ ?&w , Upl , w , w , w ] #"
case or will that just work for bfscale?
Cheers,
Wilco