[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-30 Thread Vladislav Dzhidzhoev via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG38c92c1ee2f0: [AArch64] Add patterns for FMADD, FMSUB 
(authored by overmighty, committed by dzhidzhoev).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

Files:
  clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem-constrained.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
  llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
  llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll

Index: llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
===
--- llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -7,56 +7,132 @@
 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
 
-define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss4S
+define float @test_fmla_ss4S_0(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_0
+  ; CHECK: fmadd s0, s1, s2, s0
+  %tmp1 = extractelement <4 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_0_swap
+  ; CHECK: fmadd s0, s2, s1, s0
+  %tmp1 = extractelement <4 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_3(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_3
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %tmp1 = extractelement <4 x float> %v, i32 3
   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
   ret float %tmp2
 }
 
-define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss4S_swap
+define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_3_swap
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %tmp1 = extractelement <4 x float> %v, i32 3
   %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
   ret float %tmp2
 }
 
-define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss2S
+define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_0
+  ; CHECK: fmadd s0, s1, s2, s0
+  %tmp1 = extractelement <2 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_0_swap
+  ; CHECK: fmadd s0, s2, s1, s0
+  %tmp1 = extractelement <2 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_1
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
   %tmp1 = extractelement <2 x float> %v, i32 1
   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
   ret float %tmp2
 }
 
-define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
-  ; CHECK-LABEL: test_fmla_ddD
-  ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
+define double @test_fmla_ddD_0(double %a, double %b, <1 x double> %v) {
+  ; CHECK-LABEL: test_fmla_ddD_0
+  ; CHECK: fmadd d0, d1, d2, d0
   %tmp1 = extractelement <1 x double> %v, i32 0
   %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
   ret double %tmp2
 }
 
-define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmla_dd2D
+define double @test_fmla_ddD_0_swap(double %a, double %b, <1 x double> %v) {
+  ; CHECK-LABEL: test_fmla_ddD_0_swap
+  ; CHECK: fmadd d0, d2, d1, d0
+  %tmp1 = extractelement <1 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_0(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_0
+  ; CHECK: fmadd d0, d1, d2, d0
+  %tmp1 = extractelement <2 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_0_swap
+  ; CHECK: fmadd d0, d2, d1, d0
+  %tmp1 = extractelement <2 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
+  ret double %tmp2
+}
+

[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-30 Thread OverMighty via Phabricator via cfe-commits
overmighty added a comment.

Ping.

If you would like the current patch with `extractelt` to be committed, please 
commit it as "OverMighty ". Thank you.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-23 Thread Vladislav Dzhidzhoev via Phabricator via cfe-commits
dzhidzhoev accepted this revision.
dzhidzhoev added a subscriber: arsenm.
dzhidzhoev added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:5393
+  (EXTRACT_SUBREG V128:$Rn, hsub), FPR16:$Rm, FPR16:$Ra)>;
+  }
+

overmighty wrote:
> dzhidzhoev wrote:
> > BTW, these lines add some patterns for fnmadd. Could you add some tests for 
> > them? Or are they already covered by existing ones?
> The patterns for `FNMADD` and `FNMSUB` are useless as there are no Neon 
> vector equivalents of these instructions. Should I add a template argument to 
> `ThreeOperandFPData` to prevent the patterns from being generated for them?
> 
> I previously had patterns in AArch64InstrInfo.td for `FMADD` and `FMSUB` only 
> and it was suggested to me to move them to `ThreeOperandFPData`: 
> https://reviews.llvm.org/D153207?id=532433#inline-1481961. I was asked to 
> split that part of my previous patch into a second (this) one.
> The patterns for `FNMADD` and `FNMSUB` are useless as there are no Neon 
> vector equivalents of these instructions. Should I add a template argument to 
> `ThreeOperandFPData` to prevent the patterns from being generated for them?

Thank you for the explanation! There's no need for that, since I can hardly 
think of a way these patterns could interfere with other patterns.




Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:5418
+  (EXTRACT_SUBREG V128:$Rn, dsub), FPR64:$Rm, FPR64:$Ra)>;
 }
 

overmighty wrote:
> dzhidzhoev wrote:
> > Is it possible to use extractelt here? Since vector_extract is marked as 
> > deprecated in `TargetSelectionDAG.td`
> I saw the comment marking it as deprecated but I also saw new commits using 
> `vector_extract`. I should have asked for clarification earlier. Is 
> `vector_extract` truly deprecated? Should I change patterns I added in 
> previous patches to use `extractelt` too?
Thank you! Now I've noticed that the comment about the deprecation was made in 
2015, and vector_extract is still in use. @arsenm, could you suggest whether it 
is relevant?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-19 Thread OverMighty via Phabricator via cfe-commits
overmighty added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:5393
+  (EXTRACT_SUBREG V128:$Rn, hsub), FPR16:$Rm, FPR16:$Ra)>;
+  }
+

dzhidzhoev wrote:
> BTW, these lines add some patterns for fnmadd. Could you add some tests for 
> them? Or are they already covered by existing ones?
The patterns for `FNMADD` and `FNMSUB` are useless as there are no Neon vector 
equivalents of these instructions. Should I add a template argument to 
`ThreeOperandFPData` to prevent the patterns from being generated for them?

I previously had patterns in AArch64InstrInfo.td for `FMADD` and `FMSUB` only 
and it was suggested to me to move them to `ThreeOperandFPData`: 
https://reviews.llvm.org/D153207?id=532433#inline-1481961. I was asked to split 
that part of my previous patch into a second (this) one.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:5418
+  (EXTRACT_SUBREG V128:$Rn, dsub), FPR64:$Rm, FPR64:$Ra)>;
 }
 

dzhidzhoev wrote:
> Is it possible to use extractelt here? Since vector_extract is marked as 
> deprecated in `TargetSelectionDAG.td`
I saw the comment marking it as deprecated but I also saw new commits using 
`vector_extract`. I should have asked for clarification earlier. Is 
`vector_extract` truly deprecated? Should I change patterns I added in previous 
patches to use `extractelt` too?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-19 Thread OverMighty via Phabricator via cfe-commits
overmighty updated this revision to Diff 551754.
overmighty added a reviewer: dzhidzhoev.
overmighty added a comment.

- Rebase onto new upstream commits.
- Replace usage of deprecated `vector_extract` with `extractelt` in new 
patterns.
- Add `f16` operators before `EXTRACT_SUBREG`s in new `*Hrrr` instruction 
pattern results to fix crashing tests.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

Files:
  clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem-constrained.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
  llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
  llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll

Index: llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
===
--- llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -7,56 +7,132 @@
 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
 
-define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss4S
+define float @test_fmla_ss4S_0(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_0
+  ; CHECK: fmadd s0, s1, s2, s0
+  %tmp1 = extractelement <4 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_0_swap
+  ; CHECK: fmadd s0, s2, s1, s0
+  %tmp1 = extractelement <4 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_3(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_3
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %tmp1 = extractelement <4 x float> %v, i32 3
   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
   ret float %tmp2
 }
 
-define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss4S_swap
+define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_3_swap
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %tmp1 = extractelement <4 x float> %v, i32 3
   %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
   ret float %tmp2
 }
 
-define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss2S
+define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_0
+  ; CHECK: fmadd s0, s1, s2, s0
+  %tmp1 = extractelement <2 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_0_swap
+  ; CHECK: fmadd s0, s2, s1, s0
+  %tmp1 = extractelement <2 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_1
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
   %tmp1 = extractelement <2 x float> %v, i32 1
   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
   ret float %tmp2
 }
 
-define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
-  ; CHECK-LABEL: test_fmla_ddD
-  ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
+define double @test_fmla_ddD_0(double %a, double %b, <1 x double> %v) {
+  ; CHECK-LABEL: test_fmla_ddD_0
+  ; CHECK: fmadd d0, d1, d2, d0
   %tmp1 = extractelement <1 x double> %v, i32 0
   %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
   ret double %tmp2
 }
 
-define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmla_dd2D
+define double @test_fmla_ddD_0_swap(double %a, double %b, <1 x double> %v) {
+  ; CHECK-LABEL: test_fmla_ddD_0_swap
+  ; CHECK: fmadd d0, d2, d1, d0
+  %tmp1 = extractelement <1 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_0(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_0
+  ; CHECK: fmadd d0, d1, d2, d0
+  %tmp1 = extractelement <2 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_0_swap
+  ; CHECK: fmadd d0, d2, d1, d0
+  %tmp1 = extractelement <2 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double 

[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-18 Thread Vladislav Dzhidzhoev via Phabricator via cfe-commits
dzhidzhoev added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:5418
+  (EXTRACT_SUBREG V128:$Rn, dsub), FPR64:$Rm, FPR64:$Ra)>;
 }
 

Is it possible to use extractelt here? Since vector_extract is marked as 
deprecated in `TargetSelectionDAG.td`


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-18 Thread Vladislav Dzhidzhoev via Phabricator via cfe-commits
dzhidzhoev added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:5393
+  (EXTRACT_SUBREG V128:$Rn, hsub), FPR16:$Rm, FPR16:$Ra)>;
+  }
+

BTW, these lines add some patterns for fnmadd. Could you add some tests for 
them? Or are they already covered by existing ones?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-18 Thread Vladislav Dzhidzhoev via Phabricator via cfe-commits
dzhidzhoev added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:5381-5393
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(f16 (node (f16 FPR16:$Rn),
+   (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))),
+   (f16 FPR16:$Ra))),
+(!cast<Instruction>(NAME # Hrrr)
+  FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub), FPR16:$Ra)>;
+

The result type of (EXTRACT_SUBREG ...) here seems to be deduced as i16, which 
triggers an assertion failure after this rule is applied.
Explicitly specifying the result type fixes the test crashes:

```
FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)), FPR16:$Ra)>;
```

```
(f16 (EXTRACT_SUBREG V128:$Rn, hsub)), FPR16:$Rm, FPR16:$Ra)>
```


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-17 Thread OverMighty via Phabricator via cfe-commits
overmighty added a comment.

The crashes in complex-deinterleaving-f16-mul.ll and fp16_intrinsic_lane.ll 
seem to be caused by D156831.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-17 Thread OverMighty via Phabricator via cfe-commits
overmighty added a comment.

If this still looks good to you, please commit it as "OverMighty 
". Thank you.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-17 Thread OverMighty via Phabricator via cfe-commits
overmighty updated this revision to Diff 551193.
overmighty added a comment.

Add tests for A[0]*B+C patterns.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

Files:
  clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem-constrained.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
  llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
  llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll

Index: llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
===
--- llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -7,56 +7,132 @@
 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
 
-define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss4S
+define float @test_fmla_ss4S_0(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_0
+  ; CHECK: fmadd s0, s1, s2, s0
+  %tmp1 = extractelement <4 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_0_swap
+  ; CHECK: fmadd s0, s2, s1, s0
+  %tmp1 = extractelement <4 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_3(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_3
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %tmp1 = extractelement <4 x float> %v, i32 3
   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
   ret float %tmp2
 }
 
-define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss4S_swap
+define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_3_swap
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %tmp1 = extractelement <4 x float> %v, i32 3
   %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
   ret float %tmp2
 }
 
-define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss2S
+define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_0
+  ; CHECK: fmadd s0, s1, s2, s0
+  %tmp1 = extractelement <2 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_0_swap
+  ; CHECK: fmadd s0, s2, s1, s0
+  %tmp1 = extractelement <2 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_1
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
   %tmp1 = extractelement <2 x float> %v, i32 1
   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
   ret float %tmp2
 }
 
-define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
-  ; CHECK-LABEL: test_fmla_ddD
-  ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
+define double @test_fmla_ddD_0(double %a, double %b, <1 x double> %v) {
+  ; CHECK-LABEL: test_fmla_ddD_0
+  ; CHECK: fmadd d0, d1, d2, d0
   %tmp1 = extractelement <1 x double> %v, i32 0
   %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
   ret double %tmp2
 }
 
-define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmla_dd2D
+define double @test_fmla_ddD_0_swap(double %a, double %b, <1 x double> %v) {
+  ; CHECK-LABEL: test_fmla_ddD_0_swap
+  ; CHECK: fmadd d0, d2, d1, d0
+  %tmp1 = extractelement <1 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_0(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_0
+  ; CHECK: fmadd d0, d1, d2, d0
+  %tmp1 = extractelement <2 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_0_swap
+  ; CHECK: fmadd d0, d2, d1, d0
+  %tmp1 = extractelement <2 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_1(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_1
   ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
   %tmp1 = 

[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-17 Thread Sam Tebbs via Phabricator via cfe-commits
samtebbs accepted this revision.
samtebbs added a comment.
This revision is now accepted and ready to land.

Thanks, this looks good to me with any extra tests required by David's comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-16 Thread Dave Green via Phabricator via cfe-commits
dmgreen added a comment.

It looks like there are patterns for A[0]*B+C and B*A[0]+C to handle 
commutativity. Are there tests for both forms too?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158008/new/

https://reviews.llvm.org/D158008

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158008: [AArch64] Add patterns for FMADD, FMSUB

2023-08-15 Thread OverMighty via Phabricator via cfe-commits
overmighty created this revision.
overmighty added reviewers: dmgreen, john.brawn, SjoerdMeijer.
Herald added subscribers: arphaman, hiraditya, kristof.beyls.
Herald added a project: All.
overmighty requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

FMADD, FMSUB instructions perform better or the same compared to indexed
FMLA, FMLS.

For example, the Arm Cortex-A55 Software Optimization Guide lists "FP
multiply accumulate" FMADD, FMSUB instructions with a throughput of 2
IPC, whereas it lists "ASIMD FP multiply accumulate, by element" FMLA,
FMLS with a throughput of 1 IPC.

The Arm Cortex-A77 Software Optimization Guide, however, does not
separately list "by element" variants of the "ASIMD FP multiply
accumulate" instructions, which are listed with the same throughput of 2
IPC as "FP multiply accumulate" instructions.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D158008

Files:
  clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem-constrained.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
  llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
  llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll

Index: llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
===
--- llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -7,56 +7,90 @@
 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
 
-define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss4S
+define float @test_fmla_ss4S_0(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_0
+  ; CHECK: fmadd s0, s1, s2, s0
+  %tmp1 = extractelement <4 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_3(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_3
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %tmp1 = extractelement <4 x float> %v, i32 3
   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
   ret float %tmp2
 }
 
-define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss4S_swap
+define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss4S_3_swap
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %tmp1 = extractelement <4 x float> %v, i32 3
   %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
   ret float %tmp2
 }
 
-define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
-  ; CHECK-LABEL: test_fmla_ss2S
+define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_0
+  ; CHECK: fmadd s0, s1, s2, s0
+  %tmp1 = extractelement <2 x float> %v, i32 0
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
+  ; CHECK-LABEL: test_fmla_ss2S_1
   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
   %tmp1 = extractelement <2 x float> %v, i32 1
   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
   ret float %tmp2
 }
 
-define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
-  ; CHECK-LABEL: test_fmla_ddD
-  ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
+define double @test_fmla_ddD_0(double %a, double %b, <1 x double> %v) {
+  ; CHECK-LABEL: test_fmla_ddD_0
+  ; CHECK: fmadd d0, d1, d2, d0
   %tmp1 = extractelement <1 x double> %v, i32 0
   %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
   ret double %tmp2
 }
 
-define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmla_dd2D
+define double @test_fmla_dd2D_0(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_0
+  ; CHECK: fmadd d0, d1, d2, d0
+  %tmp1 = extractelement <2 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_1(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_1
   ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
   %tmp1 = extractelement <2 x double> %v, i32 1
   %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
   ret double %tmp2
 }
 
-define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) {
-  ; CHECK-LABEL: test_fmla_dd2D_swap
+define double @test_fmla_dd2D_1_swap(double %a, double %b, <2 x double> %v) {
+  ; CHECK-LABEL: test_fmla_dd2D_1_swap
   ; CHECK: fmla {{d[0-9]+}},