This revision was automatically updated to reflect the committed changes.
Closed by commit rGa255931c4055: [ARM] Supporting lowering of half-precision FP
arguments and returns in… (authored by pratlucas).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D75169/new/
https://reviews.llvm.org/D75169
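For context: with this change, half-precision arguments and returns follow the AAPCS rule that an f16 value occupies the low 16 bits of a 32-bit container, namely a core register (i32 location) under the soft-float ABI or a single-precision VFP register (f32 location) under the hard-float ABI. The value is reinterpreted bit-for-bit rather than converted to single precision. The following host-side sketch is not part of the patch and all names in it are illustrative; it only shows the bit-level packing that the BITCAST/ANY_EXTEND sequence emitted by the new lowering corresponds to.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Pack the raw 16-bit encoding of a half into the LSBs of a 32-bit slot.
// Mirrors the BITCAST(f16 -> i16) + ANY_EXTEND(i16 -> i32) sequence; the
// upper 16 bits are simply left as zero here.
uint32_t packHalfIntoGPR(uint16_t halfBits) {
  return static_cast<uint32_t>(halfBits);
}

// Recover the half encoding from the 32-bit slot (TRUNCATE + BITCAST back).
uint16_t unpackHalfFromGPR(uint32_t slot) {
  return static_cast<uint16_t>(slot & 0xFFFFu);
}

// Hard-float ABI: the same 32 bits live in an s-register, so the container
// is an f32 whose bit pattern, not its numeric value, carries the half.
float packHalfIntoSReg(uint16_t halfBits) {
  uint32_t slot = packHalfIntoGPR(halfBits);
  float container;
  std::memcpy(&container, &slot, sizeof container);
  return container;
}

uint16_t unpackHalfFromSReg(float container) {
  uint32_t slot;
  std::memcpy(&slot, &container, sizeof slot);
  return unpackHalfFromGPR(slot);
}

int main() {
  const uint16_t oneHalf = 0x3C00; // IEEE-754 binary16 encoding of 1.0
  printf("GPR slot:   0x%08x\n", packHalfIntoGPR(oneHalf));
  printf("round trip: 0x%04x\n", unpackHalfFromSReg(packHalfIntoSReg(oneHalf)));
  return 0;
}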
Files:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/Target/ARM/ARMCallLowering.cpp
llvm/lib/Target/ARM/ARMCallingConv.cpp
llvm/lib/Target/ARM/ARMCallingConv.td
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
llvm/test/CodeGen/ARM/fp16-args.ll
llvm/test/CodeGen/ARM/fp16-bitcast.ll
llvm/test/CodeGen/ARM/fp16-promote.ll
llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
llvm/test/CodeGen/Thumb2/mve-shuffle.ll
llvm/test/CodeGen/Thumb2/mve-vdup.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
Index: llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -78,7 +78,6 @@
; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -103,7 +102,6 @@
; CHECK-NEXT: vminnm.f16 s4, s4, s6
; CHECK-NEXT: vminnm.f16 s4, s4, s3
; CHECK-NEXT: vminnm.f16 s0, s4, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = call fast half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
@@ -125,7 +123,6 @@
; CHECK-FP-NEXT: vminnm.f16 s4, s4, s6
; CHECK-FP-NEXT: vminnm.f16 s4, s4, s3
; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0
-; CHECK-FP-NEXT: vstr.16 s0, [r0]
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16:
@@ -169,7 +166,6 @@
; CHECK-NOFP-NEXT: vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0
-; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
@@ -309,20 +305,20 @@
define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) {
; CHECK-FP-LABEL: fmin_v4f16_nofast:
; CHECK-FP: @ %bb.0: @ %entry
-; CHECK-FP-NEXT: vmov r1, s1
-; CHECK-FP-NEXT: vdup.32 q1, r1
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov.u16 r1, q0[1]
-; CHECK-FP-NEXT: vdup.16 q1, r1
+; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
+; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vstr.16 s0, [r0]
+; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16_nofast:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vmov r1, s1
+; CHECK-NOFP-NEXT: vmov r0, s1
; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT: vdup.32 q1, r1
+; CHECK-NOFP-NEXT: vdup.32 q1, r0
; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
; CHECK-NOFP-NEXT: vcmp.f16 s8, s10
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
@@ -333,7 +329,6 @@
; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %x)
@@ -346,13 +341,13 @@
; CHECK-FP-NEXT: vmov.f64 d2, d1
; CHECK-FP-NEXT: vmov.f32 s5, s3
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov r1, s1
-; CHECK-FP-NEXT: vdup.32 q1, r1
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov.u16 r1, q0[1]
-; CHECK-FP-NEXT: vdup.16 q1, r1
+; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
+; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vstr.16 s0, [r0]
+; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16_nofast:
@@ -384,7 +379,6 @@
; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call half @llvm.experimental.vector.reduce.fmin.v8f16(<8 x half> %x)
@@ -398,13 +392,13 @@
; CHECK-FP-NEXT: vmov.f64 d2, d1
; CHECK-FP-NEXT: vmov.f32 s5, s3
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov r1, s1
-; CHECK-FP-NEXT: vdup.32 q1, r1
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov.u16 r1, q0[1]
-; CHECK-FP-NEXT: vdup.16 q1, r1
+; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
+; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vstr.16 s0, [r0]
+; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16_nofast:
@@ -462,7 +456,6 @@
; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call half @llvm.experimental.vector.reduce.fmin.v16f16(<16 x half> %x)
@@ -1195,7 +1188,6 @@
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -1220,7 +1212,6 @@
; CHECK-NEXT: vmaxnm.f16 s4, s4, s6
; CHECK-NEXT: vmaxnm.f16 s4, s4, s3
; CHECK-NEXT: vmaxnm.f16 s0, s4, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
entry:
%z = call fast half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
@@ -1242,7 +1233,6 @@
; CHECK-FP-NEXT: vmaxnm.f16 s4, s4, s6
; CHECK-FP-NEXT: vmaxnm.f16 s4, s4, s3
; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0
-; CHECK-FP-NEXT: vstr.16 s0, [r0]
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v16f16:
@@ -1286,7 +1276,6 @@
; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0
-; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
@@ -1424,20 +1413,20 @@
define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) {
; CHECK-FP-LABEL: fmax_v4f16_nofast:
; CHECK-FP: @ %bb.0: @ %entry
-; CHECK-FP-NEXT: vmov r1, s1
-; CHECK-FP-NEXT: vdup.32 q1, r1
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov.u16 r1, q0[1]
-; CHECK-FP-NEXT: vdup.16 q1, r1
+; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
+; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vstr.16 s0, [r0]
+; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v4f16_nofast:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vmov r1, s1
+; CHECK-NOFP-NEXT: vmov r0, s1
; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT: vdup.32 q1, r1
+; CHECK-NOFP-NEXT: vdup.32 q1, r0
; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
; CHECK-NOFP-NEXT: vcmp.f16 s10, s8
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
@@ -1448,7 +1437,6 @@
; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %x)
@@ -1461,13 +1449,13 @@
; CHECK-FP-NEXT: vmov.f64 d2, d1
; CHECK-FP-NEXT: vmov.f32 s5, s3
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov r1, s1
-; CHECK-FP-NEXT: vdup.32 q1, r1
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov.u16 r1, q0[1]
-; CHECK-FP-NEXT: vdup.16 q1, r1
+; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
+; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vstr.16 s0, [r0]
+; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v8f16_nofast:
@@ -1499,7 +1487,6 @@
; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call half @llvm.experimental.vector.reduce.fmax.v8f16(<8 x half> %x)
@@ -1513,13 +1500,13 @@
; CHECK-FP-NEXT: vmov.f64 d2, d1
; CHECK-FP-NEXT: vmov.f32 s5, s3
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov r1, s1
-; CHECK-FP-NEXT: vdup.32 q1, r1
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vdup.32 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vmov.u16 r1, q0[1]
-; CHECK-FP-NEXT: vdup.16 q1, r1
+; CHECK-FP-NEXT: vmov.u16 r0, q0[1]
+; CHECK-FP-NEXT: vdup.16 q1, r0
; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
-; CHECK-FP-NEXT: vstr.16 s0, [r0]
+; CHECK-FP-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-FP-NEXT: bx lr
;
; CHECK-NOFP-LABEL: fmax_v16f16_nofast:
@@ -1577,7 +1564,6 @@
; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vstr.16 s0, [r0]
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call half @llvm.experimental.vector.reduce.fmax.v16f16(<16 x half> %x)
Index: llvm/test/CodeGen/Thumb2/mve-vdup.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -253,10 +253,9 @@
define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) {
; CHECK-LABEL: vdup_f16_extract:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, [r2]
-; CHECK-NEXT: vldr.16 s2, [r1]
+; CHECK-NEXT: vldr.16 s0, [r1]
+; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vadd.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = load half, half *%src1, align 2
Index: llvm/test/CodeGen/Thumb2/mve-shuffle.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -682,7 +682,7 @@
define arm_aapcs_vfpcc half @extract_f16_0(<8 x half> %a) {
; CHECK-LABEL: extract_f16_0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bx lr
entry:
%res = extractelement <8 x half> %a, i32 0
@@ -693,7 +693,6 @@
; CHECK-LABEL: extract_f16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovx.f16 s0, s1
-; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
entry:
%res = extractelement <8 x half> %a, i32 3
Index: llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
===================================================================
--- llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
+++ llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
@@ -21,6 +21,7 @@
; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
Index: llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
===================================================================
--- llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
+++ llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
@@ -21,6 +21,7 @@
; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vadd.f32 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
Index: llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
===================================================================
--- llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -2,266 +2,235 @@
; RUN: llc < %s -mtriple=armv8-eabi -mattr=+fullfp16 | FileCheck %s
; RUN: llc < %s -mtriple thumbv7a -mattr=+fullfp16 | FileCheck %s
-; TODO: we can't pass half-precision arguments as "half" types yet. We do
-; that for the time being by passing "float %f.coerce" and the necessary
-; bitconverts/truncates. In these tests we pass i16 and use 1 bitconvert, which
-; is the shortest way to get a half type. But when we can pass half types, we
-; want to use that here.
-
-define half @fp16_vminnm_o(i16 signext %a, i16 signext %b) {
+define half @fp16_vminnm_o(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_o:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r2
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp olt half %0, %1
- %cond = select i1 %cmp, half %0, half %1
+ %cmp = fcmp olt half %a, %b
+ %cond = select i1 %cmp, half %a, half %b
ret half %cond
}
-define half @fp16_vminnm_o_rev(i16 signext %a, i16 signext %b) {
+define half @fp16_vminnm_o_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_o_rev:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r2
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp ogt half %0, %1
- %cond = select i1 %cmp, half %0, half %1
+ %cmp = fcmp ogt half %a, %b
+ %cond = select i1 %cmp, half %a, half %b
ret half %cond
}
-define half @fp16_vminnm_u(i16 signext %a, i16 signext %b) {
+define half @fp16_vminnm_u(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_u:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, r2
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp ult half %0, %1
- %cond = select i1 %cmp, half %0, half %1
+ %cmp = fcmp ult half %a, %b
+ %cond = select i1 %cmp, half %a, half %b
ret half %cond
}
-define half @fp16_vminnm_ule(i16 signext %a, i16 signext %b) {
+define half @fp16_vminnm_ule(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_ule:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, r2
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp ule half %0, %1
- %cond = select i1 %cmp, half %0, half %1
+ %cmp = fcmp ule half %a, %b
+ %cond = select i1 %cmp, half %a, half %b
ret half %cond
}
-define half @fp16_vminnm_u_rev(i16 signext %a, i16 signext %b) {
+define half @fp16_vminnm_u_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_u_rev:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r2
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp ugt half %0, %1
- %cond = select i1 %cmp, half %1, half %0
+ %cmp = fcmp ugt half %a, %b
+ %cond = select i1 %cmp, half %b, half %a
ret half %cond
}
-define half @fp16_vmaxnm_o(i16 signext %a, i16 signext %b) {
+define half @fp16_vmaxnm_o(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_o:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r2
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp ogt half %0, %1
- %cond = select i1 %cmp, half %0, half %1
+ %cmp = fcmp ogt half %a, %b
+ %cond = select i1 %cmp, half %a, half %b
ret half %cond
}
-define half @fp16_vmaxnm_oge(i16 signext %a, i16 signext %b) {
+define half @fp16_vmaxnm_oge(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_oge:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r2
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp oge half %0, %1
- %cond = select i1 %cmp, half %0, half %1
+ %cmp = fcmp oge half %a, %b
+ %cond = select i1 %cmp, half %a, half %b
ret half %cond
}
-define half @fp16_vmaxnm_o_rev(i16 signext %a, i16 signext %b) {
+define half @fp16_vmaxnm_o_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_o_rev:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, r2
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp olt half %0, %1
- %cond = select i1 %cmp, half %1, half %0
+ %cmp = fcmp olt half %a, %b
+ %cond = select i1 %cmp, half %b, half %a
ret half %cond
}
-define half @fp16_vmaxnm_ole_rev(i16 signext %a, i16 signext %b) {
+define half @fp16_vmaxnm_ole_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_ole_rev:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, r2
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp ole half %0, %1
- %cond = select i1 %cmp, half %1, half %0
+ %cmp = fcmp ole half %a, %b
+ %cond = select i1 %cmp, half %b, half %a
ret half %cond
}
-define half @fp16_vmaxnm_u(i16 signext %a, i16 signext %b) {
+define half @fp16_vmaxnm_u(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_u:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, r2
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp ugt half %0, %1
- %cond = select i1 %cmp, half %0, half %1
+ %cmp = fcmp ugt half %a, %b
+ %cond = select i1 %cmp, half %a, half %b
ret half %cond
}
-define half @fp16_vmaxnm_uge(i16 signext %a, i16 signext %b) {
+define half @fp16_vmaxnm_uge(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_uge:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, r2
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vmov.f16 s2, r1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp uge half %0, %1
- %cond = select i1 %cmp, half %0, half %1
+ %cmp = fcmp uge half %a, %b
+ %cond = select i1 %cmp, half %a, half %b
ret half %cond
}
-define half @fp16_vmaxnm_u_rev(i16 signext %a, i16 signext %b) {
+define half @fp16_vmaxnm_u_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vmaxnm_u_rev:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r2
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
- %0 = bitcast i16 %a to half
- %1 = bitcast i16 %b to half
- %cmp = fcmp ult half %0, %1
- %cond = select i1 %cmp, half %1, half %0
+ %cmp = fcmp ult half %a, %b
+ %cond = select i1 %cmp, half %b, half %a
ret half %cond
}
; known non-NaNs
-define half @fp16_vminnm_NNNo(i16 signext %a) {
+define half @fp16_vminnm_NNNo(half %a) {
; CHECK-LABEL: fp16_vminnm_NNNo:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0:
; CHECK-NEXT: .short 0x5040 @ half 34
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp olt half %0, 12.
- %cond1 = select i1 %cmp1, half %0, half 12.
+ %cmp1 = fcmp olt half %a, 12.
+ %cond1 = select i1 %cmp1, half %a, half 12.
%cmp2 = fcmp olt half 34., %cond1
%cond2 = select i1 %cmp2, half 34., half %cond1
ret half %cond2
}
-define half @fp16_vminnm_NNNo_rev(i16 signext %a) {
+define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-LABEL: fp16_vminnm_NNNo_rev:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI13_0
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -270,50 +239,48 @@
; CHECK-NEXT: .LCPI13_1:
; CHECK-NEXT: .short 0x54e0 @ half 78
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp ogt half %0, 56.
- %cond1 = select i1 %cmp1, half 56., half %0
+ %cmp1 = fcmp ogt half %a, 56.
+ %cond1 = select i1 %cmp1, half 56., half %a
%cmp2 = fcmp ogt half 78., %cond1
%cond2 = select i1 %cmp2, half %cond1, half 78.
ret half %cond2
}
-define half @fp16_vminnm_NNNu(i16 signext %b) {
+define half @fp16_vminnm_NNNu(half %b) {
; CHECK-LABEL: fp16_vminnm_NNNu:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI14_0:
; CHECK-NEXT: .short 0x5040 @ half 34
entry:
- %0 = bitcast i16 %b to half
- %cmp1 = fcmp ult half 12., %0
- %cond1 = select i1 %cmp1, half 12., half %0
+ %cmp1 = fcmp ult half 12., %b
+ %cond1 = select i1 %cmp1, half 12., half %b
%cmp2 = fcmp ult half %cond1, 34.
%cond2 = select i1 %cmp2, half %cond1, half 34.
ret half %cond2
}
-define half @fp16_vminnm_NNNule(i16 signext %b) {
+define half @fp16_vminnm_NNNule(half %b) {
; CHECK-LABEL: fp16_vminnm_NNNule:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -323,25 +290,24 @@
; CHECK-NEXT: .short 0x5300 @ half 56
entry:
- %0 = bitcast i16 %b to half
- %cmp1 = fcmp ule half 34., %0
- %cond1 = select i1 %cmp1, half 34., half %0
+ %cmp1 = fcmp ule half 34., %b
+ %cond1 = select i1 %cmp1, half 34., half %b
%cmp2 = fcmp ule half %cond1, 56.
%cond2 = select i1 %cmp2, half %cond1, half 56.
ret half %cond2
}
-define half @fp16_vminnm_NNNu_rev(i16 signext %b) {
+define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-LABEL: fp16_vminnm_NNNu_rev:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI16_0
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -352,50 +318,48 @@
entry:
- %0 = bitcast i16 %b to half
- %cmp1 = fcmp ugt half 56., %0
- %cond1 = select i1 %cmp1, half %0, half 56.
+ %cmp1 = fcmp ugt half 56., %b
+ %cond1 = select i1 %cmp1, half %b, half 56.
%cmp2 = fcmp ugt half %cond1, 78.
%cond2 = select i1 %cmp2, half 78., half %cond1
ret half %cond2
}
-define half @fp16_vmaxnm_NNNo(i16 signext %a) {
+define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK-LABEL: fp16_vmaxnm_NNNo:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI17_0:
; CHECK-NEXT: .short 0x5040 @ half 34
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp ogt half %0, 12.
- %cond1 = select i1 %cmp1, half %0, half 12.
+ %cmp1 = fcmp ogt half %a, 12.
+ %cond1 = select i1 %cmp1, half %a, half 12.
%cmp2 = fcmp ogt half 34., %cond1
%cond2 = select i1 %cmp2, half 34., half %cond1
ret half %cond2
}
-define half @fp16_vmaxnm_NNNoge(i16 signext %a) {
+define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK-LABEL: fp16_vmaxnm_NNNoge:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -404,25 +368,24 @@
; CHECK-NEXT: .LCPI18_1:
; CHECK-NEXT: .short 0x5300 @ half 56
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp oge half %0, 34.
- %cond1 = select i1 %cmp1, half %0, half 34.
+ %cmp1 = fcmp oge half %a, 34.
+ %cond1 = select i1 %cmp1, half %a, half 34.
%cmp2 = fcmp oge half 56., %cond1
%cond2 = select i1 %cmp2, half 56., half %cond1
ret half %cond2
}
-define half @fp16_vmaxnm_NNNo_rev(i16 signext %a) {
+define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-LABEL: fp16_vmaxnm_NNNo_rev:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI19_0
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -431,25 +394,24 @@
; CHECK-NEXT: .LCPI19_1:
; CHECK-NEXT: .short 0x54e0 @ half 78
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp olt half %0, 56.
- %cond1 = select i1 %cmp1, half 56., half %0
+ %cmp1 = fcmp olt half %a, 56.
+ %cond1 = select i1 %cmp1, half 56., half %a
%cmp2 = fcmp olt half 78., %cond1
%cond2 = select i1 %cmp2, half %cond1, half 78.
ret half %cond2
}
-define half @fp16_vmaxnm_NNNole_rev(i16 signext %a) {
+define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-LABEL: fp16_vmaxnm_NNNole_rev:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI20_0
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -458,50 +420,48 @@
; CHECK-NEXT: .LCPI20_1:
; CHECK-NEXT: .short 0x55a0 @ half 90
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp ole half %0, 78.
- %cond1 = select i1 %cmp1, half 78., half %0
+ %cmp1 = fcmp ole half %a, 78.
+ %cond1 = select i1 %cmp1, half 78., half %a
%cmp2 = fcmp ole half 90., %cond1
%cond2 = select i1 %cmp2, half %cond1, half 90.
ret half %cond2
}
-define half @fp16_vmaxnm_NNNu(i16 signext %b) {
+define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK-LABEL: fp16_vmaxnm_NNNu:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI21_0:
; CHECK-NEXT: .short 0x5040 @ half 34
entry:
- %0 = bitcast i16 %b to half
- %cmp1 = fcmp ugt half 12., %0
- %cond1 = select i1 %cmp1, half 12., half %0
+ %cmp1 = fcmp ugt half 12., %b
+ %cond1 = select i1 %cmp1, half 12., half %b
%cmp2 = fcmp ugt half %cond1, 34.
%cond2 = select i1 %cmp2, half %cond1, half 34.
ret half %cond2
}
-define half @fp16_vmaxnm_NNNuge(i16 signext %b) {
+define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK-LABEL: fp16_vmaxnm_NNNuge:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
@@ -510,81 +470,77 @@
; CHECK-NEXT: .LCPI22_1:
; CHECK-NEXT: .short 0x5300 @ half 56
entry:
- %0 = bitcast i16 %b to half
- %cmp1 = fcmp uge half 34., %0
- %cond1 = select i1 %cmp1, half 34., half %0
+ %cmp1 = fcmp uge half 34., %b
+ %cond1 = select i1 %cmp1, half 34., half %b
%cmp2 = fcmp uge half %cond1, 56.
%cond2 = select i1 %cmp2, half %cond1, half 56.
ret half %cond2
}
-define half @fp16_vminmaxnm_neg0(i16 signext %a) {
+define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vminnm.f16 s2, s2, s0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:
; CHECK-NEXT: .short 0x8000 @ half -0
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp olt half %0, -0.
- %cond1 = select i1 %cmp1, half %0, half -0.
+ %cmp1 = fcmp olt half %a, -0.
+ %cond1 = select i1 %cmp1, half %a, half -0.
%cmp2 = fcmp ugt half %cond1, -0.
%cond2 = select i1 %cmp2, half %cond1, half -0.
ret half %cond2
}
-define half @fp16_vminmaxnm_e_0(i16 signext %a) {
+define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vldr.16 s2, .LCPI24_0
; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI24_0:
; CHECK-NEXT: .short 0x0000 @ half 0
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp nsz ole half 0., %0
- %cond1 = select i1 %cmp1, half 0., half %0
+ %cmp1 = fcmp nsz ole half 0., %a
+ %cond1 = select i1 %cmp1, half 0., half %a
%cmp2 = fcmp nsz uge half 0., %cond1
%cond2 = select i1 %cmp2, half 0., half %cond1
ret half %cond2
}
-define half @fp16_vminmaxnm_e_neg0(i16 signext %a) {
+define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, .LCPI25_0
-; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vmov.f16 s2, r0
; CHECK-NEXT: vminnm.f16 s2, s2, s0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI25_0:
; CHECK-NEXT: .short 0x8000 @ half -0
entry:
- %0 = bitcast i16 %a to half
- %cmp1 = fcmp nsz ule half -0., %0
- %cond1 = select i1 %cmp1, half -0., half %0
+ %cmp1 = fcmp nsz ule half -0., %a
+ %cond1 = select i1 %cmp1, half -0., half %a
%cmp2 = fcmp nsz oge half -0., %cond1
%cond2 = select i1 %cmp2, half -0., half %cond1
ret half %cond2
Index: llvm/test/CodeGen/ARM/fp16-promote.ll
===================================================================
--- llvm/test/CodeGen/ARM/fp16-promote.ll
+++ llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -933,7 +933,6 @@
}
; CHECK-ALL-LABEL: test_struct_return:
-; CHECK-FP16: vcvtb.f32.f16
; CHECK-VFP-LIBCALL: bl __aeabi_h2f
; CHECK-NOVFP-DAG: ldr
; CHECK-NOVFP-DAG: ldrh
Index: llvm/test/CodeGen/ARM/fp16-bitcast.ll
===================================================================
--- llvm/test/CodeGen/ARM/fp16-bitcast.ll
+++ llvm/test/CodeGen/ARM/fp16-bitcast.ll
@@ -1,71 +1,115 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple thumbv8m.main-arm-unknown-eabi -mattr=+vfp4d16sp < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-VFPV4
-; RUN: llc -mtriple thumbv8.1m.main-arm-unknown-eabi -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
+; RUN: llc -mtriple thumbv8m.main-arm-unknown-eabi --float-abi=soft -mattr=+vfp4d16sp < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-VFPV4-SOFT
+; RUN: llc -mtriple thumbv8.1m.main-arm-unknown-eabi --float-abi=soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16-SOFT
+; RUN: llc -mtriple thumbv8m.main-arm-unknown-eabi --float-abi=hard -mattr=+vfp4d16sp < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-VFPV4-HARD
+; RUN: llc -mtriple thumbv8.1m.main-arm-unknown-eabi --float-abi=hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16-HARD
target triple = "thumbv8.1m.main-arm-unknown-eabi"
define float @add(float %a, float %b) {
-; CHECK-LABEL: add:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov s0, r1
-; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: vadd.f32 s0, s2, s0
-; CHECK-NEXT: vmov r0, s0
-; CHECK-NEXT: bx lr
+; CHECK-VFPV4-SOFT-LABEL: add:
+; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry
+; CHECK-VFPV4-SOFT-NEXT: vmov s0, r1
+; CHECK-VFPV4-SOFT-NEXT: vmov s2, r0
+; CHECK-VFPV4-SOFT-NEXT: vadd.f32 s0, s2, s0
+; CHECK-VFPV4-SOFT-NEXT: vmov r0, s0
+; CHECK-VFPV4-SOFT-NEXT: bx lr
+;
+; CHECK-FP16-SOFT-LABEL: add:
+; CHECK-FP16-SOFT: @ %bb.0: @ %entry
+; CHECK-FP16-SOFT-NEXT: vmov s0, r1
+; CHECK-FP16-SOFT-NEXT: vmov s2, r0
+; CHECK-FP16-SOFT-NEXT: vadd.f32 s0, s2, s0
+; CHECK-FP16-SOFT-NEXT: vmov r0, s0
+; CHECK-FP16-SOFT-NEXT: bx lr
+;
+; CHECK-VFPV4-HARD-LABEL: add:
+; CHECK-VFPV4-HARD: @ %bb.0: @ %entry
+; CHECK-VFPV4-HARD-NEXT: vadd.f32 s0, s0, s1
+; CHECK-VFPV4-HARD-NEXT: bx lr
+;
+; CHECK-FP16-HARD-LABEL: add:
+; CHECK-FP16-HARD: @ %bb.0: @ %entry
+; CHECK-FP16-HARD-NEXT: vadd.f32 s0, s0, s1
+; CHECK-FP16-HARD-NEXT: bx lr
entry:
%add = fadd float %a, %b
ret float %add
}
-define i32 @addf16(i32 %a.coerce, i32 %b.coerce) {
-; CHECK-VFPV4-LABEL: addf16:
-; CHECK-VFPV4: @ %bb.0: @ %entry
-; CHECK-VFPV4-NEXT: vmov s2, r1
-; CHECK-VFPV4-NEXT: vmov s0, r0
-; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s2, s2
-; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s0, s0
-; CHECK-VFPV4-NEXT: vadd.f32 s0, s0, s2
-; CHECK-VFPV4-NEXT: vcvtb.f16.f32 s0, s0
-; CHECK-VFPV4-NEXT: vmov r0, s0
-; CHECK-VFPV4-NEXT: uxth r0, r0
-; CHECK-VFPV4-NEXT: bx lr
+define half @addf16(half %a, half %b) {
+; CHECK-VFPV4-SOFT-LABEL: addf16:
+; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry
+; CHECK-VFPV4-SOFT-NEXT: vmov s2, r1
+; CHECK-VFPV4-SOFT-NEXT: vmov s0, r0
+; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-VFPV4-SOFT-NEXT: vadd.f32 s0, s0, s2
+; CHECK-VFPV4-SOFT-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-VFPV4-SOFT-NEXT: vmov r0, s0
+; CHECK-VFPV4-SOFT-NEXT: bx lr
;
-; CHECK-FP16-LABEL: addf16:
-; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: vmov.f16 s0, r1
-; CHECK-FP16-NEXT: vmov.f16 s2, r0
-; CHECK-FP16-NEXT: vadd.f16 s0, s2, s0
-; CHECK-FP16-NEXT: vmov.f16 r0, s0
-; CHECK-FP16-NEXT: bx lr
+; CHECK-FP16-SOFT-LABEL: addf16:
+; CHECK-FP16-SOFT: @ %bb.0: @ %entry
+; CHECK-FP16-SOFT-NEXT: vmov.f16 s0, r1
+; CHECK-FP16-SOFT-NEXT: vmov.f16 s2, r0
+; CHECK-FP16-SOFT-NEXT: vadd.f16 s0, s2, s0
+; CHECK-FP16-SOFT-NEXT: vmov r0, s0
+; CHECK-FP16-SOFT-NEXT: bx lr
+;
+; CHECK-VFPV4-HARD-LABEL: addf16:
+; CHECK-VFPV4-HARD: @ %bb.0: @ %entry
+; CHECK-VFPV4-HARD-NEXT: vcvtb.f32.f16 s2, s1
+; CHECK-VFPV4-HARD-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-VFPV4-HARD-NEXT: vadd.f32 s0, s0, s2
+; CHECK-VFPV4-HARD-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-VFPV4-HARD-NEXT: bx lr
+;
+; CHECK-FP16-HARD-LABEL: addf16:
+; CHECK-FP16-HARD: @ %bb.0: @ %entry
+; CHECK-FP16-HARD-NEXT: vadd.f16 s0, s0, s1
+; CHECK-FP16-HARD-NEXT: bx lr
entry:
- %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
- %0 = bitcast i16 %tmp.0.extract.trunc to half
- %tmp1.0.extract.trunc = trunc i32 %b.coerce to i16
- %1 = bitcast i16 %tmp1.0.extract.trunc to half
- %add = fadd half %0, %1
- %2 = bitcast half %add to i16
- %tmp4.0.insert.ext = zext i16 %2 to i32
- ret i32 %tmp4.0.insert.ext
+ %add = fadd half %a, %b
+ ret half %add
}
define half @load_i16(i16 *%hp) {
-; CHECK-VFPV4-LABEL: load_i16:
-; CHECK-VFPV4: @ %bb.0: @ %entry
-; CHECK-VFPV4-NEXT: vmov.f32 s0, #1.000000e+00
-; CHECK-VFPV4-NEXT: ldrh r0, [r0]
-; CHECK-VFPV4-NEXT: vmov s2, r0
-; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s2, s2
-; CHECK-VFPV4-NEXT: vadd.f32 s0, s2, s0
-; CHECK-VFPV4-NEXT: vmov r0, s0
-; CHECK-VFPV4-NEXT: bx lr
+; CHECK-VFPV4-SOFT-LABEL: load_i16:
+; CHECK-VFPV4-SOFT: @ %bb.0: @ %entry
+; CHECK-VFPV4-SOFT-NEXT: vmov.f32 s0, #1.000000e+00
+; CHECK-VFPV4-SOFT-NEXT: ldrh r0, [r0]
+; CHECK-VFPV4-SOFT-NEXT: vmov s2, r0
+; CHECK-VFPV4-SOFT-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-VFPV4-SOFT-NEXT: vadd.f32 s0, s2, s0
+; CHECK-VFPV4-SOFT-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-VFPV4-SOFT-NEXT: vmov r0, s0
+; CHECK-VFPV4-SOFT-NEXT: bx lr
+;
+; CHECK-FP16-SOFT-LABEL: load_i16:
+; CHECK-FP16-SOFT: @ %bb.0: @ %entry
+; CHECK-FP16-SOFT-NEXT: vldr.16 s2, [r0]
+; CHECK-FP16-SOFT-NEXT: vmov.f16 s0, #1.000000e+00
+; CHECK-FP16-SOFT-NEXT: vadd.f16 s0, s2, s0
+; CHECK-FP16-SOFT-NEXT: vmov r0, s0
+; CHECK-FP16-SOFT-NEXT: bx lr
+;
+; CHECK-VFPV4-HARD-LABEL: load_i16:
+; CHECK-VFPV4-HARD: @ %bb.0: @ %entry
+; CHECK-VFPV4-HARD-NEXT: vmov.f32 s0, #1.000000e+00
+; CHECK-VFPV4-HARD-NEXT: ldrh r0, [r0]
+; CHECK-VFPV4-HARD-NEXT: vmov s2, r0
+; CHECK-VFPV4-HARD-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-VFPV4-HARD-NEXT: vadd.f32 s0, s2, s0
+; CHECK-VFPV4-HARD-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-VFPV4-HARD-NEXT: bx lr
;
-; CHECK-FP16-LABEL: load_i16:
-; CHECK-FP16: @ %bb.0: @ %entry
-; CHECK-FP16-NEXT: vldr.16 s2, [r1]
-; CHECK-FP16-NEXT: vmov.f16 s0, #1.000000e+00
-; CHECK-FP16-NEXT: vadd.f16 s0, s2, s0
-; CHECK-FP16-NEXT: vstr.16 s0, [r0]
-; CHECK-FP16-NEXT: bx lr
+; CHECK-FP16-HARD-LABEL: load_i16:
+; CHECK-FP16-HARD: @ %bb.0: @ %entry
+; CHECK-FP16-HARD-NEXT: vldr.16 s2, [r0]
+; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+00
+; CHECK-FP16-HARD-NEXT: vadd.f16 s0, s2, s0
+; CHECK-FP16-HARD-NEXT: bx lr
entry:
%h = load i16, i16 *%hp, align 2
%hc = bitcast i16 %h to half
Index: llvm/test/CodeGen/ARM/fp16-args.ll
===================================================================
--- llvm/test/CodeGen/ARM/fp16-args.ll
+++ llvm/test/CodeGen/ARM/fp16-args.ll
@@ -1,41 +1,46 @@
-; RUN: llc -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
-; RUN: llc -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv7a--none-eabi -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+; RUN: llc -mtriple=armv7a--none-eabi -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+; RUN: llc -mtriple=armv7a--none-eabi -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-SOFT
+; RUN: llc -mtriple=armv7a--none-eabi -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-HARD
+; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-SOFT
+; RUN: llc -mtriple=armv7aeb--none-eabi -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FULL-HARD
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
-target triple = "armv7a--none-eabi"
-
-define float @foo(float %a.coerce, float %b.coerce) {
+define half @foo(half %a, half %b) {
+; SOFT-LABEL: foo:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov s2, r1
+; SOFT-NEXT: vmov s0, r0
+; SOFT-NEXT: vcvtb.f32.f16 s2, s2
+; SOFT-NEXT: vcvtb.f32.f16 s0, s0
+; SOFT-NEXT: vadd.f32 s0, s0, s2
+; SOFT-NEXT: vcvtb.f16.f32 s0, s0
+; SOFT-NEXT: vmov r0, s0
+; SOFT-NEXT: bx lr
+;
+; HARD-LABEL: foo:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vcvtb.f32.f16 s2, s1
+; HARD-NEXT: vcvtb.f32.f16 s0, s0
+; HARD-NEXT: vadd.f32 s0, s0, s2
+; HARD-NEXT: vcvtb.f16.f32 s0, s0
+; HARD-NEXT: bx lr
+;
+; FULL-SOFT-LABEL: foo:
+; FULL-SOFT: @ %bb.0: @ %entry
+; FULL-SOFT-NEXT: vmov.f16 s0, r1
+; FULL-SOFT-NEXT: vmov.f16 s2, r0
+; FULL-SOFT-NEXT: vadd.f16 s0, s2, s0
+; FULL-SOFT-NEXT: vmov r0, s0
+; FULL-SOFT-NEXT: bx lr
+;
+; FULL-HARD-LABEL: foo:
+; FULL-HARD: @ %bb.0: @ %entry
+; FULL-HARD-NEXT: vadd.f16 s0, s0, s1
+; FULL-HARD-NEXT: bx lr
entry:
- %0 = bitcast float %a.coerce to i32
- %tmp.0.extract.trunc = trunc i32 %0 to i16
- %1 = bitcast i16 %tmp.0.extract.trunc to half
- %2 = bitcast float %b.coerce to i32
- %tmp1.0.extract.trunc = trunc i32 %2 to i16
- %3 = bitcast i16 %tmp1.0.extract.trunc to half
- %4 = fadd half %1, %3
- %5 = bitcast half %4 to i16
- %tmp5.0.insert.ext = zext i16 %5 to i32
- %6 = bitcast i32 %tmp5.0.insert.ext to float
- ret float %6
-; CHECK: foo:
-
-; SOFT: vmov {{s[0-9]+}}, r1
-; SOFT: vmov {{s[0-9]+}}, r0
-; SOFT: vcvtb.f32.f16 {{s[0-9]+}}, {{s[0-9]+}}
-; SOFT: vcvtb.f32.f16 {{s[0-9]+}}, {{s[0-9]+}}
-; SOFT: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-; SOFT: vcvtb.f16.f32 {{s[0-9]+}}, {{s[0-9]+}}
-; SOFT: vmov r0, {{s[0-9]+}}
-
-; HARD-NOT: vmov
-; HARD-NOT: uxth
-; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s1
-; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s0
-; HARD: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-; HARD: vcvtb.f16.f32 [[SREG:s[0-9]+]], {{s[0-9]+}}
-; HARD-NEXT: vmov [[REG0:r[0-9]+]], [[SREG]]
-; HARD-NEXT: uxth [[REG1:r[0-9]+]], [[REG0]]
-; HARD-NEXT: vmov s0, [[REG1]]
-
-; CHECK: bx lr
+ %0 = fadd half %a, %b
+ ret half %0
}
Index: llvm/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
===================================================================
--- llvm/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
+++ llvm/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
@@ -49,7 +49,7 @@
}
define half @test_half(half %a, half %b) {
-; CHECK: remark: {{.*}} unable to translate instruction: ret: ' ret half %res' (in function: test_half)
+; CHECK: remark: {{.*}} unable to lower arguments: half (half, half)* (in function: test_half)
; CHECK-LABEL: warning: Instruction selection used fallback path for test_half
%res = fadd half %a, %b
ret half %res
Index: llvm/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.h
+++ llvm/lib/Target/ARM/ARMISelLowering.h
@@ -806,6 +806,17 @@
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+ bool
+ splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ Optional<CallingConv::ID> CC) const override;
+
+ SDValue
+ joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT,
+ Optional<CallingConv::ID> CC) const override;
+
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2024,7 +2024,8 @@
}
SDValue Val;
- if (VA.needsCustom()) {
+ if (VA.needsCustom() &&
+ (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
// Handle f64 or half of a v2f64.
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
@@ -2073,6 +2074,17 @@
break;
}
+ // f16 arguments have their size extended to 4 bytes and passed as if they
+ // had been copied to the LSBs of a 32-bit register.
+ // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+ if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
+ assert(Subtarget->hasFullFP16() &&
+ "Lowering f16 type return without full fp16 support");
+ Val = DAG.getNode(ISD::BITCAST, dl,
+ MVT::getIntegerVT(VA.getLocVT().getSizeInBits()), Val);
+ Val = DAG.getNode(ARMISD::VMOVhr, dl, VA.getValVT(), Val);
+ }
+
InVals.push_back(Val);
}
@@ -2241,31 +2253,40 @@
break;
}
- // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
- if (VA.needsCustom()) {
- if (VA.getLocVT() == MVT::v2f64) {
- SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
- DAG.getConstant(0, dl, MVT::i32));
- SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
- DAG.getConstant(1, dl, MVT::i32));
-
- PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
- VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
-
- VA = ArgLocs[++i]; // skip ahead to next loc
- if (VA.isRegLoc()) {
- PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
- VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
- } else {
- assert(VA.isMemLoc());
+ // f16 arguments have their size extended to 4 bytes and passed as if they
+ // had been copied to the LSBs of a 32-bit register.
+ // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+ if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
+ assert(Subtarget->hasFullFP16() &&
+ "Lowering f16 type argument without full fp16 support");
+ Arg = DAG.getNode(ARMISD::VMOVrh, dl,
+ MVT::getIntegerVT(VA.getLocVT().getSizeInBits()), Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ }
- MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
- dl, DAG, VA, Flags));
- }
- } else {
- PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
+ if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, dl, MVT::i32));
+
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(
+ LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags));
}
+ } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
} else if (VA.isRegLoc()) {
if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i32) {
@@ -2755,7 +2776,7 @@
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
- if (VA.needsCustom()) {
+ if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
// f64 and vector types are split into multiple registers or
// register/stack-slot combinations. The types will not match
// the registers; give up on memory f64 refs until we figure
@@ -2907,7 +2928,8 @@
break;
}
- if (VA.needsCustom()) {
+ if (VA.needsCustom() &&
+ (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
if (VA.getLocVT() == MVT::v2f64) {
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
@@ -2915,15 +2937,15 @@
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(isLittleEndian ? 0 : 1),
- Flag);
+ Chain =
+ DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(isLittleEndian ? 1 : 0),
- Flag);
+ Chain =
+ DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
@@ -2937,14 +2959,12 @@
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- fmrrd.getValue(isLittleEndian ? 0 : 1),
- Flag);
+ fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- fmrrd.getValue(isLittleEndian ? 1 : 0),
- Flag);
+ fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
@@ -4080,6 +4100,40 @@
AFI->setVarArgsFrameIndex(FrameIndex);
}
+bool ARMTargetLowering::splitValueIntoRegisterParts(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+ bool IsABIRegCopy = CC.hasValue();
+ EVT ValueVT = Val.getValueType();
+ if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
+ unsigned ValueBits = ValueVT.getSizeInBits();
+ unsigned PartBits = PartVT.getSizeInBits();
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ Parts[0] = Val;
+ return true;
+ }
+ return false;
+}
+
+SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
+ SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
+ bool IsABIRegCopy = CC.hasValue();
+ if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
+ unsigned ValueBits = ValueVT.getSizeInBits();
+ unsigned PartBits = PartVT.getSizeInBits();
+ SDValue Val = Parts[0];
+
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ return Val;
+ }
+ return SDValue();
+}
+
SDValue ARMTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -4152,33 +4206,29 @@
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
- if (VA.needsCustom()) {
+ if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
// f64 and vector types are split up into multiple registers or
// combinations of registers and stack slots.
- if (VA.getLocVT() == MVT::v2f64) {
- SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
- Chain, DAG, dl);
- VA = ArgLocs[++i]; // skip ahead to next loc
- SDValue ArgValue2;
- if (VA.isMemLoc()) {
- int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FI));
- } else {
- ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
- Chain, DAG, dl);
- }
- ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
- ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
- ArgValue, ArgValue1,
- DAG.getIntPtrConstant(0, dl));
- ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
- ArgValue, ArgValue2,
- DAG.getIntPtrConstant(1, dl));
- } else
- ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+ SDValue ArgValue1 =
+ GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgValue2 = DAG.getLoad(
+ MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+ }
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
+ ArgValue1, DAG.getIntPtrConstant(0, dl));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
+ ArgValue2, DAG.getIntPtrConstant(1, dl));
+ } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
} else {
const TargetRegisterClass *RC;
@@ -4229,6 +4279,18 @@
break;
}
+ // f16 arguments have their size extended to 4 bytes and passed as if they
+ // had been copied to the LSBs of a 32-bit register.
+ // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+ if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
+ assert(Subtarget->hasFullFP16() &&
+ "Lowering f16 type argument without full fp16 support");
+ ArgValue = DAG.getNode(ISD::BITCAST, dl,
+ MVT::getIntegerVT(VA.getLocVT().getSizeInBits()),
+ ArgValue);
+ ArgValue = DAG.getNode(ARMISD::VMOVhr, dl, VA.getValVT(), ArgValue);
+ }
+
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
// sanity check
Index: llvm/lib/Target/ARM/ARMCallingConv.td
===================================================================
--- llvm/lib/Target/ARM/ARMCallingConv.td
+++ llvm/lib/Target/ARM/ARMCallingConv.td
@@ -139,7 +139,7 @@
CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, [R0, R1, R2, R3]>>>,
CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
- CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f16, f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
CCIfType<[v2f64], CCIfAlign<"16",
CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
@@ -176,6 +176,7 @@
CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f16], CCCustom<"CC_ARM_AAPCS_Custom_f16">>,
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
@@ -193,6 +194,7 @@
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f16], CCCustom<"CC_ARM_AAPCS_Custom_f16">>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
@@ -224,6 +226,7 @@
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15]>>,
+ CCIfType<[f16], CCCustom<"CC_ARM_AAPCS_VFP_Custom_f16">>,
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
@@ -242,7 +245,8 @@
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
- S9, S10, S11, S12, S13, S14, S15]>>,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCIfType<[f16], CCCustom<"CC_ARM_AAPCS_VFP_Custom_f16">>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
Index: llvm/lib/Target/ARM/ARMCallingConv.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMCallingConv.cpp
+++ llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -278,5 +278,33 @@
return true;
}
+static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, CCState &State,
+ ArrayRef<MCPhysReg> RegList) {
+ unsigned Reg = State.AllocateReg(RegList);
+ if (Reg) {
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+ return false;
+}
+
+static bool CC_ARM_AAPCS_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ // f16 arguments are extended to i32 and assigned to a register in [r0, r3]
+ return CustomAssignInRegList(ValNo, ValVT, MVT::i32, LocInfo, State,
+ RRegList);
+}
+
+static bool CC_ARM_AAPCS_VFP_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ CCState &State) {
+ // f16 arguments are extended to f32 and assigned to a register in [s0, s15]
+ return CustomAssignInRegList(ValNo, ValVT, MVT::f32, LocInfo, State,
+ SRegList);
+}
+
// Include the table generated calling convention implementations.
#include "ARMGenCallingConv.inc"
Index: llvm/lib/Target/ARM/ARMCallLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -140,7 +140,10 @@
CCValAssign VA = VAs[0];
assert(VA.needsCustom() && "Value doesn't need custom handling");
- assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ // Custom lowering for other types, such as f16, is currently not supported
+ if (VA.getValVT() != MVT::f64)
+ return 0;
CCValAssign NextVA = VAs[1];
assert(NextVA.needsCustom() && "Value doesn't need custom handling");
@@ -360,7 +363,10 @@
CCValAssign VA = VAs[0];
assert(VA.needsCustom() && "Value doesn't need custom handling");
- assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ // Custom lowering for other types, such as f16, is currently not supported
+ if (VA.getValVT() != MVT::f64)
+ return 0;
CCValAssign NextVA = VAs[1];
assert(NextVA.needsCustom() && "Value doesn't need custom handling");
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -205,12 +205,17 @@
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC = None,
Optional<ISD::NodeType> AssertOp = None) {
+ // Let the target assemble the parts if it wants to
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, CC))
+ return Val;
+
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
assert(NumParts > 0 && "No parts to assemble!");
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
if (NumParts > 1) {
@@ -512,6 +517,11 @@
const Value *V,
Optional<CallingConv::ID> CallConv = None,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ // Let the target split the parts if it wants to
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
+ CallConv))
+ return;
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -298,7 +298,11 @@
assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
if (VA.needsCustom()) {
- j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ unsigned NumArgRegs =
+ Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ if (!NumArgRegs)
+ return false;
+ j += NumArgRegs;
continue;
}
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3600,6 +3600,24 @@
// the SelectionDAGBuilder code knows how to lower these.
//
+ /// Target-specific splitting of values into parts that fit a register
+ /// storing a legal type
+ virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT,
+ Optional<CallingConv::ID> CC) const {
+ return false;
+ }
+
+ /// Target-specific combining of register parts into its original value
+ virtual SDValue
+ joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT,
+ Optional<CallingConv::ID> CC) const {
+ return SDValue();
+ }
+
/// This hook must be implemented to lower the incoming (formal) arguments,
/// described by the Ins array, into the specified DAG. The implementation
/// should fill in the InVals array with legal-type argument values, and
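As a usage note for the two hooks added above: a target overrides them, handles only the (ValueVT, PartVT) pairs it recognises, and otherwise declines (returns false or an empty SDValue) so the generic splitting/joining code in SelectionDAGBuilder runs unchanged, exactly as the ARM override earlier in this patch does for f16-in-f32. The dependency-free sketch below only models that contract with stand-in types; the enum, class, and function shapes are illustrative, not LLVM API.

#include <cstdint>
#include <cstdio>
#include <optional>

// Stand-in value types for the illustration.
enum class VT { f16, f32, i32 };

struct GenericLowering {
  // Default: decline, so the generic splitting code handles the value.
  virtual bool splitValueIntoRegisterParts(uint32_t bits, VT valueVT,
                                           uint32_t *parts, VT partVT) const {
    return false;
  }
  virtual std::optional<uint32_t>
  joinRegisterPartsIntoValue(const uint32_t *parts, VT partVT,
                             VT valueVT) const {
    return std::nullopt;
  }
  virtual ~GenericLowering() = default;
};

struct ARMLikeLowering : GenericLowering {
  // Claim only the f16-in-f32 case: keep the 16-bit payload in the low bits
  // of the 32-bit part, and hand everything else back to the generic path.
  bool splitValueIntoRegisterParts(uint32_t bits, VT valueVT, uint32_t *parts,
                                   VT partVT) const override {
    if (valueVT == VT::f16 && partVT == VT::f32) {
      parts[0] = bits & 0xFFFFu;
      return true;
    }
    return false;
  }
  std::optional<uint32_t>
  joinRegisterPartsIntoValue(const uint32_t *parts, VT partVT,
                             VT valueVT) const override {
    if (valueVT == VT::f16 && partVT == VT::f32)
      return parts[0] & 0xFFFFu;
    return std::nullopt;
  }
};

int main() {
  ARMLikeLowering tli;
  uint32_t parts[1] = {0};
  if (tli.splitValueIntoRegisterParts(0x3C00, VT::f16, parts, VT::f32))
    printf("part[0] = 0x%08x\n", parts[0]); // 0x00003c00
  if (auto v = tli.joinRegisterPartsIntoValue(parts, VT::f32, VT::f16))
    printf("joined  = 0x%04x\n", *v);       // 0x3c00
  return 0;
}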