wenju-he wrote:
> As long as the software fma implementation never gets inlined anywhere, and
> the fma entry point is the software one, that should be fine.
Yes, that is the situation both prior to 39f3d72b72fd and after this PR.
I rebuilt libclc.spv for the old libclc spirv64-mesa3d- target at b9cecee3fb90
(which is prior to 39f3d72b72fd), and the _Z3fmafff implementation is the same
as after this PR.
b9cecee3fb90 :
```
define spir_func float @_Z3fmafff(float %0, float %1, float %2) #1 {
%4 = alloca float, align 4
%5 = alloca float, align 4
%6 = alloca float, align 4
store float %0, ptr %4, align 4
store float %1, ptr %5, align 4
store float %2, ptr %6, align 4
%7 = load float, ptr %4, align 4
%8 = load float, ptr %5, align 4
%9 = load float, ptr %6, align 4
%10 = call spir_func float @_Z12__clc_sw_fmafff(float %7, float %8, float %9)
#1
ret float %10
}
```
after this PR:
```
define spir_func float @_Z3fmafff(float %0, float %1, float %2) #0 {
%4 = alloca float, align 4
%5 = alloca float, align 4
%6 = alloca float, align 4
store float %0, ptr %4, align 4
store float %1, ptr %5, align 4
store float %2, ptr %6, align 4
%7 = load float, ptr %4, align 4
%8 = load float, ptr %5, align 4
%9 = load float, ptr %6, align 4
%10 = call spir_func float @_Z12__clc_sw_fmafff(float %7, float %8, float %9)
#0
ret float %10
}
```
Grep of all fma symbols:
b9cecee3fb90:
```
grep "@_Z3fma" libclc.spv.ll
41194: %10 = call spir_func double @_Z3fmaddd(double %7, double %8, double %9)
#0
41458: %10 = call spir_func <2 x double> @_Z3fmaDv2_dS_S_(<2 x double> %7, <2
x double> %8, <2 x double> %9) #0
41915: %22 = call spir_func <3 x double> @_Z3fmaDv3_dS_S_(<3 x double> %15, <3
x double> %18, <3 x double> %21) #0
42192: %10 = call spir_func <4 x double> @_Z3fmaDv4_dS_S_(<4 x double> %7, <4
x double> %8, <4 x double> %9) #0
42465: %10 = call spir_func <8 x double> @_Z3fmaDv8_dS_S_(<8 x double> %7, <8
x double> %8, <8 x double> %9) #0
42738: %10 = call spir_func <16 x double> @_Z3fmaDv16_dS_S_(<16 x double> %7,
<16 x double> %8, <16 x double> %9) #0
98109: %18 = call spir_func float @_Z3fmafff(float %15, float %16, float %17)
#0
100160: %18 = call spir_func <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> %15, <2
x float> %16, <2 x float> %17) #0
101779: %36 = call spir_func <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> %29, <3
x float> %32, <3 x float> %35) #0
103900: %18 = call spir_func <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> %15, <4
x float> %16, <4 x float> %17) #0
105269: %18 = call spir_func <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> %15, <8
x float> %16, <8 x float> %17) #0
106670: %18 = call spir_func <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> %15,
<16 x float> %16, <16 x float> %17) #0
139558:define spir_func float @_Z3fmafff(float %0, float %1, float %2) #1 {
139573:define spir_func <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> %0, <2 x
float> %1, <2 x float> %2) #1 {
139588:define spir_func <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> %0, <3 x
float> %1, <3 x float> %2) #1 {
139615:define spir_func <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> %0, <4 x
float> %1, <4 x float> %2) #1 {
139630:define spir_func <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> %0, <8 x
float> %1, <8 x float> %2) #1 {
139645:define spir_func <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> %0, <16 x
float> %1, <16 x float> %2) #1 {
311098:declare spir_func double @_Z3fmaddd(double, double, double) #4
311104:declare spir_func <2 x double> @_Z3fmaDv2_dS_S_(<2 x double>, <2 x
double>, <2 x double>) #4
311113:declare spir_func <3 x double> @_Z3fmaDv3_dS_S_(<3 x double>, <3 x
double>, <3 x double>) #4
311122:declare spir_func <4 x double> @_Z3fmaDv4_dS_S_(<4 x double>, <4 x
double>, <4 x double>) #4
311131:declare spir_func <8 x double> @_Z3fmaDv8_dS_S_(<8 x double>, <8 x
double>, <8 x double>) #4
311140:declare spir_func <16 x double> @_Z3fmaDv16_dS_S_(<16 x double>, <16 x
double>, <16 x double>) #4
```
The double fma symbols are only declared (external) and are called from
__clc_fma. These double fma symbols are represented as the OpenCL fma ExtInst
in libclc.spv.
```
define internal spir_func <2 x double> @_Z9__clc_fmaDv2_dS_S_(<2 x double> %0,
<2 x double> %1, <2 x double> %2) #1 {
%4 = alloca <2 x double>, align 16
%5 = alloca <2 x double>, align 16
%6 = alloca <2 x double>, align 16
store <2 x double> %0, ptr %4, align 16
store <2 x double> %1, ptr %5, align 16
store <2 x double> %2, ptr %6, align 16
%7 = load <2 x double>, ptr %4, align 16
%8 = load <2 x double>, ptr %5, align 16
%9 = load <2 x double>, ptr %6, align 16
%10 = call spir_func <2 x double> @_Z3fmaDv2_dS_S_(<2 x double> %7, <2 x
double> %8, <2 x double> %9) #0
ret <2 x double> %10
}
```
after this PR:
```
grep "@_Z3fma" libclc.spv.ll
64:define spir_func float @_Z3fmafff(float %0, float %1, float %2) #0 {
1388:define spir_func <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> %0, <2 x float>
%1, <2 x float> %2) #0 {
1433:define spir_func <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> %0, <3 x float>
%1, <3 x float> %2) #0 {
1508:define spir_func <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> %0, <4 x float>
%1, <4 x float> %2) #0 {
1569:define spir_func <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> %0, <8 x float>
%1, <8 x float> %2) #0 {
1662:define spir_func <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> %0, <16 x
float> %1, <16 x float> %2) #0 {
```
_Z9__clc_fmaDv2_dS_S_ is implemented using @llvm.fma.v2f64, which is
represented as OpFmaKHR ext instruction in libclc.spv.
```
define internal spir_func <2 x double> @_Z9__clc_fmaDv2_dS_S_(<2 x double> %0,
<2 x double> %1, <2 x double> %2) #0 {
%4 = alloca <2 x double>, align 16
%5 = alloca <2 x double>, align 16
%6 = alloca <2 x double>, align 16
store <2 x double> %0, ptr %4, align 16
store <2 x double> %1, ptr %5, align 16
store <2 x double> %2, ptr %6, align 16
%7 = load <2 x double>, ptr %4, align 16
%8 = load <2 x double>, ptr %5, align 16
%9 = load <2 x double>, ptr %6, align 16
%10 = call <2 x double> @llvm.fma.v2f64(<2 x double> %7, <2 x double> %8, <2
x double> %9)
ret <2 x double> %10
}
```
So the status of fma in mesa libclc.spv is restored by this PR.
https://github.com/llvm/llvm-project/pull/199626
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits