wenju-he wrote:

> As long as the software fma implementation never gets inlined anywhere, and 
> the fma entry point is the software one, that should be fine.

Yes, that is the state both prior to `39f3d72b72fd` and after this PR.
I rebuilt libclc.spv for the old libclc spirv64-mesa3d- target using 
b9cecee3fb90 (which is prior to 39f3d72b72fd), and the _Z3fmafff 
implementation is the same as after this PR.
b9cecee3fb90:
```
define spir_func float @_Z3fmafff(float %0, float %1, float %2) #1 {
  %4 = alloca float, align 4
  %5 = alloca float, align 4
  %6 = alloca float, align 4
  store float %0, ptr %4, align 4
  store float %1, ptr %5, align 4
  store float %2, ptr %6, align 4
  %7 = load float, ptr %4, align 4
  %8 = load float, ptr %5, align 4
  %9 = load float, ptr %6, align 4
  %10 = call spir_func float @_Z12__clc_sw_fmafff(float %7, float %8, float %9) 
#1
  ret float %10
}
```
after this PR:
```
define spir_func float @_Z3fmafff(float %0, float %1, float %2) #0 {
  %4 = alloca float, align 4
  %5 = alloca float, align 4
  %6 = alloca float, align 4
  store float %0, ptr %4, align 4
  store float %1, ptr %5, align 4
  store float %2, ptr %6, align 4
  %7 = load float, ptr %4, align 4
  %8 = load float, ptr %5, align 4
  %9 = load float, ptr %6, align 4
  %10 = call spir_func float @_Z12__clc_sw_fmafff(float %7, float %8, float %9) 
#0
  ret float %10
}
```

Grep of all fma symbols:
b9cecee3fb90:
```
grep "@_Z3fma" libclc.spv.ll
41194:  %10 = call spir_func double @_Z3fmaddd(double %7, double %8, double %9) 
#0
41458:  %10 = call spir_func <2 x double> @_Z3fmaDv2_dS_S_(<2 x double> %7, <2 
x double> %8, <2 x double> %9) #0
41915:  %22 = call spir_func <3 x double> @_Z3fmaDv3_dS_S_(<3 x double> %15, <3 
x double> %18, <3 x double> %21) #0
42192:  %10 = call spir_func <4 x double> @_Z3fmaDv4_dS_S_(<4 x double> %7, <4 
x double> %8, <4 x double> %9) #0
42465:  %10 = call spir_func <8 x double> @_Z3fmaDv8_dS_S_(<8 x double> %7, <8 
x double> %8, <8 x double> %9) #0
42738:  %10 = call spir_func <16 x double> @_Z3fmaDv16_dS_S_(<16 x double> %7, 
<16 x double> %8, <16 x double> %9) #0
98109:  %18 = call spir_func float @_Z3fmafff(float %15, float %16, float %17) 
#0
100160:  %18 = call spir_func <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> %15, <2 
x float> %16, <2 x float> %17) #0
101779:  %36 = call spir_func <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> %29, <3 
x float> %32, <3 x float> %35) #0
103900:  %18 = call spir_func <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> %15, <4 
x float> %16, <4 x float> %17) #0
105269:  %18 = call spir_func <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> %15, <8 
x float> %16, <8 x float> %17) #0
106670:  %18 = call spir_func <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> %15, 
<16 x float> %16, <16 x float> %17) #0
139558:define spir_func float @_Z3fmafff(float %0, float %1, float %2) #1 {
139573:define spir_func <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> %0, <2 x 
float> %1, <2 x float> %2) #1 {
139588:define spir_func <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> %0, <3 x 
float> %1, <3 x float> %2) #1 {
139615:define spir_func <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> %0, <4 x 
float> %1, <4 x float> %2) #1 {
139630:define spir_func <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> %0, <8 x 
float> %1, <8 x float> %2) #1 {
139645:define spir_func <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> %0, <16 x 
float> %1, <16 x float> %2) #1 {
311098:declare spir_func double @_Z3fmaddd(double, double, double) #4
311104:declare spir_func <2 x double> @_Z3fmaDv2_dS_S_(<2 x double>, <2 x 
double>, <2 x double>) #4
311113:declare spir_func <3 x double> @_Z3fmaDv3_dS_S_(<3 x double>, <3 x 
double>, <3 x double>) #4
311122:declare spir_func <4 x double> @_Z3fmaDv4_dS_S_(<4 x double>, <4 x 
double>, <4 x double>) #4
311131:declare spir_func <8 x double> @_Z3fmaDv8_dS_S_(<8 x double>, <8 x 
double>, <8 x double>) #4
311140:declare spir_func <16 x double> @_Z3fmaDv16_dS_S_(<16 x double>, <16 x 
double>, <16 x double>) #4
```
The double fma symbols are external and are used in __clc_fma. These double 
fma symbols are represented as the OpenCL fma ExtInst in libclc.spv.
```
define internal spir_func <2 x double> @_Z9__clc_fmaDv2_dS_S_(<2 x double> %0, 
<2 x double> %1, <2 x double> %2) #1 {
  %4 = alloca <2 x double>, align 16
  %5 = alloca <2 x double>, align 16
  %6 = alloca <2 x double>, align 16
  store <2 x double> %0, ptr %4, align 16
  store <2 x double> %1, ptr %5, align 16
  store <2 x double> %2, ptr %6, align 16
  %7 = load <2 x double>, ptr %4, align 16
  %8 = load <2 x double>, ptr %5, align 16
  %9 = load <2 x double>, ptr %6, align 16
  %10 = call spir_func <2 x double> @_Z3fmaDv2_dS_S_(<2 x double> %7, <2 x 
double> %8, <2 x double> %9) #0
  ret <2 x double> %10
}
```
after this PR:
```
grep "@_Z3fma" libclc.spv.ll
64:define spir_func float @_Z3fmafff(float %0, float %1, float %2) #0 {
1388:define spir_func <2 x float> @_Z3fmaDv2_fS_S_(<2 x float> %0, <2 x float> 
%1, <2 x float> %2) #0 {
1433:define spir_func <3 x float> @_Z3fmaDv3_fS_S_(<3 x float> %0, <3 x float> 
%1, <3 x float> %2) #0 {
1508:define spir_func <4 x float> @_Z3fmaDv4_fS_S_(<4 x float> %0, <4 x float> 
%1, <4 x float> %2) #0 {
1569:define spir_func <8 x float> @_Z3fmaDv8_fS_S_(<8 x float> %0, <8 x float> 
%1, <8 x float> %2) #0 {
1662:define spir_func <16 x float> @_Z3fmaDv16_fS_S_(<16 x float> %0, <16 x 
float> %1, <16 x float> %2) #0 {
```
_Z9__clc_fmaDv2_dS_S_ is implemented using @llvm.fma.v2f64, which is 
represented as the OpFmaKHR extension instruction in libclc.spv.
```
define internal spir_func <2 x double> @_Z9__clc_fmaDv2_dS_S_(<2 x double> %0, 
<2 x double> %1, <2 x double> %2) #0 {
  %4 = alloca <2 x double>, align 16
  %5 = alloca <2 x double>, align 16
  %6 = alloca <2 x double>, align 16
  store <2 x double> %0, ptr %4, align 16
  store <2 x double> %1, ptr %5, align 16
  store <2 x double> %2, ptr %6, align 16
  %7 = load <2 x double>, ptr %4, align 16
  %8 = load <2 x double>, ptr %5, align 16
  %9 = load <2 x double>, ptr %6, align 16
  %10 = call <2 x double> @llvm.fma.v2f64(<2 x double> %7, <2 x double> %8, <2 
x double> %9)
  ret <2 x double> %10
}
```
So the status of fma in the mesa libclc.spv is restored by this PR.


https://github.com/llvm/llvm-project/pull/199626
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to