On Sat, 5 Nov 2022 08:40:06 +0100
Thomas Koenig <[email protected]> wrote:
> On 04.11.22 21:59, Bernhard Reutner-Fischer via Fortran wrote:
> > And not sure if fellow gfortraners would accept this attribute
> > target_clones in there in the first place..
>
> It might actually be useful. Is there any change about
> the calling sequence or anything else that should be visible
> in a Fortran module or the calling sequence?
The module interface remains the same.
And the call sequence remains the same, too.
For a user nothing changes.
An example:
! Module m: holds the single example procedure sub1.
module m
  implicit none

contains

  ! sub1 takes no arguments and prints the integer literal 4321 with
  ! list-directed output. The GCC directive below requests the clones
  ! "avx", "sse" and "default" for this procedure.
  subroutine sub1
    !GCC$ ATTRIBUTES target_clones("avx", "sse","default") :: sub1
    print *, 4321
  end subroutine sub1

end module m
This used to compile to:
$ nm /tmp/pristine.o
U _gfortran_st_write
U _gfortran_st_write_done
U _gfortran_transfer_integer_write
0000000000000000 T __m_MOD_sub1
And now compiles to:
$ nm /tmp/new.o
U __cpu_indicator_init
U __cpu_model
U _gfortran_st_write
U _gfortran_st_write_done
U _gfortran_transfer_integer_write
0000000000000000 i __m_MOD_sub1
000000000000006e t __m_MOD_sub1.avx
0000000000000000 t __m_MOD_sub1.default
0000000000000000 W __m_MOD_sub1.resolver
00000000000000dc t __m_MOD_sub1.sse
I.e. the caller still calls __m_MOD_sub1.
But this is now an ifunc, whose resolver looks at the CPU feature bits and
dispatches to the appropriate ISA version.
I'm attaching the assembler input for reference.
If you think that we want to add support for that attribute, I can
submit a proper patch. Just let me know, please.
thanks,
# GNU as (AT&T syntax) x86-64 listing generated for attr_target_clones-1.F90.
.file "attr_target_clones-1.F90"
.text
# Read-only constants referenced by the __m_MOD_sub1.* clones in this file.
.section .rodata
.align 8
# .LC0: NUL-terminated source file path; each clone stores its address into
# the parameter block it passes to _gfortran_st_write.
# (The directive and its operand must be on one line to assemble.)
.LC0:
.string "/scratch/src/gcc-13.mine/gcc/testsuite/gfortran.dg/attr_target_clones-1.F90"
.align 4
# .LC1: the 32-bit integer 4321; each clone passes its address to
# _gfortran_transfer_integer_write.
.LC1:
.long 4321
# Switch back to the code section for the function bodies that follow.
.text
# __m_MOD_sub1.default -- the "default" clone of sub1.
# Reads no argument registers. Builds a 528-byte block on the stack and passes
# its address as the first argument (%rdi) to three libgfortran calls:
# _gfortran_st_write, _gfortran_transfer_integer_write, _gfortran_st_write_done.
# NOTE(review): the block is presumably libgfortran's st_parameter_dt, whose
# leading fields would be flags, unit, filename, line -- confirm against
# libgfortran/io/io.h.
.type __m_MOD_sub1.default, @function
__m_MOD_sub1.default:
.LFB0:
.cfi_startproc
# Frame setup: save the caller's %rbp and use %rbp as the frame pointer
# (DWARF register 6 is %rbp on x86-64).
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Reserve the 528-byte block; 528 is a multiple of 16, so the 16-byte stack
# alignment established by the push is kept for the calls below.
subq $528, %rsp
# Fill the head of the block: +8 = address of .LC0 (file path), +16 = 21,
# +0 = 128, +4 = 6.
# NOTE(review): presumably line number 21, a flags word, and output unit 6
# respectively -- confirm.
movq $.LC0, -520(%rbp)
movl $21, -512(%rbp)
movl $128, -528(%rbp)
movl $6, -524(%rbp)
# _gfortran_st_write(&block)
leaq -528(%rbp), %rax
movq %rax, %rdi
call _gfortran_st_write
# _gfortran_transfer_integer_write(&block, &.LC1, 4): second argument is the
# address of the constant 4321.
# NOTE(review): the third argument 4 is presumably the integer kind/size in
# bytes -- confirm.
leaq -528(%rbp), %rax
movl $4, %edx
movl $.LC1, %esi
movq %rax, %rdi
call _gfortran_transfer_integer_write
# _gfortran_st_write_done(&block)
leaq -528(%rbp), %rax
movq %rax, %rdi
call _gfortran_st_write_done
nop
# Tear down the frame; afterwards the CFA is %rsp + 8 (DWARF register 7 is
# %rsp).
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
# Symbol size = bytes from the function label to this point.
.size __m_MOD_sub1.default, .-__m_MOD_sub1.default
# __m_MOD_sub1.avx -- the "avx" clone of sub1.
# In this listing the instruction stream is the same as in
# __m_MOD_sub1.default: no AVX instructions appear in the body.
# Reads no argument registers. Builds a 528-byte block on the stack and passes
# its address as the first argument (%rdi) to three libgfortran calls.
# NOTE(review): the block is presumably libgfortran's st_parameter_dt
# (flags, unit, filename, line) -- confirm against libgfortran/io/io.h.
.type __m_MOD_sub1.avx, @function
__m_MOD_sub1.avx:
.LFB1:
.cfi_startproc
# Frame setup: save the caller's %rbp and use %rbp as the frame pointer
# (DWARF register 6 is %rbp on x86-64).
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Reserve the 528-byte block (a multiple of 16, so stack alignment is kept
# for the calls below).
subq $528, %rsp
# Fill the head of the block: +8 = address of .LC0 (file path), +16 = 21,
# +0 = 128, +4 = 6.
# NOTE(review): presumably line number, flags word, and output unit -- confirm.
movq $.LC0, -520(%rbp)
movl $21, -512(%rbp)
movl $128, -528(%rbp)
movl $6, -524(%rbp)
# _gfortran_st_write(&block)
leaq -528(%rbp), %rax
movq %rax, %rdi
call _gfortran_st_write
# _gfortran_transfer_integer_write(&block, &.LC1, 4): second argument is the
# address of the constant 4321.
# NOTE(review): 4 is presumably the integer kind/size in bytes -- confirm.
leaq -528(%rbp), %rax
movl $4, %edx
movl $.LC1, %esi
movq %rax, %rdi
call _gfortran_transfer_integer_write
# _gfortran_st_write_done(&block)
leaq -528(%rbp), %rax
movq %rax, %rdi
call _gfortran_st_write_done
nop
# Tear down the frame; afterwards the CFA is %rsp + 8 (DWARF register 7 is
# %rsp).
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
# Symbol size = bytes from the function label to this point.
.size __m_MOD_sub1.avx, .-__m_MOD_sub1.avx
# __m_MOD_sub1.sse -- the "sse" clone of sub1.
# In this listing the instruction stream is the same as in
# __m_MOD_sub1.default: no SSE instructions appear in the body.
# Reads no argument registers. Builds a 528-byte block on the stack and passes
# its address as the first argument (%rdi) to three libgfortran calls.
# NOTE(review): the block is presumably libgfortran's st_parameter_dt
# (flags, unit, filename, line) -- confirm against libgfortran/io/io.h.
.type __m_MOD_sub1.sse, @function
__m_MOD_sub1.sse:
.LFB2:
.cfi_startproc
# Frame setup: save the caller's %rbp and use %rbp as the frame pointer
# (DWARF register 6 is %rbp on x86-64).
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Reserve the 528-byte block (a multiple of 16, so stack alignment is kept
# for the calls below).
subq $528, %rsp
# Fill the head of the block: +8 = address of .LC0 (file path), +16 = 21,
# +0 = 128, +4 = 6.
# NOTE(review): presumably line number, flags word, and output unit -- confirm.
movq $.LC0, -520(%rbp)
movl $21, -512(%rbp)
movl $128, -528(%rbp)
movl $6, -524(%rbp)
# _gfortran_st_write(&block)
leaq -528(%rbp), %rax
movq %rax, %rdi
call _gfortran_st_write
# _gfortran_transfer_integer_write(&block, &.LC1, 4): second argument is the
# address of the constant 4321.
# NOTE(review): 4 is presumably the integer kind/size in bytes -- confirm.
leaq -528(%rbp), %rax
movl $4, %edx
movl $.LC1, %esi
movq %rax, %rdi
call _gfortran_transfer_integer_write
# _gfortran_st_write_done(&block)
leaq -528(%rbp), %rax
movq %rax, %rdi
call _gfortran_st_write_done
nop
# Tear down the frame; afterwards the CFA is %rsp + 8 (DWARF register 7 is
# %rsp).
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
# Symbol size = bytes from the function label to this point.
.size __m_MOD_sub1.sse, .-__m_MOD_sub1.sse
# __m_MOD_sub1.resolver -- resolver function for the __m_MOD_sub1 ifunc.
# Takes no arguments. Calls __cpu_indicator_init, then tests bits of the
# 32-bit word at __cpu_model+12 and returns in %rax the address of one clone:
#   bit mask 512 set -> __m_MOD_sub1.avx
#   bit mask   8 set -> __m_MOD_sub1.sse
#   otherwise        -> __m_MOD_sub1.default
# NOTE(review): __cpu_model+12 is presumably the __cpu_features word of
# libgcc's __processor_model, with 512 and 8 the AVX and SSE feature bits --
# confirm against libgcc's cpuinfo sources.
# Emitted weak in its own COMDAT group section. The .section directive and its
# arguments must be on one line to assemble.
.section .text.__m_MOD_sub1.resolver,"axG",@progbits,__m_MOD_sub1.resolver,comdat
.weak __m_MOD_sub1.resolver
.type __m_MOD_sub1.resolver, @function
__m_MOD_sub1.resolver:
.LFB4:
.cfi_startproc
# Frame setup; the push also leaves %rsp 16-byte aligned for the call below
# (DWARF register 6 is %rbp on x86-64).
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
call __cpu_indicator_init
# First choice: the avx clone. The masked value is either 0 or 512, never
# negative, so jle is taken exactly when the bit is clear.
movl __cpu_model+12(%rip), %eax
andl $512, %eax
testl %eax, %eax
jle .L5
# 32-bit absolute address; the 32-bit write zero-extends into %rax.
# NOTE(review): absolute immediates imply a non-PIC build -- confirm flags.
movl $__m_MOD_sub1.avx, %eax
jmp .L4
.L5:
# Second choice: the sse clone, selected by bit mask 8 of the same word.
movl __cpu_model+12(%rip), %eax
andl $8, %eax
testl %eax, %eax
jle .L6
movl $__m_MOD_sub1.sse, %eax
jmp .L4
.L6:
# Fallback when neither bit is set.
movl $__m_MOD_sub1.default, %eax
.L4:
# Common exit: %rax holds the selected clone's address. Afterwards the CFA is
# %rsp + 8 (DWARF register 7 is %rsp).
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE4:
# Symbol size = bytes from the function label to this point.
.size __m_MOD_sub1.resolver, .-__m_MOD_sub1.resolver
# Define the public symbol __m_MOD_sub1 as a global GNU indirect function
# (ifunc) whose value is the address of __m_MOD_sub1.resolver. Under the GNU
# ifunc mechanism the resolver's return value becomes the actual call target.
.globl __m_MOD_sub1
.type __m_MOD_sub1, @gnu_indirect_function
.set __m_MOD_sub1,__m_MOD_sub1.resolver
# Compiler identification string. The string literal must sit on one line to
# assemble.
.ident "GCC: (GNU) 13.0.0 20220916 (experimental) [master r13-2694-g3e8c4b925a9]"
# Empty .note.GNU-stack section: by GNU convention this marks the object as
# not requiring an executable stack.
.section .note.GNU-stack,"",@progbits