On Sat, Jun 20, 2020 at 3:12 PM H.J. Lu <[email protected]> wrote:
>
> On Sat, Jun 20, 2020 at 11:37 AM Yichao Yu <[email protected]> wrote:
> >
> > On Sat, Jun 20, 2020 at 1:54 PM Yichao Yu <[email protected]> wrote:
> > >
> > > > > The current logic seems to be comparing the whole attribute tree
> > > > > between the callee
> > > > > and caller (or at least the tree starting from the target attribute).
> > > > > This is unnecessary and causes strange dependency of the indirection
> > > > > elimination on unrelated properties like `noinline`(PR95780) and
> > > > > `visibility`(PR95778).
> > > >
> > > > Does it fix PR95780 and PR95778? Can you include testcases for them?
> >
> > OK so replacing
> > https://github.com/gcc-mirror/gcc/commit/b8ce8129a560f64f8b2855c4a3812b7c3c0ebf3f#diff-e2d535917af8555baad2e9c8749e96a5
> > with/adding to the test the following one should work. I still
> > couldn't get test to run though......
> >
> Can you try the enclosed testcases?
The assembly produced are the following with my patch and if I
understand it correctly those should work. Unfortunately I don't know
how to actually run the test as a test (if that makes sense....).
yuyichao% PATH=~/projects/contrib/gcc/host-x86_64-pc-linux-gnu/gcc
~/projects/contrib/gcc/host-
x86_64-pc-linux-gnu/gcc/xg++ -O3 -fPIC pr95778-1.c -S -o -
-fno-asynchronous-unwind-tables -g0
-fno-exceptions
.file "pr95778-1.c"
.text
.p2align 4
.type _ZL2f2Pi.default.1, @function
_ZL2f2Pi.default.1:
movl (%rdi), %eax
ret
.size _ZL2f2Pi.default.1, .-_ZL2f2Pi.default.1
.p2align 4
.type _Z2g2Pi.default.1, @function
_Z2g2Pi.default.1:
jmp _ZL2f2Pi.default.1
.size _Z2g2Pi.default.1, .-_Z2g2Pi.default.1
.p2align 4
.type _ZL2f2Pi.avx2.0, @function
_ZL2f2Pi.avx2.0:
movl (%rdi), %eax
ret
.size _ZL2f2Pi.avx2.0, .-_ZL2f2Pi.avx2.0
.p2align 4
.type _Z2g2Pi.avx2.0, @function
_Z2g2Pi.avx2.0:
jmp _ZL2f2Pi.avx2.0
.size _Z2g2Pi.avx2.0, .-_Z2g2Pi.avx2.0
.section
.text._Z2g2Pi.resolver,"axG",@progbits,_Z2g2Pi.resolver,comdat
.p2align 4
.weak _Z2g2Pi.resolver
.type _Z2g2Pi.resolver, @function
_Z2g2Pi.resolver:
subq $8, %rsp
call __cpu_indicator_init@PLT
movq __cpu_model@GOTPCREL(%rip), %rax
leaq _Z2g2Pi.avx2.0(%rip), %rdx
testb $4, 13(%rax)
leaq _Z2g2Pi.default.1(%rip), %rax
cmovne %rdx, %rax
addq $8, %rsp
ret
.size _Z2g2Pi.resolver, .-_Z2g2Pi.resolver
.globl _Z2g2Pi
.type _Z2g2Pi, @gnu_indirect_function
.set _Z2g2Pi,_Z2g2Pi.resolver
.text
.p2align 4
.type _ZL2f2Pi.resolver, @function
_ZL2f2Pi.resolver:
subq $8, %rsp
call __cpu_indicator_init@PLT
movq __cpu_model@GOTPCREL(%rip), %rax
leaq _ZL2f2Pi.avx2.0(%rip), %rdx
testb $4, 13(%rax)
leaq _ZL2f2Pi.default.1(%rip), %rax
cmovne %rdx, %rax
addq $8, %rsp
ret
.size _ZL2f2Pi.resolver, .-_ZL2f2Pi.resolver
.ident "GCC: (GNU) 11.0.0 20200620 (experimental)"
.section .note.GNU-stack,"",@progbits
office:~
yuyichao% PATH=~/projects/contrib/gcc/host-x86_64-pc-linux-gnu/gcc
~/projects/contrib/gcc/host-
x86_64-pc-linux-gnu/gcc/xg++ -O3 -fPIC pr95778-2.c -S -o -
-fno-asynchronous-unwind-tables -g0
-fno-exceptions
.file "pr95778-2.c"
.text
.p2align 4
.type _Z2f2Pi.default.1, @function
_Z2f2Pi.default.1:
movl (%rdi), %eax
ret
.size _Z2f2Pi.default.1, .-_Z2f2Pi.default.1
.p2align 4
.type _Z2g2Pi.default.1, @function
_Z2g2Pi.default.1:
jmp _Z2f2Pi.default.1
.size _Z2g2Pi.default.1, .-_Z2g2Pi.default.1
.p2align 4
.type _Z2f2Pi.avx2.0, @function
_Z2f2Pi.avx2.0:
movl (%rdi), %eax
ret
.size _Z2f2Pi.avx2.0, .-_Z2f2Pi.avx2.0
.p2align 4
.type _Z2g2Pi.avx2.0, @function
_Z2g2Pi.avx2.0:
jmp _Z2f2Pi.avx2.0
.size _Z2g2Pi.avx2.0, .-_Z2g2Pi.avx2.0
.section
.text._Z2g2Pi.resolver,"axG",@progbits,_Z2g2Pi.resolver,comdat
.p2align 4
.weak _Z2g2Pi.resolver
.type _Z2g2Pi.resolver, @function
_Z2g2Pi.resolver:
subq $8, %rsp
call __cpu_indicator_init@PLT
movq __cpu_model@GOTPCREL(%rip), %rax
leaq _Z2g2Pi.avx2.0(%rip), %rdx
testb $4, 13(%rax)
leaq _Z2g2Pi.default.1(%rip), %rax
cmovne %rdx, %rax
addq $8, %rsp
ret
.size _Z2g2Pi.resolver, .-_Z2g2Pi.resolver
.globl _Z2g2Pi
.type _Z2g2Pi, @gnu_indirect_function
.set _Z2g2Pi,_Z2g2Pi.resolver
.section
.text._Z2f2Pi.resolver,"axG",@progbits,_Z2f2Pi.resolver,comdat
.p2align 4
.weak _Z2f2Pi.resolver
.type _Z2f2Pi.resolver, @function
_Z2f2Pi.resolver:
subq $8, %rsp
call __cpu_indicator_init@PLT
movq __cpu_model@GOTPCREL(%rip), %rax
leaq _Z2f2Pi.avx2.0(%rip), %rdx
testb $4, 13(%rax)
leaq _Z2f2Pi.default.1(%rip), %rax
cmovne %rdx, %rax
addq $8, %rsp
ret
.size _Z2f2Pi.resolver, .-_Z2f2Pi.resolver
.globl _Z2f2Pi
.type _Z2f2Pi, @gnu_indirect_function
.set _Z2f2Pi,_Z2f2Pi.resolver
.ident "GCC: (GNU) 11.0.0 20200620 (experimental)"
.section .note.GNU-stack,"",@progbits
>
> >
> > >
> > > Yes it at least partially fixes them. As mentioned in the other email,
> > > this still does not allow inlining but maybe that should be reported
> > > as another issue?
>
> Please open a separate bug.
OK, will do. thanks.
>
> > > I need to work on the test and might need some help.
> > > Right now I'm not sure how to write such a test (I assume blaming on
> > > the old implementation should tell me where the right test is though).
> > > Also, make -k check somehow gives me some segfaults from LLVM. I was
> > > wondering if it is related to that I was using an out-of-tree non
> > > bootstrap build so I'm recompiling a bootstrap in-source build...
> > >
> > > >
> > > > > This changes the comparison to be only on the `target` attribute
> > > > > which should be
> > > > > the intent of the code.
> > > > > ---
> > > > > gcc/multiple_target.c | 4 ++--
> > > > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
> > > > > index c1cfe8ff978..9eb0afd58cc 100644
> > > > > --- a/gcc/multiple_target.c
> > > > > +++ b/gcc/multiple_target.c
> > > > > @@ -483,7 +483,7 @@ redirect_to_specific_clone (cgraph_node *node)
> > > > > DECL_ATTRIBUTES
> > > > > (e->callee->decl));
> > > > >
> > > > > /* Function is not calling proper target clone. */
> > > > > - if (!attribute_list_equal (attr_target, attr_target2))
> > > > > + if (attr_target2 == NULL_TREE || !attribute_value_equal
> > > > > (attr_target, attr_target2))
> > > > > {
> > > > > while (fv2->prev != NULL)
> > > > > fv2 = fv2->prev;
> > > > > @@ -494,7 +494,7 @@ redirect_to_specific_clone (cgraph_node *node)
> > > > > cgraph_node *callee = fv2->this_node;
> > > > > attr_target2 = lookup_attribute ("target",
> > > > > DECL_ATTRIBUTES
> > > > > (callee->decl));
> > > > > - if (attribute_list_equal (attr_target, attr_target2))
> > > > > + if (attr_target2 != NULL_TREE && attribute_value_equal
> > > > > (attr_target, attr_target2))
> > > > > {
> > > > > e->redirect_callee (callee);
> > > > > cgraph_edge::redirect_call_stmt_to_callee (e);
> > > > > --
> > > > > 2.27.0
> > > > >
> > > >
> > > >
> > > > --
> > > > H.J.
>
>
>
> --
> H.J.