https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104275
Bug ID: 104275
Summary: Os does not apply return value optimization while O2
and O3 do
Product: gcc
Version: 12.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: andij.cr at gmail dot com
Target Milestone: ---
Tested with GCC 8 through GCC 11.
An identity function (mark) interposed in a call stack that ends in a complex
type is reasonably elided at -O2 and -O3, but at -Os it produces somewhat
strange assembly.
Tested on arm32 and x86_64.
for a less artificial example, where the problem still appears:
https://gcc.godbolt.org/z/GbKrGKa6f
code:
https://godbolt.org/z/v95jEvvzc
// Condensed result of a constexpr transformation.
// In this form, it would be nice if it were transparent to the value.
//
// Identity function: casts `head` back to `Ts&&` and returns it. Because the
// return type is `decltype(auto)`, the result is a reference of type `Ts&&`
// (an rvalue reference when called with an rvalue), not a new object.
template <typename Ts>
auto mark(Ts&& head) noexcept -> decltype(auto) {
return static_cast<Ts&&>(head);
}
#include <vector>
// generic producer of a complex type
// Only declared here; no definition appears in this snippet, and the listings
// below show it as an out-of-line `call generate()`.
auto generate() -> std::vector<double>;
// here is a stack of functions using mark
namespace {
// in an anonymous namespace to nudge the compiler to inline them
// Each level passes the previous level's result through mark() and returns
// it. The return types are deduced with plain `auto`, which drops the
// reference mark() yields, so every level returns a std::vector<double> by
// value initialised from that rvalue reference.
auto user_base() { return mark(generate()); }
auto user_mark() { return mark(user_base()); }
auto user_mark2() { return mark(user_mark()); }
auto user_mark3() { return mark(user_mark2()); }
} // namespace
// this function has a normal assembly at O2 and O3
// but a silly one at Os
// Outermost level of the chain and the only user_* function outside the
// anonymous namespace; the assembly listings below are for this function.
auto user_mark4() { return mark(user_mark3()); }
compiled with
-std=c++17 -O2
user_mark4():
push r12
mov r12, rdi
sub rsp, 32
mov rdi, rsp
call generate()
mov rax, QWORD PTR [rsp]
mov QWORD PTR [r12], rax
mov rax, QWORD PTR [rsp+8]
mov QWORD PTR [r12+8], rax
mov rax, QWORD PTR [rsp+16]
mov QWORD PTR [r12+16], rax
add rsp, 32
mov rax, r12
pop r12
ret
compiled with
-std=c++17 -Os
user_mark4():
push r13
push r12
mov r12, rdi
push rbp
push rbx
sub rsp, 40
lea rdi, [rsp+8]
call generate()
lea rdi, [rsp+8]
mov r13, QWORD PTR [rsp+8]
mov rbp, QWORD PTR [rsp+16]
mov QWORD PTR [rsp+8], 0
mov rbx, QWORD PTR [rsp+24]
mov QWORD PTR [rsp+16], 0
mov QWORD PTR [rsp+24], 0
call std::_Vector_base<double, std::allocator<double>
>::~_Vector_base() [base object destructor]
lea rdi, [rsp+8]
mov QWORD PTR [rsp+24], 0
mov QWORD PTR [rsp+16], 0
mov QWORD PTR [rsp+8], 0
call std::_Vector_base<double, std::allocator<double>
>::~_Vector_base() [base object destructor]
lea rdi, [rsp+8]
mov QWORD PTR [rsp+24], 0
mov QWORD PTR [rsp+16], 0
mov QWORD PTR [rsp+8], 0
call std::_Vector_base<double, std::allocator<double>
>::~_Vector_base() [base object destructor]
lea rdi, [rsp+8]
mov QWORD PTR [rsp+24], 0
mov QWORD PTR [rsp+16], 0
mov QWORD PTR [rsp+8], 0
call std::_Vector_base<double, std::allocator<double>
>::~_Vector_base() [base object destructor]
mov QWORD PTR [r12], r13
lea rdi, [rsp+8]
mov QWORD PTR [r12+8], rbp
mov QWORD PTR [r12+16], rbx
mov QWORD PTR [rsp+24], 0
mov QWORD PTR [rsp+16], 0
mov QWORD PTR [rsp+8], 0
call std::_Vector_base<double, std::allocator<double>
>::~_Vector_base() [base object destructor]
add rsp, 40
mov rax, r12
pop rbx
pop rbp
pop r12
pop r13
ret