https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108337
Bug ID: 108337
Summary: Misaligned memory access issues when inline assembly is used with optimization on x86_64
Product: gcc
Version: 12.2.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: eric-bugs at omnifarious dot org
Target Milestone: ---

The following code works fine when compiled this way:

g++ -std=c++20 -march=znver2 -static -O1 -nostartfiles -nostdlib -Wl,-emain foo.cpp

but when compiled this way generates a segfault before outputting anything:

g++ -std=c++20 -march=znver2 -static -O2 -nostartfiles -nostdlib -Wl,-emain foo.cpp

---------- foo.cpp ---------
// Raw value type used for every syscall argument and for the syscall result.
// Note that it is unsigned.
using val_t = unsigned long;
// Type of the syscall number handed to do_syscall().
using call_id = unsigned short;

// Local stand-ins for remove_reference / forward; the test case as shown
// includes no headers and defines these itself.
template<typename _Tp> struct remove_reference { typedef _Tp type; };
template<typename _Tp> struct remove_reference<_Tp&> { typedef _Tp type; };
template<typename _Tp> struct remove_reference<_Tp&&> { typedef _Tp type; };

// Casts an lvalue to _Tp&&, so the value category encoded in _Tp is kept
// when arguments are passed on.
template<typename _Tp>
[[__nodiscard__]] constexpr _Tp&&
forward(typename remove_reference<_Tp>::type& __t) noexcept
{
   return static_cast<_Tp&&>(__t);
}

// Wraps one syscall argument as a val_t. The constructors are deliberately
// non-explicit so integers and pointers convert implicitly at the call site.
struct syscall_param {
   // Stores an integer argument as-is.
   syscall_param(val_t v) noexcept : value(v) { }
   // Stores a pointer argument as its integer representation; the
   // static_assert checks that a pointer and val_t have the same size.
   syscall_param(void *v) noexcept : value(reinterpret_cast<val_t>(v)) {
      static_assert(sizeof(void *) == sizeof(val_t));
   }
   // Same as above, for pointers to const data.
   syscall_param(void const *v) noexcept : value(reinterpret_cast<val_t>(v)) {
      static_assert(sizeof(void *) == sizeof(val_t));
   }
   val_t value;
};

// Executes the `syscall` instruction with one argument.
//   callnum - syscall number, widened to unsigned long for the "a" input.
//   p1      - first argument, passed through the "D" constraint.
// Returns whatever the instruction leaves in the "a" output operand.
// (On x86, GCC's "a"/"D"/"S"/"d" constraints select rax/rdi/rsi/rdx.)
inline val_t do_syscall(call_id callnum, syscall_param const &p1) noexcept
{
   val_t retval;
   asm volatile (
      "syscall\n\t"
      :"=a"(retval)
      :"a"(static_cast<unsigned long>(callnum)), "D"(p1.value)
      // rcx and r11 are declared clobbered; "memory" keeps the compiler from
      // caching or reordering memory accesses across the asm statement.
      :"%rcx", "%r11", "memory"
   );
   return retval;
}

// Three-argument variant: p1, p2 and p3 go through the "D", "S" and "d"
// constraints respectively. Otherwise identical to the overload above.
inline val_t do_syscall(call_id callnum, syscall_param const &p1, syscall_param const &p2, syscall_param const &p3) noexcept
{
   val_t retval;
   asm volatile (
      "syscall\n\t"
      :"=a"(retval)
      :"a"(static_cast<unsigned long>(callnum)), "D"(p1.value), "S"(p2.value), "d"(p3.value)
      :"%rcx", "%r11", "memory"
   );
   return retval;
}

// Forwards the arguments to the matching do_syscall overload (only the one-
// and three-argument forms exist here); each argument converts implicitly to
// a temporary syscall_param. The result is returned unchanged: no error
// checking is done in this function.
template <typename... T>
val_t syscall_expected(call_id callnum, T &&... args) noexcept
{
   val_t result = do_syscall(callnum, forward<T>(args)...);
   return result;
}

// Issues syscall number 1 with (fd, data, size) and returns the raw result.
// NOTE(review): 1 is presumably write(2) on x86-64 Linux; confirm against
// the kernel syscall table.
inline val_t write(int fd, char const *data, unsigned long size) noexcept
{
   return syscall_expected(1, fd, data, size);
}

// Issues syscall number 231 with the given status and is declared noreturn;
// __builtin_unreachable() tells the compiler control never continues past
// the syscall.
// NOTE(review): 231 is presumably exit_group on x86-64 Linux; confirm.
inline void exit [[noreturn]](int status) noexcept
{
   syscall_expected(231, status);
   __builtin_unreachable();
}

// Writes "Hello World N!\n" to fd 1 for N = 0..9, then exits.
//
// NOTE(review): the compile commands in the report use -nostartfiles and
// -Wl,-emain, so this function is the ELF entry point itself and is not
// called from startup code. Presumably the stack alignment at process entry
// then differs from what the compiler assumes at a normal function entry,
// which would make an aligned vector store into the 16-byte `msg` buffer
// fault at -O2. Verify against the x86-64 psABI before treating this as a
// compiler bug. argc/argv are unused here.
int main(int argc, char *argv[])
{
   int i = 0;
   char msg[] = "Hello World 0!\n";
   // sizeof(msg) - 1 leaves out the terminating NUL.
   auto result = write(1, msg, sizeof(msg) - 1);

   i = 1;
   // NOTE(review): `result` is a val_t (unsigned long), so `result >= 0` is
   // always true; the loop is effectively bounded only by `i < 10`.
   while (result >= 0 && i < 10) {
      // Index 12 is the digit character in the message.
      msg[12] = i++ + '0';
      result = write(1, msg, sizeof(msg) - 1);
   }
   // NOTE(review): for the same reason this always passes 0 to exit().
   exit(result >= 0 ? 0 : 1);
}