https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63259
Bug ID: 63259
Summary: Detecting byteswap sequence
Product: gcc
Version: 4.9.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: rtl-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: bisqwit at iki dot fi

This is just silly. GCC optimizes the first function into a single opcode (bswap), but not the other. For Clang, it's the other way around.

unsigned byteswap_gcc(unsigned result)
{
    result = ((result & 0xFFFF0000u) >>16) | ((result & 0x0000FFFFu) <<16);
    result = ((result & 0xFF00FF00u) >> 8) | ((result & 0x00FF00FFu) << 8);
    return result;
}

unsigned byteswap_clang(unsigned result)
{
    result = ((result & 0xFF00FF00u) >> 8) | ((result & 0x00FF00FFu) << 8);
    result = ((result & 0xFFFF0000u) >>16) | ((result & 0x0000FFFFu) <<16);
    return result;
}

unsigned byteswap(unsigned v)
{
#ifdef __clang__
    return byteswap_clang(v);
#else
    return byteswap_gcc(v);
#endif
}

GCC output:

byteswap_gcc:
    movl %edi, %eax
    bswap %eax
    ret
byteswap_clang:
    movl %edi, %eax
    andl $-16711936, %eax
    shrl $8, %eax
    movl %eax, %edx
    movl %edi, %eax
    andl $16711935, %eax
    sall $8, %eax
    orl %edx, %eax
    roll $16, %eax
    ret
byteswap:
    movl %edi, %eax
    bswap %eax
    ret

Clang output:

byteswap_gcc: # @byteswap_gcc
    roll $16, %edi
    movl %edi, %eax
    shrl $8, %eax
    andl $16711935, %eax # imm = 0xFF00FF
    shll $8, %edi
    andl $-16711936, %edi # imm = 0xFFFFFFFFFF00FF00
    orl %eax, %edi
    movl %edi, %eax
    retq
byteswap_clang: # @byteswap_clang
    bswapl %edi
    movl %edi, %eax
    retq
byteswap: # @byteswap
    bswapl %edi
    movl %edi, %eax
    retq

Tested both -m32 and -m64, with options: -Ofast -S

Tested versions:
- gcc (Debian 4.9.1-11) 4.9.1
  Target: x86_64-linux-gnu
- Debian clang version 3.5.0-+rc1-2 (tags/RELEASE_35/rc1) (based on LLVM 3.5.0)
  Target: x86_64-pc-linux-gnu