Hi! The stack_protect_set_1_<mode> pattern intentionally clears the register it used as a temporary to read the canary from the register and push it back on the stack for security reasons, to make sure the stack canary isn't spilled somewhere else etc. On the following testcase, we end up with a weird: movq %fs:40, %rax movq %rax, 24(%rsp) xorl %eax, %eax movl $30, %eax sequence though, where the reporter rightfully complains it is a waste to clear the register and immediately set it to something else.
We really don't want to split this into two patterns, because then the scheduler and whatever other post-RA passes could stick some further code in between and increase the lifetime of the security sensitive data in the register. So, the following patch uses peephole2 to merge the stack_protect_set_1_<mode> insn with following setter of the same register. Only SImode and for TARGET_64BIT DImode moves are considered, as QI/HImode (unless actually emitted as SImode) moves don't overwrite the whole register, and for simplicity only the most common cases (no XMM/MM etc. sources). Bootstrapped/regtested on x86_64-linux and i686-linux, additionally tested (just check-gcc check-c++-all check-target-libstdc++-v3 with --target_board=unix/-fstack-protector-strong). The -fstack-protector-strong testing was done with additional logging whenever the peephole2's kick in. During the -m64 testing, it kicked in 17682 times, during -m32 testing 6344 times. Ok for trunk? 2019-12-10 Jakub Jelinek <ja...@redhat.com> PR target/92841 * config/i386/i386.md (*stack_protect_set_2_<mode>, *stack_protect_set_3): New define_insns and corresponding define_peephole2s. * gcc.target/i386/pr92841.c: New test. --- gcc/config/i386/i386.md.jj 2019-12-05 10:04:05.357235555 +0100 +++ gcc/config/i386/i386.md 2019-12-09 19:29:31.578885594 +0100 @@ -19732,6 +19732,98 @@ (define_insn "@stack_protect_set_1_<mode "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" [(set_attr "type" "multi")]) +;; Patterns and peephole2s to optimize stack_protect_set_1_<mode> +;; immediately followed by *mov{s,d}i_internal to the same register, +;; where we can avoid the xor{l} above. We don't split this, so that +;; scheduling or anything else doesn't separate the *stack_protect_set* +;; pattern from the set of the register that overwrites the register +;; with a new value. +(define_insn "*stack_protect_set_2_<mode>" + [(set (match_operand:PTR 0 "memory_operand" "=m") + (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")] + UNSPEC_SP_SET)) + (set (match_operand:SI 1 "register_operand" "=&r") + (match_operand:SI 2 "general_operand" "g")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && !reg_overlap_mentioned_p (operands[1], operands[2])" +{ + output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands); + output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands); + if (pic_32bit_operand (operands[2], SImode) + || ix86_use_lea_for_mov (insn, operands + 1)) + return "lea{l}\t{%E2, %1|%1, %E2}"; + else + return "mov{l}\t{%2, %1|%1, %2}"; +} + [(set_attr "type" "multi") + (set_attr "length" "24")]) + +(define_peephole2 + [(parallel [(set (match_operand:PTR 0 "memory_operand") + (unspec:PTR [(match_operand:PTR 1 "memory_operand")] + UNSPEC_SP_SET)) + (set (match_operand:PTR 2 "general_reg_operand") (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_operand:SI 3 "general_reg_operand") + (match_operand:SI 4 "general_operand"))] + "reload_completed + && REGNO (operands[2]) == REGNO (operands[3]) + && !reg_overlap_mentioned_p (operands[3], operands[4]) + && (general_reg_operand (operands[4], SImode) + || !register_operand (operands[4], SImode))" + [(parallel [(set (match_dup 0) + (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET)) + (set (match_dup 3) (match_dup 4)) + (clobber (reg:CC FLAGS_REG))])]) + +(define_insn "*stack_protect_set_3" + [(set (match_operand:DI 0 "memory_operand" "=m,m,m") + (unspec:DI [(match_operand:DI 3 "memory_operand" "m,m,m")] + UNSPEC_SP_SET)) + (set (match_operand:DI 1 "register_operand" "=&r,r,r") + (match_operand:DI 2 "general_operand" "Z,rem,i")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && reload_completed + && !reg_overlap_mentioned_p (operands[1], operands[2])" +{ + output_asm_insn ("mov{q}\t{%3, %1|%1, %3}", operands); + output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", operands); + if (which_alternative == 0) + return "mov{l}\t{%k2, %k1|%k1, %k2}"; + else if (which_alternative == 2) + return "movabs{q}\t{%2, %1|%1, %2}"; + else if (pic_32bit_operand (operands[2], DImode) + || ix86_use_lea_for_mov (insn, operands + 1)) + return "lea{q}\t{%E2, %1|%1, %E2}"; + else + return "mov{q}\t{%2, %1|%1, %2}"; +} + [(set_attr "type" "multi") + (set_attr "length" "24")]) + +(define_peephole2 + [(parallel [(set (match_operand:DI 0 "memory_operand") + (unspec:DI [(match_operand:DI 1 "memory_operand")] + UNSPEC_SP_SET)) + (set (match_operand:DI 2 "general_reg_operand") (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 2) (match_operand:DI 3 "general_operand"))] + "TARGET_64BIT + && reload_completed + && !reg_overlap_mentioned_p (operands[2], operands[3]) + && (general_reg_operand (operands[3], DImode) + || memory_operand (operands[3], DImode) + || x86_64_zext_immediate_operand (operands[3], DImode) + || x86_64_immediate_operand (operands[3], DImode) + || (CONSTANT_P (operands[3]) + && (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[3]))))" + [(parallel [(set (match_dup 0) + (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET)) + (set (match_dup 2) (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])]) + (define_expand "stack_protect_test" [(match_operand 0 "memory_operand") (match_operand 1 "memory_operand") --- gcc/testsuite/gcc.target/i386/pr92841.c.jj 2019-12-09 19:38:29.572759215 +0100 +++ gcc/testsuite/gcc.target/i386/pr92841.c 2019-12-09 19:40:59.642492417 +0100 @@ -0,0 +1,17 @@ +/* PR target/92841 */ +/* { dg-do compile { target fstack_protector } } */ +/* { dg-options "-O2 -fstack-protector-strong -masm=att" } */ +/* { dg-final { scan-assembler-not "xor\[lq]\t%(\[re]\[a-z0-9]*), %\\1\[\n\r]*\tmov\[lq]\t\[^\n\r]*, %\\1" } } */ + +const struct S { int b; } c[] = {30, 12, 20, 0, 11}; +void bar (int *); + +void +foo (void) +{ + int e[4]; + const struct S *a; + for (a = c; a < c + sizeof (c); a++) + if (a->b) + bar (e); +} Jakub