Hi!

The stack_protect_set_1_<mode> pattern intentionally clears the register it
uses as a temporary to copy the canary value into its stack slot, for
security reasons, to make sure the stack canary isn't left around in a
register and possibly spilled somewhere else etc.  On the following
testcase, we end up with a weird:
        movq    %fs:40, %rax
        movq    %rax, 24(%rsp)
        xorl    %eax, %eax
        movl    $30, %eax
sequence, though, where the reporter rightfully complains that it is wasteful
to clear the register and then immediately set it to something else.
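
What we'd like to get instead (a hand-written sketch of the desired output,
not copied from an actual build) is just:
        movq    %fs:40, %rax
        movq    %rax, 24(%rsp)
        movl    $30, %eax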

We really don't want to split this into two patterns, because then the
scheduler or whatever other post-RA passes could stick some further
code in between and increase the lifetime of the security-sensitive
data in the register.

So, the following patch uses a peephole2 to merge the
stack_protect_set_1_<mode> insn with a following setter of the same register.
Only SImode and, for TARGET_64BIT, DImode moves are considered, as QI/HImode
moves (unless actually emitted as SImode) don't overwrite the whole
register, and for simplicity only the most common cases are handled (no
XMM/MM etc. sources).
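
At the RTL level the new peephole2s just move the following register set into
the *stack_protect_set* parallel in place of the (set (reg) (const_int 0))
clearing, roughly (a hand-written sketch, not an actual RTL dump):
  (parallel [(set (mem) (unspec [(mem)] UNSPEC_SP_SET))
             (set (reg) (const_int 0))
             (clobber (reg:CC FLAGS_REG))])
  (set (reg) (new value))
becomes
  (parallel [(set (mem) (unspec [(mem)] UNSPEC_SP_SET))
             (set (reg) (new value))
             (clobber (reg:CC FLAGS_REG))])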

Bootstrapped/regtested on x86_64-linux and i686-linux, additionally tested
(just check-gcc check-c++-all check-target-libstdc++-v3 with
--target_board=unix/-fstack-protector-strong).  The -fstack-protector-strong
testing was done with additional logging whenever the peephole2s kick in.
During the -m64 testing they kicked in 17682 times, and during the -m32
testing 6344 times.  Ok for trunk?

2019-12-10  Jakub Jelinek  <ja...@redhat.com>

        PR target/92841
        * config/i386/i386.md (*stack_protect_set_2_<mode>,
        *stack_protect_set_3): New define_insns and corresponding
        define_peephole2s.

        * gcc.target/i386/pr92841.c: New test.

--- gcc/config/i386/i386.md.jj  2019-12-05 10:04:05.357235555 +0100
+++ gcc/config/i386/i386.md     2019-12-09 19:29:31.578885594 +0100
@@ -19732,6 +19732,98 @@ (define_insn "@stack_protect_set_1_<mode
   "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, 
%2}\;xor{l}\t%k2, %k2"
   [(set_attr "type" "multi")])
 
+;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
+;; immediately followed by *mov{s,d}i_internal to the same register,
+;; where we can avoid the xor{l} above.  We don't split this, so that
+;; scheduling or anything else doesn't separate the *stack_protect_set*
+;; pattern from the set of the register that overwrites the register
+;; with a new value.
+(define_insn "*stack_protect_set_2_<mode>"
+  [(set (match_operand:PTR 0 "memory_operand" "=m")
+       (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
+                   UNSPEC_SP_SET))
+   (set (match_operand:SI 1 "register_operand" "=&r")
+       (match_operand:SI 2 "general_operand" "g"))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && !reg_overlap_mentioned_p (operands[1], operands[2])"
+{
+  output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
+  output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
+  if (pic_32bit_operand (operands[2], SImode)
+      || ix86_use_lea_for_mov (insn, operands + 1))
+    return "lea{l}\t{%E2, %1|%1, %E2}";
+  else
+    return "mov{l}\t{%2, %1|%1, %2}";
+}
+  [(set_attr "type" "multi")
+   (set_attr "length" "24")])
+
+(define_peephole2
+ [(parallel [(set (match_operand:PTR 0 "memory_operand")
+                 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
+                             UNSPEC_SP_SET))
+            (set (match_operand:PTR 2 "general_reg_operand") (const_int 0))
+            (clobber (reg:CC FLAGS_REG))])
+  (set (match_operand:SI 3 "general_reg_operand")
+       (match_operand:SI 4 "general_operand"))]
+ "reload_completed
+  && REGNO (operands[2]) == REGNO (operands[3])
+  && !reg_overlap_mentioned_p (operands[3], operands[4])
+  && (general_reg_operand (operands[4], SImode)
+      || !register_operand (operands[4], SImode))"
+ [(parallel [(set (match_dup 0)
+                 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
+            (set (match_dup 3) (match_dup 4))
+            (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn "*stack_protect_set_3"
+  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
+       (unspec:DI [(match_operand:DI 3 "memory_operand" "m,m,m")]
+                  UNSPEC_SP_SET))
+   (set (match_operand:DI 1 "register_operand" "=&r,r,r")
+       (match_operand:DI 2 "general_operand" "Z,rem,i"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && reload_completed
+   && !reg_overlap_mentioned_p (operands[1], operands[2])"
+{
+  output_asm_insn ("mov{q}\t{%3, %1|%1, %3}", operands);
+  output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", operands);
+  if (which_alternative == 0)
+    return "mov{l}\t{%k2, %k1|%k1, %k2}";
+  else if (which_alternative == 2)
+    return "movabs{q}\t{%2, %1|%1, %2}";
+  else if (pic_32bit_operand (operands[2], DImode)
+          || ix86_use_lea_for_mov (insn, operands + 1))
+    return "lea{q}\t{%E2, %1|%1, %E2}";
+  else
+    return "mov{q}\t{%2, %1|%1, %2}";
+}
+  [(set_attr "type" "multi")
+   (set_attr "length" "24")])
+
+(define_peephole2
+ [(parallel [(set (match_operand:DI 0 "memory_operand")
+                 (unspec:DI [(match_operand:DI 1 "memory_operand")]
+                            UNSPEC_SP_SET))
+            (set (match_operand:DI 2 "general_reg_operand") (const_int 0))
+            (clobber (reg:CC FLAGS_REG))])
+  (set (match_dup 2) (match_operand:DI 3 "general_operand"))]
+ "TARGET_64BIT
+  && reload_completed
+  && !reg_overlap_mentioned_p (operands[2], operands[3])
+  && (general_reg_operand (operands[3], DImode)
+      || memory_operand (operands[3], DImode)
+      || x86_64_zext_immediate_operand (operands[3], DImode)
+      || x86_64_immediate_operand (operands[3], DImode)
+      || (CONSTANT_P (operands[3])
+         && (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[3]))))"
+ [(parallel [(set (match_dup 0)
+                 (unspec:DI [(match_dup 1)] UNSPEC_SP_SET))
+            (set (match_dup 2) (match_dup 3))
+            (clobber (reg:CC FLAGS_REG))])])
+
 (define_expand "stack_protect_test"
   [(match_operand 0 "memory_operand")
    (match_operand 1 "memory_operand")
--- gcc/testsuite/gcc.target/i386/pr92841.c.jj  2019-12-09 19:38:29.572759215 +0100
+++ gcc/testsuite/gcc.target/i386/pr92841.c     2019-12-09 19:40:59.642492417 +0100
@@ -0,0 +1,17 @@
+/* PR target/92841 */
+/* { dg-do compile { target fstack_protector } } */
+/* { dg-options "-O2 -fstack-protector-strong -masm=att" } */
+/* { dg-final { scan-assembler-not "xor\[lq]\t%(\[re]\[a-z0-9]*), %\\1\[\n\r]*\tmov\[lq]\t\[^\n\r]*, %\\1" } } */
+
+const struct S { int b; } c[] = {30, 12, 20, 0, 11};
+void bar (int *);
+
+void
+foo (void)
+{
+  int e[4];
+  const struct S *a;
+  for (a = c; a < c + sizeof (c); a++)
+    if (a->b)
+      bar (e);
+}

        Jakub
