https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80706
--- Comment #11 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
(In reply to Jakub Jelinek from comment #10)
> Perhaps if we had such a pattern that we'd split into a normal DFmode load
> (perhaps with unspec before reload to guarantee it is atomic load), we
> wouldn't need the temporary at all?

--- gcc/config/i386/predicates.md.jj	2017-01-01 12:45:42.000000000 +0100
+++ gcc/config/i386/predicates.md	2017-05-11 11:42:17.649136648 +0200
@@ -1657,3 +1657,14 @@ (define_predicate "register_or_constm1_o
   (ior (match_operand 0 "register_operand")
        (and (match_code "const_int")
	    (match_test "op == constm1_rtx"))))
+
+;; Return true if OP is a memory_operand, including volatile MEM.
+(define_predicate "volatile_memory_operand"
+  (match_code "mem,subreg")
+{
+  int save_volatile_ok = volatile_ok;
+  volatile_ok = 1;
+  bool ret = memory_operand (op, mode);
+  volatile_ok = save_volatile_ok;
+  return ret;
+})
--- gcc/config/i386/sync.md.jj	2017-05-11 10:16:03.000000000 +0200
+++ gcc/config/i386/sync.md	2017-05-11 11:42:45.777767179 +0200
@@ -210,6 +210,17 @@ (define_insn_and_split "atomic_loaddi_fp
   DONE;
 })

+(define_insn_and_split "*atomic_loaddf_fpu"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=x,f")
+	(subreg:DF (unspec:DI [(match_operand:DI 1 "volatile_memory_operand"
+				"m,m")]
+			      UNSPEC_LDA) 0))]
+  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[1] = gen_lowpart (DFmode, operands[1]);")
+
 (define_peephole2
   [(set (match_operand:DF 0 "fp_register_operand")
	(unspec:DF [(match_operand:DI 1 "memory_operand")]

The patch above does that; unfortunately combine still fails, because the insn
it wants to match afterwards is:
(set (reg:DF 91)
     (plus:DF (reg:DF 92)
	      (const_double:DF 1.0e+0 [0x0.8p+1])))
But the above patch at least helps a little bit on the following testcase:

typedef union { unsigned long long ll; double d; } u_t;

u_t d = { .d = 5.0 };

void
foo_d (double x)
{
  u_t tmp;
  tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);
  tmp.d += x;
  __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
}

Before the #c7 patch, we get:
	fldl	d
	faddl	24(%esp)
	fstpl	d
	lock; orl	$0, (%esp)
with just the #c7 patch we get:
	fldl	d
	fstl	(%esp)
	faddl	24(%esp)
	fstl	d
	fstpl	(%esp)
	lock; orl	$0, (%esp)
so 2 useless stores.  With #c7 and this patch we get:
	fldl	d
	faddl	24(%esp)
	fstl	d
	fstpl	(%esp)
	lock; orl	$0, (%esp)
i.e. one useless store.

So, either we need combine or some other pre-reload pass to figure out that
all uses of the atomic_loaddi_fpu pattern are (subreg:DF (reg:DI ...)) and
optimize that into the atomic_loaddf_fpu pattern, with the uses changed into
just the DFmode pseudo.  Allowing =f in atomic_loaddi_fpu won't work, as
DImode is not VALID_FP_MODE_P.