Hi!

The following testcase ICEs, because the expander is called with
a subreg as operands[2], and gen_lowpart on it creates another subreg
from the same pseudo; the instructions rely on match_dup working:
(define_insn "*<avx512>_vpermi2var<mode>3_mask"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:<sseintvecmode> 2 "register_operand" "0")
             (match_operand:VF_AVX512VL 1 "register_operand" "v")
             (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPERMT2)
          (subreg:VF_AVX512VL (match_dup 2) 0)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
and this only works if operands[2] is initially a REG.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2018-02-12  Jakub Jelinek  <ja...@redhat.com>

        PR target/84336
        * config/i386/sse.md (<avx512>_vpermi2var<mode>3_mask): Force
        operands[2] into a REG before using gen_lowpart on it.

        * gcc.target/i386/pr84336.c: New test.

--- gcc/config/i386/sse.md.jj   2018-02-06 13:13:03.911758746 +0100
+++ gcc/config/i386/sse.md      2018-02-12 18:55:27.257386614 +0100
@@ -18183,7 +18183,10 @@ (define_expand "<avx512>_vpermi2var<mode
          (match_dup 5)
          (match_operand:<avx512fmaskmode> 4 "register_operand")))]
   "TARGET_AVX512F"
-  "operands[5] = gen_lowpart (<MODE>mode, operands[2]);")
+{
+  operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
+  operands[5] = gen_lowpart (<MODE>mode, operands[2]);
+})
 
 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
   [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
--- gcc/testsuite/gcc.target/i386/pr84336.c.jj  2018-02-12 19:10:15.861401288 +0100
+++ gcc/testsuite/gcc.target/i386/pr84336.c     2018-02-12 19:09:17.911405540 +0100
@@ -0,0 +1,13 @@
+/* PR target/84336 */
+/* { dg-do compile } */
+/* { dg-options "-O0 -ftree-ter -mavx512f" } */
+
+#include <x86intrin.h>
+
+struct S { __m512i h; } b;
+
+__m512
+foo (__m512 a, __mmask16 c, __m512 d)
+{
+  return _mm512_mask2_permutex2var_ps (a, b.h, c, d);
+}

        Jakub

Reply via email to