gcc/ChangeLog:

        * config/i386/i386.md (x86_64_shld_nf): New define_insn.
        (x86_64_shld_ndd_nf): Ditto.
        (x86_64_shld_1_nf): Ditto.
        (x86_64_shld_ndd_1_nf): Ditto.
        (*x86_64_shld_shrd_1_nozext_nf): New define_insn_and_split.
        (x86_shld_nf): New define_insn.
        (x86_shld_ndd_nf): Ditto.
        (x86_shld_1_nf): Ditto.
        (x86_shld_ndd_1_nf): Ditto.
        (*x86_shld_shrd_1_nozext_nf): New define_insn_and_split.
        (<insn><dwi>3_doubleword_lowpart_nf): Ditto.
        (x86_64_shrd_nf): New define_insn.
        (x86_64_shrd_ndd_nf): Ditto.
        (x86_64_shrd_1_nf): Ditto.
        (x86_64_shrd_ndd_1_nf): Ditto.
        (*x86_64_shrd_shld_1_nozext_nf): New define_insn_and_split.
        (x86_shrd_nf): New define_insn.
        (x86_shrd_ndd_nf): Ditto.
        (x86_shrd_1_nf): Ditto.
        (x86_shrd_ndd_1_nf): Ditto.
        (*x86_shrd_shld_1_nozext_nf): New define_insn_and_split.
---
 gcc/config/i386/i386.md | 377 +++++++++++++++++++++++++++++++---------
 1 file changed, 296 insertions(+), 81 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 731eb12d13a..4d684e8d919 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14552,7 +14552,7 @@
   DONE;
 })
 
-(define_insn "x86_64_shld"
+(define_insn "x86_64_shld<nf_name>"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
@@ -14562,10 +14562,9 @@
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
                    (minus:QI (const_int 64)
-                             (and:QI (match_dup 2) (const_int 63)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+                             (and:QI (match_dup 2) (const_int 63)))) 0)))]
+  "TARGET_64BIT && <nf_condition>"
+  "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
@@ -14573,7 +14572,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shld_ndd"
+(define_insn "x86_64_shld_ndd<nf_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
@@ -14583,14 +14582,13 @@
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
                    (minus:QI (const_int 64)
-                             (and:QI (match_dup 3) (const_int 63)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+                             (and:QI (match_dup 3) (const_int 63)))) 0)))]
+  "TARGET_APX_NDD && <nf_condition>"
+  "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
-(define_insn "x86_64_shld_1"
+(define_insn "x86_64_shld_1<nf_name>"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
                           (match_operand:QI 2 "const_0_to_63_operand"))
@@ -14598,11 +14596,11 @@
                  (lshiftrt:TI
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
-                   (match_operand:QI 3 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+                   (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
   "TARGET_64BIT
-   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
-  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && <nf_condition>"
+  "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
@@ -14611,7 +14609,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shld_ndd_1"
+(define_insn "x86_64_shld_ndd_1<nf_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                           (match_operand:QI 3 "const_0_to_63_operand"))
@@ -14619,15 +14617,66 @@
                  (lshiftrt:TI
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
-                   (match_operand:QI 4 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+                   (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
   "TARGET_APX_NDD
-   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
-  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
+   && <nf_condition>"
+  "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")
    (set_attr "length_immediate" "1")])
 
+(define_insn_and_split "*x86_64_shld_shrd_1_nozext_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+        (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
+                           (match_operand:QI 2 "const_0_to_63_operand"))
+                (lshiftrt:DI
+                  (match_operand:DI 1 "nonimmediate_operand")
+                  (match_operand:QI 3 "const_0_to_63_operand"))))]
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))  /* SHLD in place.  */
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))  /* SHRD in place.  */
+    {
+      operands[4] = force_reg (DImode, operands[4]);
+      emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)  /* NDD form writes a fresh pseudo.  */
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      if (MEM_P (operands[4]))
+        {
+          operands[1] = force_reg (DImode, operands[1]);
+          emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+                                               operands[2], operands[3]));
+        }
+      else if (MEM_P (operands[1]))
+        emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+                                             operands[3], operands[2]));
+      else
+        emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[4], operands[1],
+                                             operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+    }
+  else  /* Copy operand 4 to a pseudo, then SHLD it in place.  */
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      rtx tmp = gen_reg_rtx (DImode);
+      emit_move_insn (tmp, operands[4]);
+      emit_insn (gen_x86_64_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+    }
+  DONE;
+})
 
 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand")
@@ -14730,7 +14779,7 @@
   emit_move_insn (operands[4], operands[0]);
 })
 
-(define_insn "x86_shld"
+(define_insn "x86_shld<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
@@ -14740,10 +14789,9 @@
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
                    (minus:QI (const_int 32)
-                             (and:QI (match_dup 2) (const_int 31)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
+                             (and:QI (match_dup 2) (const_int 31)))) 0)))]
+  "<nf_condition>"
+  "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")
@@ -14752,7 +14800,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_shld_ndd"
+(define_insn "x86_shld_ndd<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
         (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
@@ -14762,15 +14810,14 @@
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
                    (minus:QI (const_int 32)
-                             (and:QI (match_dup 3) (const_int 31)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+                             (and:QI (match_dup 3) (const_int 31)))) 0)))]
+  "TARGET_APX_NDD && <nf_condition>"
+  "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
 
 
-(define_insn "x86_shld_1"
+(define_insn "x86_shld_1<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0)
                           (match_operand:QI 2 "const_0_to_31_operand"))
@@ -14778,10 +14825,10 @@
                  (lshiftrt:DI
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
-                   (match_operand:QI 3 "const_0_to_63_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
-  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
+                   (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+  && <nf_condition>"
+  "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "length_immediate" "1")
@@ -14791,7 +14838,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_shld_ndd_1"
+(define_insn "x86_shld_ndd_1<nf_name>"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                           (match_operand:QI 3 "const_0_to_31_operand"))
@@ -14799,15 +14846,66 @@
                  (lshiftrt:DI
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
-                   (match_operand:QI 4 "const_0_to_63_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+                   (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
   "TARGET_APX_NDD 
-   && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
-  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && INTVAL (operands[4]) == 32 - INTVAL (operands[3])
+   && <nf_condition>"
+  "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "length_immediate" "1")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*x86_shld_shrd_1_nozext_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+        (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
+                           (match_operand:QI 2 "const_0_to_31_operand"))
+                (lshiftrt:SI
+                  (match_operand:SI 1 "nonimmediate_operand")
+                  (match_operand:QI 3 "const_0_to_31_operand"))))]
+  "TARGET_APX_NF
+   && INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))  /* SHLD in place.  */
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      emit_insn (gen_x86_shld_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))  /* SHRD in place.  */
+    {
+      operands[4] = force_reg (SImode, operands[4]);
+      emit_insn (gen_x86_shrd_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)  /* NDD form writes a fresh pseudo.  */
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      if (MEM_P (operands[4]))
+        {
+          operands[1] = force_reg (SImode, operands[1]);
+          emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+                                            operands[2], operands[3]));
+        }
+      else if (MEM_P (operands[1]))
+        emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[1], operands[4],
+                                          operands[3], operands[2]));
+      else
+        emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[4], operands[1],
+                                          operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+    }
+  else  /* Copy operand 4 to a pseudo, then SHLD it in place.  */
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_move_insn (tmp, operands[4]);
+      emit_insn (gen_x86_shld_1_nf (tmp, operands[1], operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+    }
+  DONE;
+})
 
 (define_insn_and_split "*x86_shld_shrd_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
@@ -15846,6 +15944,26 @@
 })
 
 ;; Split truncations of double word right shifts into x86_shrd_1.
+(define_insn_and_split "<insn><dwi>3_doubleword_lowpart_nf"
+  [(set (match_operand:DWIH 0 "register_operand" "=&r")
+        (subreg:DWIH
+          (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
+                             (match_operand:QI 2 "const_int_operand")) 0))]
+  "TARGET_APX_NF && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+        (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
+                  (subreg:DWIH
+                    (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
+                                  (match_dup 4)) 0)))]
+{
+  split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);
+  operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2]));
+  if (!rtx_equal_p (operands[0], operands[1]))  /* Shift is done in op 0.  */
+    emit_move_insn (operands[0], operands[1]);
+})
+
 (define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
   [(set (match_operand:DWIH 0 "register_operand" "=&r")
        (subreg:DWIH
@@ -15869,7 +15987,7 @@
     emit_move_insn (operands[0], operands[1]);
 })
 
-(define_insn "x86_64_shrd"
+(define_insn "x86_64_shrd<nf_name>"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (lshiftrt:DI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
@@ -15879,10 +15997,9 @@
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
                    (minus:QI (const_int 64)
-                             (and:QI (match_dup 2) (const_int 63)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+                             (and:QI (match_dup 2) (const_int 63)))) 0)))]
+  "TARGET_64BIT && <nf_condition>"
+  "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
@@ -15890,7 +16007,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shrd_ndd"
+(define_insn "x86_64_shrd_ndd<nf_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
@@ -15900,15 +16017,13 @@
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
                    (minus:QI (const_int 64)
-                             (and:QI (match_dup 3) (const_int 63)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+                             (and:QI (match_dup 3) (const_int 63)))) 0)))]
+  "TARGET_APX_NDD && <nf_condition>"
+  "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
-
-(define_insn "x86_64_shrd_1"
+(define_insn "x86_64_shrd_1<nf_name>"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (lshiftrt:DI (match_dup 0)
                             (match_operand:QI 2 "const_0_to_63_operand"))
@@ -15916,11 +16031,11 @@
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
-                   (match_operand:QI 3 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+                   (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
   "TARGET_64BIT
-   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
-  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && <nf_condition>"
+  "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "length_immediate" "1")
@@ -15929,7 +16044,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shrd_ndd_1"
+(define_insn "x86_64_shrd_ndd_1<nf_name>"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                             (match_operand:QI 3 "const_0_to_63_operand"))
@@ -15937,15 +16052,66 @@
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
-                   (match_operand:QI 4 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+                   (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
   "TARGET_APX_NDD
-   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
-  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
+   && <nf_condition>"
+  "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "length_immediate" "1")
    (set_attr "mode" "DI")])
 
+(define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+        (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
+                             (match_operand:QI 2 "const_0_to_63_operand"))
+                (ashift:DI
+                  (match_operand:DI 1 "nonimmediate_operand")
+                  (match_operand:QI 3 "const_0_to_63_operand"))))]
+  "TARGET_64BIT && TARGET_APX_NF
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))  /* SHRD in place.  */
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      emit_insn (gen_x86_64_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))  /* SHLD in place.  */
+    {
+      operands[4] = force_reg (DImode, operands[4]);
+      emit_insn (gen_x86_64_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)  /* NDD form writes a fresh pseudo.  */
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      if (MEM_P (operands[4]))
+        {
+          operands[1] = force_reg (DImode, operands[1]);
+          emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+                                               operands[2], operands[3]));
+        }
+      else if (MEM_P (operands[1]))
+        emit_insn (gen_x86_64_shld_ndd_1_nf (tmp, operands[1], operands[4],
+                                             operands[3], operands[2]));
+      else
+        emit_insn (gen_x86_64_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+                                             operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+    }
+  else  /* Copy operand 4 to a pseudo, then SHRD it in place.  */
+    {
+      operands[1] = force_reg (DImode, operands[1]);
+      rtx tmp = gen_reg_rtx (DImode);
+      emit_move_insn (tmp, operands[4]);
+      emit_insn (gen_x86_64_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+    }
+  DONE;
+})
 
 (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
   [(set (match_operand:DI 0 "nonimmediate_operand")
@@ -16048,7 +16214,7 @@
   emit_move_insn (operands[4], operands[0]);
 })
 
-(define_insn "x86_shrd"
+(define_insn "x86_shrd<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
@@ -16058,10 +16224,9 @@
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
                    (minus:QI (const_int 32)
-                             (and:QI (match_dup 2) (const_int 31)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+                             (and:QI (match_dup 2) (const_int 31)))) 0)))]
+  "<nf_condition>"
+  "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")
@@ -16070,7 +16235,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_shrd_ndd"
+(define_insn "x86_shrd_ndd<nf_name>"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
@@ -16080,14 +16245,13 @@
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
                    (minus:QI (const_int 32)
-                             (and:QI (match_dup 3) (const_int 31)))) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+                             (and:QI (match_dup 3) (const_int 31)))) 0)))]
+  "TARGET_APX_NDD && <nf_condition>"
+  "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
 
-(define_insn "x86_shrd_1"
+(define_insn "x86_shrd_1<nf_name>"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
                             (match_operand:QI 2 "const_0_to_31_operand"))
@@ -16095,10 +16259,10 @@
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
-                   (match_operand:QI 3 "const_0_to_63_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
-  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+                   (match_operand:QI 3 "const_0_to_63_operand")) 0)))]
+  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && <nf_condition>"
+  "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "length_immediate" "1")
@@ -16108,7 +16272,7 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_shrd_ndd_1"
+(define_insn "x86_shrd_ndd_1<nf_name>"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                             (match_operand:QI 3 "const_0_to_31_operand"))
@@ -16116,15 +16280,66 @@
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
-                   (match_operand:QI 4 "const_0_to_63_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+                   (match_operand:QI 4 "const_0_to_63_operand")) 0)))]
   "TARGET_APX_NDD
-   && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
-  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))
+   && <nf_condition>"
+  "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
    (set_attr "length_immediate" "1")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*x86_shrd_shld_1_nozext_nf"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+        (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
+                             (match_operand:QI 2 "const_0_to_31_operand"))
+                (ashift:SI
+                  (match_operand:SI 1 "nonimmediate_operand")
+                  (match_operand:QI 3 "const_0_to_31_operand"))))]
+  "TARGET_APX_NF
+   && INTVAL (operands[3]) == 32 - INTVAL (operands[2])
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (rtx_equal_p (operands[4], operands[0]))  /* SHRD in place.  */
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      emit_insn (gen_x86_shrd_1_nf (operands[0], operands[1], operands[2], operands[3]));
+    }
+  else if (rtx_equal_p (operands[1], operands[0]))  /* SHLD in place.  */
+    {
+      operands[4] = force_reg (SImode, operands[4]);
+      emit_insn (gen_x86_shld_1_nf (operands[0], operands[4], operands[3], operands[2]));
+    }
+  else if (TARGET_APX_NDD)  /* NDD form writes a fresh pseudo.  */
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      if (MEM_P (operands[4]))
+        {
+          operands[1] = force_reg (SImode, operands[1]);
+          emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+                                            operands[2], operands[3]));
+        }
+      else if (MEM_P (operands[1]))
+        emit_insn (gen_x86_shld_ndd_1_nf (tmp, operands[1], operands[4],
+                                          operands[3], operands[2]));
+      else
+        emit_insn (gen_x86_shrd_ndd_1_nf (tmp, operands[4], operands[1],
+                                          operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+    }
+  else  /* Copy operand 4 to a pseudo, then SHRD it in place.  */
+    {
+      operands[1] = force_reg (SImode, operands[1]);
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_move_insn (tmp, operands[4]);
+      emit_insn (gen_x86_shrd_1_nf (tmp, operands[1], operands[2], operands[3]));
+      emit_move_insn (operands[0], tmp);
+    }
+  DONE;
+})
 
 (define_insn_and_split "*x86_shrd_shld_1_nozext"
   [(set (match_operand:SI 0 "nonimmediate_operand")
--
2.31.1

Reply via email to