diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 016b01b..c0012a9 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -10774,6 +10774,29 @@ _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
 					       __R);
 }
 
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+			  __m128d __B, const int __R)
+{
+  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
+						 (__v2df) __B,
+						 (__v2df) __W,
+						 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+			   const int __R)
+{
+  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
+						 (__v2df) __B,
+						 (__v2df)
+						 _mm_setzero_pd (),
+						 (__mmask8) __U, __R);
+}
+
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
@@ -10783,6 +10806,29 @@ _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
 					      __R);
 }
 
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+			  __m128 __B, const int __R)
+{
+  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
+						 (__v4sf) __B,
+						 (__v4sf) __W,
+						 (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+			   const int __R)
+{
+  return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
+						 (__v4sf) __B,
+						 (__v4sf)
+						 _mm_setzero_ps (),
+						 (__mmask8) __U, __R);
+}
+
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
@@ -10792,6 +10838,29 @@ _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
 					       __R);
 }
 
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+			  __m128d __B, const int __R)
+{
+  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
+						 (__v2df) __B,
+						 (__v2df) __W,
+						 (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+			   const int __R)
+{
+  return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
+						 (__v2df) __B,
+						 (__v2df)
+						 _mm_setzero_pd (),
+						 (__mmask8) __U, __R);
+}
+
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
@@ -10801,18 +10870,66 @@ _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
 					      __R);
 }
 
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+			  __m128 __B, const int __R)
+{
+  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
+						 (__v4sf) __B,
+						 (__v4sf) __W,
+						 (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+			   const int __R)
+{
+  return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
+						 (__v4sf) __B,
+						 (__v4sf)
+						 _mm_setzero_ps (),
+						 (__mmask8) __U, __R);
+}
+
 #else
 #define _mm_max_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_addsd_round(A, B, C)
+    (__m128d)__builtin_ia32_maxsd_round(A, B, C)
+
+#define _mm_mask_max_round_sd(W, U, A, B, C) \
+    (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_max_round_sd(U, A, B, C)   \
+    (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
 
 #define _mm_max_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_addss_round(A, B, C)
+    (__m128)__builtin_ia32_maxss_round(A, B, C)
+
+#define _mm_mask_max_round_ss(W, U, A, B, C) \
+    (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_max_round_ss(U, A, B, C)   \
+    (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
 
 #define _mm_min_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_subsd_round(A, B, C)
+    (__m128d)__builtin_ia32_minsd_round(A, B, C)
+
+#define _mm_mask_min_round_sd(W, U, A, B, C) \
+    (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_min_round_sd(U, A, B, C)   \
+    (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
 
 #define _mm_min_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_subss_round(A, B, C)
+    (__m128)__builtin_ia32_minss_round(A, B, C)
+
+#define _mm_mask_min_round_ss(W, U, A, B, C) \
+    (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_min_round_ss(U, A, B, C)   \
+    (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+
 #endif
 
 extern __inline __m512d
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index fce32ef..c94abd3 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2442,11 +2442,15 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask_round", IX86_BUILTIN_MAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask_round", IX86_BUILTIN_MAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask_round", IX86_BUILTIN_MINSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask_round", IX86_BUILTIN_MINSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 850970f..cf9f102 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1910,7 +1910,7 @@
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
+(define_insn "<sse>_vm<code><mode>3<mask_name><round_saeonly_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (smaxmin:VF_128
@@ -1921,7 +1921,7 @@
   "TARGET_SSE"
   "@
    <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
+   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
    (set_attr "btver2_sse_attr" "maxmin")
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 6672039..8377555 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -245,11 +245,15 @@
 #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
+#define __builtin_ia32_maxsd_mask_round(A, B, C, D, E) __builtin_ia32_maxsd_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
+#define __builtin_ia32_maxss_mask_round(A, B, C, D, E) __builtin_ia32_maxss_mask_round(A, B, C, D, 8)
 #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
+#define __builtin_ia32_minsd_mask_round(A, B, C, D, E) __builtin_ia32_minsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
+#define __builtin_ia32_minss_mask_round(A, B, C, D, E) __builtin_ia32_minss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 8)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c
index 20a0a3d..c1c8f8d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-1.c
@@ -1,13 +1,18 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128d x1, x2;
+volatile __m128d x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_max_round_sd (x1, x2, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_max_round_sd (x1, m, x2, x3,  _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_max_round_sd (m, x1, x2, _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c
new file mode 100644
index 0000000..29db777
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxsd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_max (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] > s2[0] ? s1[0] : s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+      res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = _mm_max_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res2.x = _mm_mask_max_round_sd (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res3.x = _mm_maskz_max_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_max (res_ref, src1.a, src2.a);
+
+  if (check_union128d (res1, res_ref))
+    abort();
+  
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c
index 0c692aa..ecd8757 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-1.c
@@ -1,13 +1,18 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128 x1, x2;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_max_round_ss (x1, x2, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_max_round_ss (x1, m, x2, x3, _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_max_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c
new file mode 100644
index 0000000..7676ae5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmaxss-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_max (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] > s2[0] ? s1[0] : s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    {
+      r[i] = s1[i];
+    }
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+      res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = _mm_max_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res2.x = _mm_mask_max_round_ss (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res3.x = _mm_maskz_max_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_max (res_ref, src1.a, src2.a);
+
+  if (check_union128 (res1, res_ref))
+    abort();
+  
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c
index 29c0948..22ada87 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-1.c
@@ -1,13 +1,18 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128d x1, x2;
+volatile __m128d x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_min_round_sd (x1, x2, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_min_round_sd (x1, m, x2, x3, _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_min_round_sd (m, x1, x2, _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c
new file mode 100644
index 0000000..79a051f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminsd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+static void
+calc_min (double *r, double *s1, double *s2)
+{
+  r[0] = s1[0] < s2[0] ? s1[0] : s2[0];
+  r[1] = s1[1];
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128d res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  double res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+      res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = _mm_min_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res2.x = _mm_mask_min_round_sd (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res3.x = _mm_maskz_min_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_min (res_ref, src1.a, src2.a);
+
+  if (check_union128d (res1, res_ref))
+    abort();
+  
+  MASK_MERGE (d) (res_ref, mask, 1);
+  if (check_union128d (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (d) (res_ref, mask, 1);
+  if (check_union128d (res3, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
index 62a4d38..5703a88 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminss-1.c
@@ -1,13 +1,18 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
-volatile __m128 x1, x2;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
 
 void extern
 avx512f_test (void)
 {
   x1 = _mm_min_round_ss (x1, x2, _MM_FROUND_NO_EXC);
+  x1 = _mm_mask_min_round_ss (x1, m, x2, x3, _MM_FROUND_NO_EXC);
+  x1 = _mm_maskz_min_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c
new file mode 100644
index 0000000..ad1fcb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vminss-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#include "avx512f-helper.h"
+
+#define SIZE (128 / 32)
+#include "avx512f-mask-type.h"
+
+static void
+calc_min (float *r, float *s1, float *s2)
+{
+  r[0] = s1[0] < s2[0] ? s1[0] : s2[0];
+  int i;
+  for (i = 1; i < SIZE; i++)
+    {
+      r[i] = s1[i];
+    }
+}
+
+void
+avx512f_test (void)
+{
+  int i, sign;
+  union128 res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  float res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1.5 + 34.67 * i * sign;
+      src2.a[i] = -22.17 * i * sign + 1.0;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+      res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = _mm_min_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res2.x = _mm_mask_min_round_ss (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+  res3.x = _mm_maskz_min_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
+
+  calc_min (res_ref, src1.a, src2.a);
+
+  if (check_union128 (res1, res_ref))
+    abort();
+  
+  MASK_MERGE () (res_ref, mask, 1);
+  if (check_union128 (res2, res_ref))
+    abort ();
+
+  MASK_ZERO () (res_ref, mask, 1);
+  if (check_union128 (res3, res_ref))
+    abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 0541129..79879d7 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -262,11 +262,15 @@
 #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
+#define __builtin_ia32_maxsd_mask_round(A, B, C, D, E) __builtin_ia32_maxsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
+#define __builtin_ia32_maxss_mask_round(A, B, C, D, E) __builtin_ia32_maxss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
+#define __builtin_ia32_minsd_mask_round(A, B, C, D, E) __builtin_ia32_minsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
+#define __builtin_ia32_minss_mask_round(A, B, C, D, E) __builtin_ia32_minss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index ce341a2..547314a 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -391,9 +391,13 @@ test_3 (_mm512_maskz_insertf64x4, __m512d, __mmask8, __m512d, __m256d, 1)
 test_3 (_mm512_maskz_inserti32x4, __m512i, __mmask16, __m512i, __m128i, 1)
 test_3 (_mm512_maskz_inserti64x4, __m512i, __mmask8, __m512i, __m256i, 1)
 test_3 (_mm512_maskz_max_round_pd, __m512d, __mmask8, __m512d, __m512d, 8)
+test_3 (_mm_maskz_max_round_sd, __m128d, __mmask8, __m128d, __m128d, 8)
 test_3 (_mm512_maskz_max_round_ps, __m512, __mmask16, __m512, __m512, 8)
+test_3 (_mm_maskz_max_round_ss, __m128, __mmask8, __m128, __m128, 8)
 test_3 (_mm512_maskz_min_round_pd, __m512d, __mmask8, __m512d, __m512d, 8)
+test_3 (_mm_maskz_min_round_sd, __m128d, __mmask8, __m128d, __m128d, 8)
 test_3 (_mm512_maskz_min_round_ps, __m512, __mmask16, __m512, __m512, 8)
+test_3 (_mm_maskz_min_round_ss, __m128, __mmask8, __m128, __m128, 8)
 test_3 (_mm512_maskz_mul_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
 test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 9)
@@ -483,9 +487,13 @@ test_4 (_mm512_mask_insertf64x4, __m512d, __m512d, __mmask8, __m512d, __m256d, 1
 test_4 (_mm512_mask_inserti32x4, __m512i, __m512i, __mmask16, __m512i, __m128i, 1)
 test_4 (_mm512_mask_inserti64x4, __m512i, __m512i, __mmask8, __m512i, __m256i, 1)
 test_4 (_mm512_mask_max_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 8)
+test_4 (_mm_mask_max_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 8)
 test_4 (_mm512_mask_max_round_ps, __m512, __m512, __mmask16, __m512, __m512, 8)
+test_4 (_mm_mask_max_round_ss, __m128, __m128, __mmask8, __m128, __m128, 8)
 test_4 (_mm512_mask_min_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 8)
+test_4 (_mm_mask_min_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 8)
 test_4 (_mm512_mask_min_round_ps, __m512, __m512, __mmask16, __m512, __m512, 8)
+test_4 (_mm_mask_min_round_ss, __m128, __m128, __mmask8, __m128, __m128, 8)
 test_4 (_mm512_mask_mul_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
 test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index e2b4f89..96c663b 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -263,11 +263,15 @@
 #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
+#define __builtin_ia32_maxsd_mask_round(A, B, C, D, E) __builtin_ia32_maxsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
+#define __builtin_ia32_maxss_mask_round(A, B, C, D, E) __builtin_ia32_maxss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
+#define __builtin_ia32_minsd_mask_round(A, B, C, D, E) __builtin_ia32_minsd_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
+#define __builtin_ia32_minss_mask_round(A, B, C, D, E) __builtin_ia32_minss_mask_round(A, B, C, D, 4)
 #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 8)
 #define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 8)
diff --git a/gcc/testsuite/gcc.target/i386/testround-1.c b/gcc/testsuite/gcc.target/i386/testround-1.c
index 829ce78..2c13381 100644
--- a/gcc/testsuite/gcc.target/i386/testround-1.c
+++ b/gcc/testsuite/gcc.target/i386/testround-1.c
@@ -187,12 +187,24 @@ test_round (void)
   m512 = _mm512_max_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_mask_max_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_max_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_max_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_max_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_max_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_max_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_max_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_max_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_min_round_pd (m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_mask_min_round_pd (m512d, mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_maskz_min_round_pd (mmask8, m512d, m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_min_round_ps (m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_mask_min_round_ps (m512, mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_min_round_ps (mmask16, m512, m512, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_min_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_min_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_min_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_min_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_min_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_min_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
 
   m256i = _mm512_cvtt_roundpd_epi32 (m512d, 7); /* { dg-error "incorrect rounding operand" } */
   m256i = _mm512_mask_cvtt_roundpd_epi32 (m256i, mmask8, m512d, 7); /* { dg-error "incorrect rounding operand" } */
@@ -450,12 +462,24 @@ test_sae_only (void)
   m512 = _mm512_max_round_ps (m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_mask_max_round_ps (m512, mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_max_round_ps (mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_max_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_max_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_max_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_max_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_max_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_max_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_min_round_pd (m512d, m512d, 3); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_mask_min_round_pd (m512d, mmask8, m512d, m512d, 3); /* { dg-error "incorrect rounding operand" } */
   m512d = _mm512_maskz_min_round_pd (mmask8, m512d, m512d, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_min_round_ps (m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_mask_min_round_ps (m512, mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
   m512 = _mm512_maskz_min_round_ps (mmask16, m512, m512, 3); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_min_round_sd (m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_mask_min_round_sd (m128d, mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128d = _mm_maskz_min_round_sd (mmask8, m128d, m128d, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_min_round_ss (m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_mask_min_round_ss (m128, mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
+  m128 = _mm_maskz_min_round_ss (mmask8, m128, m128, 7); /* { dg-error "incorrect rounding operand" } */
 
   m256i = _mm512_cvtt_roundpd_epi32 (m512d, 3); /* { dg-error "incorrect rounding operand" } */
   m256i = _mm512_mask_cvtt_roundpd_epi32 (m256i, mmask8, m512d, 3); /* { dg-error "incorrect rounding operand" } */
