commit e00159729e4070bf8e019ee0714ea8d4ed498cc6
Author: Petr Murzin <petr.murzin@intel.com>
Date:   Fri Jul 31 13:42:51 2015 +0300

    [AVX512F] Add scatter support for vectorizer

diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index ee31ee3..b892f08 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1021,6 +1021,10 @@ DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT)
 DEF_FUNCTION_TYPE (VOID, PINT, QI, V4DI, V4SI, INT)
 DEF_FUNCTION_TYPE (VOID, PINT, QI, V2DI, V4SI, INT)
 DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V8DI, V16SF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V16SI, V8DF, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, HI, V8DI, V16SI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V16SI, V8DI, INT)
 
 DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT)
 DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4DI, V4DI, INT)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 128c5af..1e01c9f8 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -30386,6 +30386,10 @@ enum ix86_builtins
   IX86_BUILTIN_GATHER3SIV16SI,
   IX86_BUILTIN_GATHER3SIV8DF,
   IX86_BUILTIN_GATHER3SIV8DI,
+  IX86_BUILTIN_SCATTERALTSIV8DF,
+  IX86_BUILTIN_SCATTERALTDIV16SF,
+  IX86_BUILTIN_SCATTERALTSIV8DI,
+  IX86_BUILTIN_SCATTERALTDIV16SI,
   IX86_BUILTIN_SCATTERDIV16SF,
   IX86_BUILTIN_SCATTERDIV16SI,
   IX86_BUILTIN_SCATTERDIV8DF,
@@ -34202,6 +34206,21 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
 	       VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
 	       IX86_BUILTIN_SCATTERDIV2DI);
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
+	       VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
+	       IX86_BUILTIN_SCATTERALTSIV8DF);
+
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
+	       VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
+	       IX86_BUILTIN_SCATTERALTDIV16SF);
+
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
+	       VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
+	       IX86_BUILTIN_SCATTERALTSIV8DI);
+
+  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
+	       VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
+	       IX86_BUILTIN_SCATTERALTDIV16SI);
 
   /* AVX512PF */
   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
@@ -39851,6 +39870,18 @@ addcarryx:
     case IX86_BUILTIN_GATHERPFDPD:
       icode = CODE_FOR_avx512pf_gatherpfv8sidf;
       goto vec_prefetch_gen;
+    case IX86_BUILTIN_SCATTERALTSIV8DF:
+      icode = CODE_FOR_avx512f_scattersiv8df;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTDIV16SF:
+      icode = CODE_FOR_avx512f_scatterdiv16sf;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTSIV8DI:
+      icode = CODE_FOR_avx512f_scattersiv8di;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERALTDIV16SI:
+      icode = CODE_FOR_avx512f_scatterdiv16si;
+      goto scatter_gen;
     case IX86_BUILTIN_GATHERPFDPS:
       icode = CODE_FOR_avx512pf_gatherpfv16sisf;
       goto vec_prefetch_gen;
@@ -40114,6 +40145,36 @@ addcarryx:
       mode3 = insn_data[icode].operand[3].mode;
       mode4 = insn_data[icode].operand[4].mode;
 
+      /* Scatter instruction stores operand op3 to memory with
+	 indices from op2 and scale from op4 under writemask op1.
+	 If index operand op2 has more elements than source operand
+	 op3, one needs to use only its low half.  And vice versa.  */
+      switch (fcode)
+	{
+	case IX86_BUILTIN_SCATTERALTSIV8DF:
+	case IX86_BUILTIN_SCATTERALTSIV8DI:
+	  half = gen_reg_rtx (V8SImode);
+	  if (!nonimmediate_operand (op2, V16SImode))
+	    op2 = copy_to_mode_reg (V16SImode, op2);
+	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
+	  op2 = half;
+	  break;
+	case IX86_BUILTIN_SCATTERALTDIV16SF:
+	case IX86_BUILTIN_SCATTERALTDIV16SI:
+	  half = gen_reg_rtx (mode3);
+	  if (mode3 == V8SFmode)
+	    gen = gen_vec_extract_lo_v16sf;
+	  else
+	    gen = gen_vec_extract_lo_v16si;
+	  if (!nonimmediate_operand (op3, GET_MODE (op3)))
+	    op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+	  emit_insn (gen (half, op3));
+	  op3 = half;
+	  break;
+	default:
+	  break;
+	}
+
       /* Force memory operand only with base register here.  But we
 	 don't want to do it on memory operand for other builtin
 	 functions.  */
@@ -41193,6 +41254,62 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
   return ix86_get_builtin (code);
 }
 
+/* Returns a decl of a function that implements scatter store with
+   register type VECTYPE and index type INDEX_TYPE and SCALE.
+   Return NULL_TREE if it is not available.  */
+
+static tree
+ix86_vectorize_builtin_scatter (const_tree vectype,
+				const_tree index_type, int scale)
+{
+  bool si;
+  enum ix86_builtins code;
+
+  if (! TARGET_AVX512F)
+    return NULL_TREE;
+
+  if ((TREE_CODE (index_type) != INTEGER_TYPE
+       && !POINTER_TYPE_P (index_type))
+      || (TYPE_MODE (index_type) != SImode
+	  && TYPE_MODE (index_type) != DImode))
+    return NULL_TREE;
+
+  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
+    return NULL_TREE;
+
+  /* v*scatter* insn sign extends index to pointer mode.  */
+  if (TYPE_PRECISION (index_type) < POINTER_SIZE
+      && TYPE_UNSIGNED (index_type))
+    return NULL_TREE;
+
+  /* Scale can be 1, 2, 4 or 8.  */
+  if (scale <= 0
+      || scale > 8
+      || (scale & (scale - 1)) != 0)
+    return NULL_TREE;
+
+  si = TYPE_MODE (index_type) == SImode;
+  switch (TYPE_MODE (vectype))
+    {
+    case V8DFmode:
+      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
+      break;
+    case V8DImode:
+      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
+      break;
+    case V16SFmode:
+      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
+      break;
+    case V16SImode:
+      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
+      break;
+    default:
+      return NULL_TREE;
+    }
+
+  return ix86_builtins[code];
+}
+
 /* Returns a code for a target-specific builtin that implements
    reciprocal of the function, or NULL_TREE if not available.  */
 
@@ -52324,6 +52441,9 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
 #undef TARGET_VECTORIZE_BUILTIN_GATHER
 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
 
+#undef TARGET_VECTORIZE_BUILTIN_SCATTER
+#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
+
 #undef TARGET_BUILTIN_RECIPROCAL
 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index f95646c..02dab1a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5739,6 +5739,13 @@ in vectorized loops in current function, or non-negative number if it is
 usable.  In that case, the smaller the number is, the more desirable it is
 to use it.
 @end deftypefn
+@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_SCATTER (const_tree @var{vectype}, const_tree @var{index_type}, int @var{scale})
+Target builtin that implements vector scatter operation.  @var{vectype}
+is the vector type of the store and @var{index_type} is scalar type of
+the index, scaled by @var{scale}.
+The default is @code{NULL_TREE} which means to not vectorize scatter
+stores.
+@end deftypefn
 
 @node Anchored Addresses
 @section Anchored Addresses
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 2383fb9..e2655a8 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4244,6 +4244,8 @@ address;  but often a machine-dependent strategy can generate better code.
 @hook TARGET_SIMD_CLONE_ADJUST
 
 @hook TARGET_SIMD_CLONE_USABLE
+
+@hook TARGET_VECTORIZE_BUILTIN_SCATTER
 
 @node Anchored Addresses
 @section Anchored Addresses
diff --git a/gcc/target.def b/gcc/target.def
index 4edc209..7eef7c1 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1801,6 +1801,18 @@ loads.",
 (const_tree mem_vectype, const_tree index_type, int scale),
 NULL)
 
+/* Target builtin that implements vector scatter operation.  */
+DEFHOOK
+(builtin_scatter,
+ "Target builtin that implements vector scatter operation.  @var{vectype}\n\
+is the vector type of the store and @var{index_type} is scalar type of\n\
+the index, scaled by @var{scale}.\n\
+The default is @code{NULL_TREE} which means to not vectorize scatter\n\
+stores.",
+ tree,
+ (const_tree vectype, const_tree index_type, int scale),
+ NULL)
+
 /* Target function to initialize the cost model for a loop or block.  */
 DEFHOOK
 (init_cost,
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-scatter-1.c b/gcc/testsuite/gcc.target/i386/avx512f-scatter-1.c
new file mode 100644
index 0000000..7631849
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-scatter-1.c
@@ -0,0 +1,216 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f -DAVX512F" } */
+
+#include "avx512f-check.h"
+
+#define N 1024
+float vf1[N], vf2[2*N+16];
+double vd1[N], vd2[2*N+16];
+int vi1[N], vi2[2*N+16], k[N];
+long vl1[N], vl2[2*N+16], l[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vf2[k[i]] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vi2[k[i]] = vi1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vf2[k[i] + x] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f4 (int x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vi2[k[i] + x] = vi1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vd2[k[i]] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vl2[k[i]] = vl1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vd2[k[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vl2[k[i] + x] = vl1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vf2[l[i]] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vi2[l[i]] = vi1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f11 (long x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vf2[l[i] + x] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f12 (long x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vi2[l[i] + x] = vi1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f13 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vd2[l[i]] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f14 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vl2[l[i]] = vl1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f15 (long x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vd2[l[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f16 (long x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vl2[l[i] + x] = vl1[i];
+}
+
+static void
+avx512f_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      vf1[i] = 17.0f + i;
+      vd1[i] = 19.0 + i;
+      vi1[i] = 21 + i;
+      vl1[i] = 23L + i;
+    }
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      k[i] = (i % 2) ? (N / 2 + i) : (N / 2 - i / 2);
+      l[i] = 2 * i + i % 2;
+    }
+
+  f1 ();
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (vf2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 17
+	|| vi2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 21)
+      abort ();
+
+  f3 (12);
+  f4 (14);
+  for (i = 0; i < N; i++)
+    if (vf2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 12] != i + 17
+	|| vi2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 14] != i + 21)
+      abort ();
+
+  f5 ();
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (vd2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 19
+	|| vl2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 23)
+      abort ();
+
+  f7 (7);
+  f8 (9);
+  for (i = 0; i < N; i++)
+    if (vd2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 7] != i + 19
+	|| vl2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 9] != i + 23)
+      abort ();
+
+  f9 ();
+  f10 ();
+  for (i = 0; i < N; i++)
+    if (vf2[2 * i + i % 2] != i + 17
+	|| vi2[2 * i + i % 2] != i + 21)
+      abort ();
+
+  f11 (2);
+  f12 (4);
+  for (i = 0; i < N; i++)
+    if (vf2[2 * i + i % 2 + 2] != i + 17
+	|| vi2[2 * i + i % 2 + 4] != i + 21)
+      abort ();
+
+  f13 ();
+  f14 ();
+  for (i = 0; i < N; i++)
+    if (vd2[2 * i + i % 2] != i + 19
+	|| vl2[2 * i + i % 2] != i + 23)
+      abort ();
+
+  f15 (13);
+  f16 (15);
+  for (i = 0; i < N; i++)
+    if (vd2[2 * i + i % 2 + 13] != i + 19
+	|| vl2[2 * i + i % 2 + 15] != i + 23)
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-scatter-2.c b/gcc/testsuite/gcc.target/i386/avx512f-scatter-2.c
new file mode 100644
index 0000000..5eabab6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-scatter-2.c
@@ -0,0 +1,215 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f -DAVX512F" } */
+
+#include "avx512f-check.h"
+
+#define N 1024
+float vf1[N], vf2[2*N+16];
+double vd1[N], vd2[2*N+16];
+int k[N];
+long l[N];
+short n[2*N+16];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vf2[k[i]] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[k[i]] = (int) vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vf2[k[i] + x] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f4 (int x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[k[i] + x] = (int) vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vd2[k[i]] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[k[i]] = (int) vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vd2[k[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[k[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vf2[l[i]] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[l[i]] = (int) vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f11 (long x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vf2[l[i] + x] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f12 (long x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[l[i] + x] = (int) vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f13 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vd2[l[i]] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f14 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[l[i]] = (int) vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f15 (long x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    vd2[l[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f16 (long x)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    n[l[i] + x] = (int) vd1[i];
+}
+
+static void
+avx512f_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      vf1[i] = 17.0f + i;
+      vd1[i] = 19.0 + i;
+    }
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      k[i] = (i % 2) ? (N / 2 + i) : (N / 2 - i / 2);
+      l[i] = 2 * i + i % 2;
+    }
+
+  f1 ();
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (vf2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 17
+	|| n[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 17)
+      abort ();
+
+  f3 (12);
+  f4 (14);
+  for (i = 0; i < N; i++)
+    if (vf2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 12] != i + 17
+	|| n[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 14] != i + 17)
+      abort ();
+
+  f5 ();
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (vd2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 19
+	|| n[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 19)
+      abort ();
+
+  f7 (7);
+  f8 (9);
+  for (i = 0; i < N; i++)
+    if (vd2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 7] != i + 19
+	|| n[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 9] != i + 19)
+      abort ();
+
+  f9 ();
+  f10 ();
+  for (i = 0; i < N; i++)
+    if (vf2[2 * i + i % 2] != i + 17
+	|| n[2 * i + i % 2] != i + 17)
+      abort ();
+
+  f11 (2);
+  f12 (4);
+  for (i = 0; i < N; i++)
+    if (vf2[2 * i + i % 2 + 2] != i + 17
+	|| n[2 * i + i % 2 + 4] != i + 17)
+      abort ();
+
+  f13 ();
+  f14 ();
+  for (i = 0; i < N; i++)
+    if (vd2[2 * i + i % 2] != i + 19
+	|| n[2 * i + i % 2] != i + 19)
+      abort ();
+
+  f15 (13);
+  f16 (15);
+  for (i = 0; i < N; i++)
+    if (vd2[2 * i + i % 2 + 13] != i + 19
+	|| n[2 * i + i % 2 + 15] != i + 19)
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-scatter-3.c b/gcc/testsuite/gcc.target/i386/avx512f-scatter-3.c
new file mode 100644
index 0000000..dccbdb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-scatter-3.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f -DAVX512F" } */
+
+#include "avx512f-check.h"
+
+#define N 1024
+int a[N], b[N];
+
+__attribute__((noinline, noclone)) void
+foo (float *__restrict p, float *__restrict q,
+     int s1, int s2, int s3)
+{
+  int i;
+  for (i = 0; i < (N / 8); i++)
+    p[a[i] * s1 + b[i] * s2 + s3] = q[i];
+}
+
+static void
+avx512f_test (void)
+{
+  int i;
+  float c[N], d[N];
+  for (i = 0; i < N; i++)
+    {
+      a[i] = (i * 7) & (N / 8 - 1);
+      b[i] = (i * 13) & (N / 8 - 1);
+      c[i] = 179.13 + i;
+    }
+  foo (d, c, 3, 2, 4);
+  for (i = 0; i < (N / 8); i++)
+    if (d[a[i] * 3 + b[i] * 2 + 4] != (float) (179.13 + i))
+      abort ();
+}
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 731fe7d..2de0369 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "params.h"
 
+
+
 /* Return true if load- or store-lanes optab OPTAB is implemented for
    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
 
@@ -268,7 +270,9 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
 	}
 
       if (STMT_VINFO_GATHER_P (stmtinfo_a)
-	  || STMT_VINFO_GATHER_P (stmtinfo_b))
+	  || STMT_VINFO_GATHER_P (stmtinfo_b)
+	  || STMT_VINFO_SCATTER_P (stmtinfo_a)
+	  || STMT_VINFO_SCATTER_P (stmtinfo_b))
 	{
 	  if (dump_enabled_p ())
 	    {
@@ -316,7 +320,9 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
 	}
 
       if (STMT_VINFO_GATHER_P (stmtinfo_a)
-	  || STMT_VINFO_GATHER_P (stmtinfo_b))
+	  || STMT_VINFO_GATHER_P (stmtinfo_b)
+	  || STMT_VINFO_SCATTER_P (stmtinfo_a)
+	  || STMT_VINFO_SCATTER_P (stmtinfo_b))
 	{
 	  if (dump_enabled_p ())
 	    {
@@ -2307,10 +2313,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_NOTE, vect_location,
 	                     "zero step in outer loop.\n");
-	  if (DR_IS_READ (dr))
-  	    return true;
-	  else
-	    return false;
+	  return DR_IS_READ (dr);
 	}
     }
 
@@ -2956,12 +2959,12 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
   return true;
 }
 
-/* Check whether a non-affine read in stmt is suitable for gather load
-   and if so, return a builtin decl for that operation.  */
+/* Check whether a non-affine read or write in stmt is suitable for gather load
+   or scatter store and if so, return a builtin decl for that operation.  */
 
 tree
-vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
-		   tree *offp, int *scalep)
+vect_check_gather_scatter (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
+			   tree *offp, int *scalep, bool is_load)
 {
   HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -2990,7 +2993,7 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
 	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
     }
 
-  /* The gather builtins need address of the form
+  /* The gather and scatter builtins need address of the form
      loop_invariant + vector * {1, 2, 4, 8}
      or
      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
@@ -3153,8 +3156,13 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
   if (offtype == NULL_TREE)
     offtype = TREE_TYPE (off);
 
-  decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
-					   offtype, scale);
+  if (is_load)
+    decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
+					     offtype, scale);
+  else
+    decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
+					      offtype, scale);
+
   if (decl == NULL_TREE)
     return NULL_TREE;
 
@@ -3304,6 +3312,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
       stmt_vec_info stmt_info;
       tree base, offset, init;
       bool gather = false;
+      bool scatter = false;
       bool simd_lane_access = false;
       int vf;
 
@@ -3342,18 +3351,22 @@ again:
 	    = DR_IS_READ (dr)
 	      && !TREE_THIS_VOLATILE (DR_REF (dr))
 	      && targetm.vectorize.builtin_gather != NULL;
+	  bool maybe_scatter
+	    = DR_IS_WRITE (dr)
+	      && !TREE_THIS_VOLATILE (DR_REF (dr))
+	      && targetm.vectorize.builtin_scatter != NULL;
 	  bool maybe_simd_lane_access
 	    = loop_vinfo && loop->simduid;
 
-	  /* If target supports vector gather loads, or if this might be
-	     a SIMD lane access, see if they can't be used.  */
+	  /* If target supports vector gather loads or scatter stores, or if
+	     this might be a SIMD lane access, see if they can't be used.  */
 	  if (loop_vinfo
-	      && (maybe_gather || maybe_simd_lane_access)
+	      && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
 	      && !nested_in_vect_loop_p (loop, stmt))
 	    {
 	      struct data_reference *newdr
 		= create_data_ref (NULL, loop_containing_stmt (stmt),
-				   DR_REF (dr), stmt, true);
+				   DR_REF (dr), stmt, !maybe_scatter);
 	      gcc_assert (newdr != NULL && DR_REF (newdr));
 	      if (DR_BASE_ADDRESS (newdr)
 		  && DR_OFFSET (newdr)
@@ -3406,17 +3419,18 @@ again:
 			    }
 			}
 		    }
-		  if (!simd_lane_access && maybe_gather)
+		  if (!simd_lane_access && (maybe_gather || maybe_scatter))
 		    {
 		      dr = newdr;
-		      gather = true;
+		      gather = DR_IS_READ (dr);
+		      scatter = DR_IS_WRITE (dr);
 		    }
 		}
-	      if (!gather && !simd_lane_access)
+	      if (!gather && !scatter && !simd_lane_access)
 		free_data_ref (newdr);
 	    }
 
-	  if (!gather && !simd_lane_access)
+	  if (!gather && !scatter && !simd_lane_access)
 	    {
 	      if (dump_enabled_p ())
 		{
@@ -3444,7 +3458,7 @@ again:
           if (bb_vinfo)
 	    break;
 
-	  if (gather || simd_lane_access)
+	  if (gather || scatter || simd_lane_access)
 	    free_data_ref (dr);
 	  return false;
         }
@@ -3479,7 +3493,7 @@ again:
           if (bb_vinfo)
 	    break;
 
-	  if (gather || simd_lane_access)
+	  if (gather || scatter || simd_lane_access)
 	    free_data_ref (dr);
           return false;
         }
@@ -3499,7 +3513,7 @@ again:
           if (bb_vinfo)
 	    break;
 
-	  if (gather || simd_lane_access)
+	  if (gather || scatter || simd_lane_access)
 	    free_data_ref (dr);
           return false;
 	}
@@ -3524,7 +3538,7 @@ again:
 	  if (bb_vinfo)
 	    break;
 
-	  if (gather || simd_lane_access)
+	  if (gather || scatter || simd_lane_access)
 	    free_data_ref (dr);
 	  return false;
 	}
@@ -3662,7 +3676,7 @@ again:
           if (bb_vinfo)
 	    break;
 
-	  if (gather || simd_lane_access)
+	  if (gather || scatter || simd_lane_access)
 	    free_data_ref (dr);
           return false;
         }
@@ -3695,10 +3709,10 @@ again:
           if (bb_vinfo)
 	    break;
 
-	  if (gather || simd_lane_access)
+	  if (gather || scatter || simd_lane_access)
 	    {
 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
-	      if (gather)
+	      if (gather || scatter)
 		free_data_ref (dr);
 	    }
 	  return false;
@@ -3722,23 +3736,36 @@
       if (vf > *min_vf)
 	*min_vf = vf;
 
-      if (gather)
+      if (gather || scatter)
 	{
 	  tree off;
 
-	  gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
-	  if (gather
+	  /* Exactly one of GATHER/SCATTER is set here (read vs. write DR);
+	     only run the matching check so a load is never mis-flagged as a
+	     scatter (and vice versa), and OFF is set exactly once.  */
+	  if (gather)
+	    gather = 0 != vect_check_gather_scatter (stmt, loop_vinfo, NULL,
+						     &off, NULL, true);
+	  else if (scatter)
+	    scatter = 0 != vect_check_gather_scatter (stmt, loop_vinfo, NULL,
+						      &off, NULL, false);
+
+	  if ((gather || scatter)
 	      && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
-	    gather = false;
-	  if (!gather)
+	    {
+	      gather = false;
+	      scatter = false;
+	    }
+
+	  if (!gather && !scatter)
 	    {
 	      STMT_VINFO_DATA_REF (stmt_info) = NULL;
 	      free_data_ref (dr);
 	      if (dump_enabled_p ())
 		{
-		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
-                                   "not vectorized: not suitable for gather "
-                                   "load ");
+		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				   DR_IS_READ (dr)
+				   ? "not vectorized: not suitable for gather "
+				     "load "
+				   : "not vectorized: not suitable for scatter "
+				     "store ");
 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 		}
@@ -3747,7 +3767,9 @@ again:
 
 	  datarefs[i] = dr;
 	  STMT_VINFO_GATHER_P (stmt_info) = true;
+	  STMT_VINFO_SCATTER_P (stmt_info) = true;
 	}
+
       else if (loop_vinfo
 	       && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
 	{
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index f06e57c..8b3f539 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -810,14 +810,16 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
               return false;
           }
 
-      if (STMT_VINFO_GATHER_P (stmt_vinfo))
+      if (STMT_VINFO_GATHER_P (stmt_vinfo) || STMT_VINFO_SCATTER_P (stmt_vinfo))
 	{
 	  tree off;
-	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
+	  tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off,
+						 NULL,
+						 STMT_VINFO_GATHER_P (stmt_vinfo));
 	  gcc_assert (decl);
 	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
 			    &worklist, true))
 	    return false;
 	}
     } /* while worklist */
 
@@ -1819,8 +1827,8 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
     {
       gimple def_stmt;
       tree def;
-      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
-				       &gather_off, &gather_scale);
+      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
+				       &gather_off, &gather_scale, true);
       gcc_assert (gather_decl);
       if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
 				 &def_stmt, &def, &gather_dt,
@@ -5142,6 +5150,12 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   unsigned int vec_num;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   tree aggr_type;
+  tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
+  tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
+  int scatter_scale = 1;
+  enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
+  enum vect_def_type scatter_src_dt = vect_unknown_def_type;
+  gimple new_stmt;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -5299,6 +5313,32 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
         }
     }
 
+  if (STMT_VINFO_SCATTER_P (stmt_info))
+    {
+      gimple def_stmt;
+      tree def;
+      scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
+						&scatter_off, &scatter_scale, false);
+      gcc_assert (scatter_decl);
+      if (!vect_is_simple_use_1 (scatter_off, NULL, loop_vinfo, bb_vinfo,
+				 &def_stmt, &def, &scatter_idx_dt,
+				 &scatter_off_vectype))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "scatter index use not simple.");
+	  return false;
+	}
+      if (!vect_is_simple_use (gimple_assign_rhs1 (stmt), NULL, loop_vinfo, bb_vinfo,
+			       &def_stmt, &def, &scatter_src_dt))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "scatter source use not simple.");
+	  return false;
+	}
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
@@ -5313,6 +5353,150 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   ensure_base_align (stmt_info, dr);
 
+  if (STMT_VINFO_SCATTER_P (stmt_info))
+    {
+      tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
+      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
+      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
+      tree ptr, mask, var, scale, perm_mask = NULL_TREE;
+      edge pe = loop_preheader_edge (loop);
+      gimple_seq seq;
+      basic_block new_bb;
+      enum { NARROW, NONE, WIDEN } modifier;
+      int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
+
+      if (nunits == scatter_off_nunits)
+	modifier = NONE;
+      else if (nunits == scatter_off_nunits / 2)
+	{
+	  unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
+	  modifier = WIDEN;
+
+	  for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
+	    sel[i] = i | nunits;
+
+	  perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
+	  gcc_assert (perm_mask != NULL_TREE);
+	}
+      else if (nunits == scatter_off_nunits * 2)
+	{
+	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
+	  modifier = NARROW;
+
+	  for (i = 0; i < (unsigned int) nunits; ++i)
+	    sel[i] = i | scatter_off_nunits;
+
+	  perm_mask = vect_gen_perm_mask_checked (vectype, sel);
+	  gcc_assert (perm_mask != NULL_TREE);
+	  ncopies *= 2;
+	}
+      else
+	gcc_unreachable ();
+
+      rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
+      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      scaletype = TREE_VALUE (arglist);
+
+      gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
+			   && TREE_CODE (rettype) == VOID_TYPE);
+
+      ptr = fold_convert (ptrtype, scatter_base);
+      if (!is_gimple_min_invariant (ptr))
+	{
+	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
+	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+	  gcc_assert (!new_bb);
+	}
+
+      /* Currently we support only unconditional scatter stores,
+	 so mask should be all ones.  */
+      mask = build_int_cst (masktype, -1);
+      mask = vect_init_vector (stmt, mask, masktype, NULL);
+
+      scale = build_int_cst (scaletype, scatter_scale);
+
+      prev_stmt_info = NULL;
+      for (j = 0; j < ncopies; ++j)
+	{
+	  if (j == 0)
+	    {
+	      src = vec_oprnd1
+		= vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt, NULL);
+	      op = vec_oprnd0
+		= vect_get_vec_def_for_operand (scatter_off, stmt, NULL);
+	    }
+	  else if (modifier != NONE && (j & 1))
+	    {
+	      if (modifier == WIDEN)
+		{
+		  src = vec_oprnd1
+		    = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
+		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
+					     stmt, gsi);
+		}
+	      else if (modifier == NARROW)
+		{
+		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
+					      stmt, gsi);
+		  op = vec_oprnd0
+		    = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
+		}
+	      else
+		gcc_unreachable ();
+	    }
+	  else
+	    {
+	      src = vec_oprnd1
+		= vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
+	      op = vec_oprnd0
+		= vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
+	    }
+
+	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
+	    {
+	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
+			  == TYPE_VECTOR_SUBPARTS (srctype));
+	      var = vect_get_new_vect_var (srctype, vect_simple_var, NULL);
+	      var = make_ssa_name (var, NULL);
+	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
+	      new_stmt
+		= gimple_build_assign (var, VIEW_CONVERT_EXPR,
+						src, NULL_TREE);
+	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	      src = var;
+	    }
+
+	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
+	    {
+	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
+			  == TYPE_VECTOR_SUBPARTS (idxtype));
+	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
+	      var = make_ssa_name (var, NULL);
+	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
+	      new_stmt
+		= gimple_build_assign (var, VIEW_CONVERT_EXPR,
+						op, NULL_TREE);
+	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	      op = var;
+	    }
+
+	  new_stmt
+	    = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
+
+	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+	  if (prev_stmt_info == NULL)
+	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+	  else
+	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+	  prev_stmt_info = vinfo_for_stmt (new_stmt);
+	}
+      return true;
+    }
+
   if (grouped_store)
     {
       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
@@ -5586,8 +5770,6 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
-      gimple new_stmt;
-
       if (j == 0)
 	{
           if (slp)
@@ -5853,10 +6035,12 @@ permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
 {
   tree vectype = TREE_TYPE (x);
   tree perm_dest, data_ref;
+  tree scalar_dest = TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
+		     ? gimple_assign_lhs (stmt) : x;
   gimple perm_stmt;
 
-  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
-  data_ref = make_ssa_name (perm_dest);
+  perm_dest = vect_create_destination_var (scalar_dest, vectype);
+  data_ref = make_ssa_name (perm_dest, NULL);
 
   /* Generate the permute statement.  */
   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
@@ -6136,8 +6320,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     {
       gimple def_stmt;
       tree def;
-      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
-				       &gather_off, &gather_scale);
+      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
+					       &gather_off, &gather_scale, true);
       gcc_assert (gather_decl);
       if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
 				 &def_stmt, &def, &gather_dt,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index dfa8795..3b8bce4 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -652,6 +652,9 @@ typedef struct _stmt_vec_info {
   /* True if this is an access with loop-invariant stride.  */
   bool strided_p;
 
+  /* For stores only, true if this is a scatter store.  */
+  bool scatter_p;
+
   /* For both loads and stores.  */
   bool simd_lane_access_p;
 } *stmt_vec_info;
@@ -669,6 +672,7 @@ typedef struct _stmt_vec_info {
 #define STMT_VINFO_DATA_REF(S)             (S)->data_ref_info
 #define STMT_VINFO_GATHER_P(S)		   (S)->gather_p
 #define STMT_VINFO_STRIDED_P(S)	   	   (S)->strided_p
+#define STMT_VINFO_SCATTER_P(S)		   (S)->scatter_p
 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
 
 #define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_base_address
@@ -1060,8 +1065,8 @@ extern bool vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info);
 extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);
 extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info);
 extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
-extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
-			       int *);
+extern tree vect_check_gather_scatter (gimple, loop_vec_info, tree *,
+				       tree *, int *, bool);
 extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *,
 				    unsigned *);
 extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
