Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 179636)
+++ gcc/doc/invoke.texi	(working copy)
@@ -271,7 +271,8 @@ Objective-C and Objective-C++ Dialects}.
 -Wunused-label  -Wunused-local-typedefs -Wunused-parameter @gol
 -Wno-unused-result -Wunused-value @gol -Wunused-variable @gol
 -Wunused-but-set-parameter -Wunused-but-set-variable @gol
--Wvariadic-macros -Wvla -Wvolatile-register-var  -Wwrite-strings}
+-Wvariadic-macros -Wvector-operation-performance -Wvla @gol
+-Wvolatile-register-var  -Wwrite-strings}
 
 @item C and Objective-C-only Warning Options
 @gccoptlist{-Wbad-function-cast  -Wmissing-declarations @gol
@@ -4535,6 +4536,18 @@ Warn if variadic macros are used in peda
 alternate syntax when in pedantic ISO C99 mode.  This is default.
 To inhibit the warning messages, use @option{-Wno-variadic-macros}.
 
+@item -Wvector-operation-performance
+@opindex Wvector-operation-performance
+@opindex Wno-vector-operation-performance
+Warn if a vector operation is not implemented via the SIMD capabilities of
+the architecture.  Mainly useful for performance tuning.
+A vector operation can be implemented @code{piecewise}, which means that the
+scalar operation is performed on every vector element;
+@code{in parallel}, which means that the vector operation is implemented
+using scalars of a wider type, which is normally more efficient;
+and @code{as a single scalar}, which means that the vector fits into a
+scalar type.
+
 @item -Wvla
 @opindex Wvla
 @opindex Wno-vla
Index: gcc/testsuite/gcc.target/i386/warn-vect-op-3.c
===================================================================
--- gcc/testsuite/gcc.target/i386/warn-vect-op-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/warn-vect-op-3.c	(revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile }  */
+/* { dg-options "-mno-sse -Wvector-operation-performance" }  */
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+/* With -mno-sse, each 8 x short operation below must be diagnosed.  */
+int main (int argc, char *argv[])
+{
+  vector (8, short) v0 = {argc, 1, 15, 38, 12, -1, argc, 2};
+  vector (8, short) v1 = {-4, argc, 2, 11, 1, 17, -8, argc};
+  vector (8, short) res[] = 
+  {
+    v0 + v1,	      /* { dg-warning "expanded in parallel" }  */
+    v0 - v1,          /* { dg-warning "expanded in parallel" }  */
+    v0 > v1,          /* { dg-warning "expanded piecewise" }  */
+    v0 & v1,          /* { dg-warning "expanded in parallel" }  */
+    __builtin_shuffle (v0, v1),	      /* { dg-warning "expanded piecewise" }  */
+    __builtin_shuffle (v0, v1, v1)    /* { dg-warning "expanded piecewise" }  */
+  };
+  
+  return res[argc][argc];
+}
Index: gcc/testsuite/gcc.target/i386/warn-vect-op-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/warn-vect-op-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/warn-vect-op-1.c	(revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile }  */
+/* { dg-options "-mno-sse -Wvector-operation-performance" }  */
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+/* With -mno-sse, each 4 x int operation below must be diagnosed.  */
+int main (int argc, char *argv[])
+{
+  vector (4, int) v0 = {argc, 1, 15, 38};
+  vector (4, int) v1 = {-4, argc, 2, 11};
+  vector (4, int) res[] = 
+  {
+    v0 + v1,	  /* { dg-warning "expanded piecewise" }  */
+    v0 - v1,	  /* { dg-warning "expanded piecewise" }  */
+    v0 > v1,	  /* { dg-warning "expanded piecewise" }  */
+    v0 & v1,	  /* { dg-warning "expanded in parallel" }  */
+    __builtin_shuffle (v0, v1),	    /* { dg-warning "expanded piecewise" }  */
+    __builtin_shuffle (v0, v1, v1)  /* { dg-warning "expanded piecewise" }  */  
+  };
+
+  return res[argc][argc];
+}
Index: gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/warn-vect-op-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/warn-vect-op-2.c	(revision 0)
@@ -0,0 +1,23 @@
+/* { dg-do compile }  */
+/* { dg-options "-mno-sse -Wvector-operation-performance" }  */
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+/* With -mno-sse, each 16 x signed char operation below must be diagnosed.  */
+int main (int argc, char *argv[])
+{
+  vector (16, signed char) v0 = {argc, 1, 15, 38, 12, -1, argc, 2, 
+				 argc, 1, 15, 38, 12, -1, argc, 2};
+  vector (16, signed char) v1 = {-4, argc, 2, 11, 1, 17, -8, argc,
+				 argc, 1, 15, 38, 12, -1, argc, 2};
+  vector (16, signed char) res[] = 
+  {
+    v0 + v1,		  /* { dg-warning "expanded in parallel" }  */
+    v0 - v1,              /* { dg-warning "expanded in parallel" }  */
+    v0 > v1,              /* { dg-warning "expanded piecewise" }  */
+    v0 & v1,              /* { dg-warning "expanded in parallel" }  */
+    __builtin_shuffle (v0, v1),        /* { dg-warning "expanded piecewise" }  */
+    __builtin_shuffle (v0, v1, v1)     /* { dg-warning "expanded piecewise" }  */
+  };
+ 
+  return res[argc][argc];
+}
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 179636)
+++ gcc/common.opt	(working copy)
@@ -694,6 +694,10 @@ Wcoverage-mismatch
 Common Var(warn_coverage_mismatch) Init(1) Warning
 Warn in case profiles in -fprofile-use do not match
 
+Wvector-operation-performance
+Common Var(warn_vector_operation_performance) Warning
+Warn when a vector operation is compiled without using SIMD instructions
+
 Xassembler
 Driver Separate
 
Index: gcc/tree-vect-generic.c
===================================================================
--- gcc/tree-vect-generic.c	(revision 179636)
+++ gcc/tree-vect-generic.c	(working copy)
@@ -235,6 +235,14 @@ expand_vector_piecewise (gimple_stmt_ite
   int delta = tree_low_cst (part_width, 1)
 	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
   int i;
+  location_t loc = gimple_location (gsi_stmt (*gsi));
+
+  if (gimple_expr_type (gsi_stmt (*gsi)) == type)
+    warning_at (loc, OPT_Wvector_operation_performance,
+		"vector operation will be expanded piecewise");
+  else
+    warning_at (loc, OPT_Wvector_operation_performance,
+		"vector operation will be expanded in parallel");
 
   v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
   for (i = 0; i < nunits;
@@ -260,6 +268,7 @@ expand_vector_parallel (gimple_stmt_iter
   tree result, compute_type;
   enum machine_mode mode;
   int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
+  location_t loc = gimple_location (gsi_stmt (*gsi));
 
   /* We have three strategies.  If the type is already correct, just do
      the operation an element at a time.  Else, if the vector is wider than
@@ -284,6 +293,9 @@ expand_vector_parallel (gimple_stmt_iter
       mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
       compute_type = lang_hooks.types.type_for_mode (mode, 1);
       result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
+      warning_at (loc, OPT_Wvector_operation_performance,
+	          "vector operation will be expanded with a "
+		  "single scalar operation");
     }
 
   return result;
@@ -308,7 +320,7 @@ expand_vector_addition (gimple_stmt_iter
     return expand_vector_parallel (gsi, f_parallel,
 				   type, a, b, code);
   else
-    return expand_vector_piecewise (gsi, f,
+    return expand_vector_piecewise (gsi, f, 
 				    type, TREE_TYPE (type),
 				    a, b, code);
 }
@@ -400,8 +412,8 @@ expand_vector_operation (gimple_stmt_ite
       case PLUS_EXPR:
       case MINUS_EXPR:
         if (!TYPE_OVERFLOW_TRAPS (type))
-          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
-		      		         gimple_assign_rhs1 (assign),
+	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
+					 gimple_assign_rhs1 (assign),
 					 gimple_assign_rhs2 (assign), code);
 	break;
 
@@ -626,10 +638,14 @@ lower_vec_shuffle (gimple_stmt_iterator 
   tree constr, t, si, i_val;
   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
+  location_t loc = gimple_location (gsi_stmt (*gsi));
   unsigned i;
 
   if (expand_vec_shuffle_expr_p (TYPE_MODE (vect_type), vec0, vec1, mask))
     return;
+  
+  warning_at (loc, OPT_Wvector_operation_performance,
+              "vector shuffling operation will be expanded piecewise");
 
   v = VEC_alloc (constructor_elt, gc, elements);
   for (i = 0; i < elements; i++)
Index: gcc/c-parser.c
===================================================================
--- gcc/c-parser.c	(revision 179636)
+++ gcc/c-parser.c	(working copy)
@@ -6533,17 +6533,22 @@ c_parser_postfix_expression (c_parser *p
 	      }
 
 	    if (VEC_length (c_expr_t, cexpr_list) == 2)
-	      expr.value =
-		c_build_vec_shuffle_expr
-		  (loc, VEC_index (c_expr_t, cexpr_list, 0)->value,
-		   NULL_TREE, VEC_index (c_expr_t, cexpr_list, 1)->value);
-
+	      {
+		expr.value =
+		  c_build_vec_shuffle_expr
+		    (loc, VEC_index (c_expr_t, cexpr_list, 0)->value,
+		     NULL_TREE, VEC_index (c_expr_t, cexpr_list, 1)->value);
+		SET_EXPR_LOCATION (expr.value, loc);
+	      }
 	    else if (VEC_length (c_expr_t, cexpr_list) == 3)
-	      expr.value =
-		c_build_vec_shuffle_expr
-		  (loc, VEC_index (c_expr_t, cexpr_list, 0)->value,
-		   VEC_index (c_expr_t, cexpr_list, 1)->value,
-		   VEC_index (c_expr_t, cexpr_list, 2)->value);
+	      {
+		expr.value =
+		  c_build_vec_shuffle_expr
+		    (loc, VEC_index (c_expr_t, cexpr_list, 0)->value,
+		     VEC_index (c_expr_t, cexpr_list, 1)->value,
+		     VEC_index (c_expr_t, cexpr_list, 2)->value);
+		SET_EXPR_LOCATION (expr.value, loc);
+	      }
 	    else
 	      {
 		error_at (loc, "wrong number of arguments to "
