Thanks again for the review. Since only the formatting has changed, I have
replied in the same thread instead of starting a new thread for v6.

I saw many differences when I ran clang-format on the whole file, so I
just updated the existing function manually. Would it be possible to
automate the formatting corrections before submitting a patch? For
example, if all files were already formatted with clang-format, I could
write new changes without worrying about formatting and then run
clang-format, which would reformat only my changes.

Thanks and regards,
Avinash Jayakar

---
 .../gcc.dg/vect/pr104116-ceil-div-2.c         |  29 ++
 .../gcc.dg/vect/pr104116-ceil-div-pow2.c      |  30 ++
 gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c |  30 ++
 .../gcc.dg/vect/pr104116-ceil-mod-2.c         |  30 ++
 .../gcc.dg/vect/pr104116-ceil-mod-pow2.c      |  30 ++
 gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c |  30 ++
 .../gcc.dg/vect/pr104116-ceil-udiv-2.c        |  29 ++
 .../gcc.dg/vect/pr104116-ceil-udiv-pow2.c     |  29 ++
 .../gcc.dg/vect/pr104116-ceil-udiv.c          |  29 ++
 .../gcc.dg/vect/pr104116-ceil-umod-2.c        |  30 ++
 .../gcc.dg/vect/pr104116-ceil-umod-pow2.c     |  30 ++
 .../gcc.dg/vect/pr104116-ceil-umod.c          |  30 ++
 .../gcc.dg/vect/pr104116-floor-div-2.c        |  30 ++
 .../gcc.dg/vect/pr104116-floor-div-pow2.c     |  30 ++
 .../gcc.dg/vect/pr104116-floor-div.c          |  30 ++
 .../gcc.dg/vect/pr104116-floor-mod-2.c        |  31 ++
 .../gcc.dg/vect/pr104116-floor-mod-pow2.c     |  31 ++
 .../gcc.dg/vect/pr104116-floor-mod.c          |  31 ++
 .../gcc.dg/vect/pr104116-round-div-2.c        |  31 ++
 .../gcc.dg/vect/pr104116-round-div-pow2.c     |  31 ++
 .../gcc.dg/vect/pr104116-round-div.c          |  31 ++
 .../gcc.dg/vect/pr104116-round-mod-2.c        |  31 ++
 .../gcc.dg/vect/pr104116-round-mod-pow2.c     |  31 ++
 .../gcc.dg/vect/pr104116-round-mod.c          |  31 ++
 .../gcc.dg/vect/pr104116-round-udiv-2.c       |  31 ++
 .../gcc.dg/vect/pr104116-round-udiv-pow2.c    |  31 ++
 .../gcc.dg/vect/pr104116-round-udiv.c         |  32 ++
 .../gcc.dg/vect/pr104116-round-umod-2.c       |  31 ++
 .../gcc.dg/vect/pr104116-round-umod-pow2.c    |  31 ++
 .../gcc.dg/vect/pr104116-round-umod.c         |  31 ++
 gcc/testsuite/gcc.dg/vect/pr104116.h          | 201 ++++++++++
 gcc/tree-vect-patterns.cc                     | 359 +++++++++++++++++-
 32 files changed, 1457 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-div.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116.h

diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c
new file mode 100644
index 00000000000..7078776a577
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-2.c
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_DIV 2 (GIMPLE loop from
+   TEST_FN); results are checked against cl_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__CEIL_DIV, 2, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = cl_div (i - N/2, 2);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c
new file mode 100644
index 00000000000..7aa9ae84627
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div-pow2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_DIV 8 (GIMPLE loop from
+   TEST_FN); results are checked against cl_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN(__CEIL_DIV, 8, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = cl_div (i - N/2, 8);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c
new file mode 100644
index 00000000000..6f903ffda92
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-div.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_DIV 19 (GIMPLE loop from
+   TEST_FN); results are checked against cl_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN(__CEIL_DIV, 19, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = cl_div (i - N/2, 19);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c
new file mode 100644
index 00000000000..ee6dfb92de9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_MOD 2 (GIMPLE loop from
+   TEST_FN); results are checked against cl_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN(__CEIL_MOD, 2, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = cl_mod (i - N/2, 2);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c
new file mode 100644
index 00000000000..de409ea349a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod-pow2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_MOD 8 (GIMPLE loop from
+   TEST_FN); results are checked against cl_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN(__CEIL_MOD, 8, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = cl_mod (i - N/2, 8);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c
new file mode 100644
index 00000000000..f2ba9367461
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-mod.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_MOD 19 (GIMPLE loop from
+   TEST_FN); results are checked against cl_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN(__CEIL_MOD, 19, div)
+
+int main (void)
+{
+  check_vect ();
+  int *a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = cl_mod (i - N/2, 19);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c
new file mode 100644
index 00000000000..db1f797c1da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-2.c
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_DIV 2u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against cl_udiv.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__CEIL_DIV, 2u, udiv)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *ua = (unsigned int*)&uarr;
+  init_uarr(ua, N);
+  udiv(ua);
+  for (int i=0; i<N; i++)
+  {
+    unsigned int expected = cl_udiv (0xf0000000 + i, 2);
+    if (expected != ua[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c
new file mode 100644
index 00000000000..06b4257d58f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv-pow2.c
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_DIV 8u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against cl_udiv.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__CEIL_DIV, 8u, udiv)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *ua = (unsigned int*)&uarr;
+  init_uarr(ua, N);
+  udiv(ua);
+  for (int i=0; i<N; i++)
+  {
+    unsigned int expected = cl_udiv (0xf0000000 + i, 8);
+    if (expected != ua[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c
new file mode 100644
index 00000000000..ef6e8563ce0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-udiv.c
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_DIV 19u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against cl_udiv.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__CEIL_DIV, 19u, udiv)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *ua = (unsigned int*)&uarr;
+  init_uarr(ua, N);
+  udiv(ua);
+  for (int i=0; i<N; i++)
+  {
+    unsigned int expected = cl_udiv (0xf0000000 + i, 19);
+    if (expected != ua[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c
new file mode 100644
index 00000000000..2d0a5dbaf77
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_MOD 2u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against cl_umod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN_UNSIGNED (__CEIL_MOD, 2u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    unsigned int expected = cl_umod (0xf0000000 + i, 2);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c
new file mode 100644
index 00000000000..2d0a5dbaf77
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod-pow2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_MOD 8u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against cl_umod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN_UNSIGNED (__CEIL_MOD, 8u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    unsigned int expected = cl_umod (0xf0000000 + i, 8);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c
new file mode 100644
index 00000000000..949a5091e36
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-ceil-umod.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __CEIL_MOD 19u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against cl_umod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+
+TEST_FN_UNSIGNED (__CEIL_MOD, 19u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int *a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    unsigned int expected = cl_umod (0xf0000000 + i, 19);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c
new file mode 100644
index 00000000000..d93e0513984
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __FLOOR_DIV 2 (GIMPLE loop from
+   TEST_FN); results are checked against fl_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__FLOOR_DIV, 2, div_2)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  div_2(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = fl_div (i - N/2, 2);
+    if (expected != a[i])
+      abort ();
+  }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c
new file mode 100644
index 00000000000..9e986a75dd1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div-pow2.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __FLOOR_DIV 8 (GIMPLE loop from
+   TEST_FN); results are checked against fl_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__FLOOR_DIV, 8, div_2)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  div_2(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = fl_div (i - N/2, 8);
+    if (expected != a[i])
+      abort ();
+  }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c
new file mode 100644
index 00000000000..89dd270364c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-div.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __FLOOR_DIV 19 (GIMPLE loop from
+   TEST_FN); results are checked against fl_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__FLOOR_DIV, 19, div_2)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  div_2(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = fl_div (i - N/2, 19);
+    if (expected != a[i])
+      abort ();
+  }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c
new file mode 100644
index 00000000000..0c5c1621e0d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __FLOOR_MOD 2 (GIMPLE loop from
+   TEST_FN); results are checked against fl_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__FLOOR_MOD, 2, mod)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = fl_mod (i - N/2, 2);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c
new file mode 100644
index 00000000000..f3de1450000
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __FLOOR_MOD 8 (GIMPLE loop from
+   TEST_FN); results are checked against fl_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__FLOOR_MOD, 8, mod)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = fl_mod (i - N/2, 8);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c
new file mode 100644
index 00000000000..3e6bbe978b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-mod.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __FLOOR_MOD 19 (GIMPLE loop from
+   TEST_FN); results are checked against fl_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__FLOOR_MOD, 19, mod)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = fl_mod (i - N/2, 19);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c
new file mode 100644
index 00000000000..c242ccb9b61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_DIV 2 (GIMPLE loop from
+   TEST_FN); results are checked against rd_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__ROUND_DIV, 2, div)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = rd_div (i - N/2, 2);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c
new file mode 100644
index 00000000000..365c2c59866
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_DIV 8 (GIMPLE loop from
+   TEST_FN); results are checked against rd_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__ROUND_DIV, 8, div)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = rd_div (i - N/2, 8);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c
new file mode 100644
index 00000000000..5c377d118ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-div.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_DIV 19 (GIMPLE loop from
+   TEST_FN); results are checked against rd_div.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__ROUND_DIV, 19, div)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  div(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = rd_div (i - N/2, 19);
+    if (expected != a[i])
+      abort();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c
new file mode 100644
index 00000000000..6430b3ea9ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_MOD 2 (GIMPLE loop from
+   TEST_FN); results are checked against rd_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__ROUND_MOD, 2, mod)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = rd_mod (i - N/2, 2);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c
new file mode 100644
index 00000000000..46c1789e939
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_MOD 8 (GIMPLE loop from
+   TEST_FN); results are checked against rd_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__ROUND_MOD, 8, mod)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = rd_mod (i - N/2, 8);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c
new file mode 100644
index 00000000000..e7ca44e2f84
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-mod.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_MOD 19 (GIMPLE loop from
+   TEST_FN); results are checked against rd_mod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN(__ROUND_MOD, 19, mod)
+
+int main (void)
+{
+  check_vect ();
+  int * a = (int*)&arr;
+  init_arr(a, N);
+  mod(a);
+  for (int i=0; i<N; i++)
+  {
+    int expected = rd_mod (i - N/2, 19);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c
new file mode 100644
index 00000000000..4d42f4e3c02
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_DIV 2u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against rd_udiv.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_DIV, 2u, div)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int * a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  div(a);
+  for (unsigned int i=0; i<N; i++)
+  {
+    unsigned int expected = rd_udiv (0xf0000000 + i, 2);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c
new file mode 100644
index 00000000000..137b249dc44
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_DIV 8u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against rd_udiv.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_DIV, 8u, div)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int * a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  div(a);
+  for (unsigned int i=0; i<N; i++)
+  {
+    unsigned int expected = rd_udiv (0xf0000000 + i, 8);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c
new file mode 100644
index 00000000000..183a930aef9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-udiv.c
@@ -0,0 +1,32 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_DIV 19u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against rd_udiv.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_DIV, 19u, div)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int * a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  div(a);
+  for (unsigned int i=0; i<N; i++)
+  {
+    unsigned int expected = rd_udiv (0xf0000000 + i, 19);
+    if (expected != a[i])
+      abort ();
+
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c
new file mode 100644
index 00000000000..f321e0e5c2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_MOD 2u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against rd_umod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_MOD, 2u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int * a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  mod(a);
+  for (unsigned int i=0; i<N; i++)
+  {
+    unsigned int expected = rd_umod (0xf0000000 + i, 2);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c
new file mode 100644
index 00000000000..041ecd17f56
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod-pow2.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_MOD 8u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against rd_umod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_MOD, 8u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int * a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  mod(a);
+  for (unsigned int i=0; i<N; i++)
+  {
+    unsigned int expected = rd_umod (0xf0000000 + i, 8);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c
new file mode 100644
index 00000000000..b5ddad1d472
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-round-umod.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+/* PR104116: vectorize a[i] = a[i] __ROUND_MOD 19u (GIMPLE loop from
+   TEST_FN_UNSIGNED); results are checked against rd_umod.  */
+#include "pr104116.h"
+#include "tree-vect.h"
+
+TEST_FN_UNSIGNED(__ROUND_MOD, 19u, mod)
+
+int main (void)
+{
+  check_vect ();
+  unsigned int * a = (unsigned int*)&uarr;
+  init_uarr(a, N);
+  mod(a);
+  for (unsigned int i=0; i<N; i++)
+  {
+    unsigned int expected = rd_umod (0xf0000000 + i, 19);
+    if (expected != a[i])
+      abort ();
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116.h b/gcc/testsuite/gcc.dg/vect/pr104116.h
new file mode 100644
index 00000000000..03fbc5f764b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116.h
@@ -0,0 +1,201 @@
+#define TEST_FN(OP, CONST, NAME) \
+__attribute__((noinline)) \
+void __GIMPLE (ssa,guessed_local(10737416)) \
+NAME (int * a) \
+{ \
+  int i; \
+  long unsigned int _1; \
+  long unsigned int _2; \
+  int * _3; \
+  int _4; \
+  int _5; \
+  unsigned int _12; \
+  unsigned int _13; \
+ \
+  __BB(2,guessed_local(10737416)): \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(3,loop_header(1),guessed_local(1063004408)): \
+  i_14 = __PHI (__BB5: i_11, __BB2: 0); \
+  _13 = __PHI (__BB5: _12, __BB2: 1024u); \
+  _1 = (long unsigned int) i_14; \
+  _2 = _1 * 4ul; \
+  _3 = a_9(D) + _2; \
+  _4 = __MEM <int> (_3); \
+  _5 = _4 OP CONST; \
+  __MEM <int> (_3) = _5; \
+  i_11 = i_14 + 1; \
+  _12 = _13 - 1u; \
+  if (_12 != 0u) \
+    goto __BB5(guessed(132861994)); \
+  else \
+    goto __BB4(guessed(1355734)); \
+ \
+  __BB(5,guessed_local(1052266995)): \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(4,guessed_local(10737416)): \
+  return; \
+ \
+} \
+
+#define TEST_FN_UNSIGNED(OP, CONST, NAME) \
+__attribute__((noinline)) \
+void __GIMPLE (ssa,guessed_local(10737416)) \
+NAME (unsigned int * a) \
+{ \
+  int i; \
+  long unsigned int _1; \
+  long unsigned int _2; \
+  unsigned int * _3; \
+  unsigned int _4; \
+  unsigned int _5; \
+  unsigned int _12; \
+  unsigned int _13; \
+ \
+  __BB(2,guessed_local(10737416)): \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(3,loop_header(1),guessed_local(1063004408)): \
+  i_14 = __PHI (__BB5: i_11, __BB2: 0); \
+  _13 = __PHI (__BB5: _12, __BB2: 1024u); \
+  _1 = (long unsigned int) i_14; \
+  _2 = _1 * 4ul; \
+  _3 = a_9(D) + _2; \
+  _4 = __MEM <unsigned int> (_3); \
+  _5 = _4 OP CONST; \
+  __MEM <unsigned int> (_3) = _5; \
+  i_11 = i_14 + 1; \
+  _12 = _13 - 1u; \
+  if (_12 != 0u) \
+    goto __BB5(guessed(132861994)); \
+  else \
+    goto __BB4(guessed(1355734)); \
+ \
+  __BB(5,guessed_local(1052266995)): \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(4,guessed_local(10737416)): \
+  return; \
+} \
+
+
+#define N 1024
+int arr[N];
+__attribute__((optimize("O0")))
+void init_arr (int *a, int n)
+{
+       for (int i=0; i<n; i++)
+               a[i] = i - n/2;
+}
+
+unsigned int uarr[N];
+__attribute__((optimize("O0")))
+void init_uarr (unsigned int *a, int n)
+{
+  for (unsigned int i=0; i<n; i++)
+    a[i] = 0xf0000000 + i;
+}
+
+int cl_div (int x, int y)
+{
+  int r = x % y;
+  int q = x / y;
+  if (r != 0 && (x ^ y) >= 0)
+    q++;
+  return q;
+}
+
+unsigned int cl_udiv (unsigned int x, unsigned int y)
+{
+  unsigned int r = x % y;
+  unsigned int q = x / y;
+  if (r > 0)
+      q++;
+  return q;
+}
+
+int cl_mod (int x, int y)
+{
+  int r = x % y;
+  if (r != 0 && (x ^ y) >= 0)
+    r -= y;
+  return r;
+}
+
+unsigned int cl_umod (unsigned int x, unsigned int y)
+{
+  /* Reference for unsigned CEIL_MOD: x - ceil (x / y) * y, modulo 2^32.  */
+  unsigned int r = x % y;
+  if (r > 0)
+      r-=y;     /* inexact division: the quotient is bumped, so r wraps */
+  return r;
+}
+
+int fl_div (int x, int y)
+{
+  int r = x % y;
+  int q = x / y;
+  if (r != 0 && (x ^ y) < 0)
+    q--;
+  return q;
+}
+
+
+int fl_mod (int x, int y)
+{
+  int r = x % y;
+  if (r != 0 && (x ^ y) < 0)
+    r += y;
+  return r;
+}
+
+int abs(int x)
+{
+  if (x < 0) return -x;
+  return x;
+}
+
+int rd_mod (int x, int y)
+{
+  int r = x % y;        /* truncating remainder; reference for ROUND_MOD */
+  if (abs(r) > ((abs(y)-1) >> 1))       /* 2*|r| >= |y|, also for y < 0 */
+  {
+    if ((x ^ y) < 0)    /* signs differ: the quotient is decremented */
+      r += y;
+    else
+      r -= y;
+  }
+  return r;
+}
+
+int rd_div (int x, int y)
+{
+  int r = x % y;        /* truncating remainder */
+  int q = x / y;        /* truncating quotient; reference for ROUND_DIV */
+  if (abs(r) > ((abs(y)-1) >> 1))       /* 2*|r| >= |y|, also for y < 0 */
+  {
+    if ((x ^ y) < 0)    /* negative quotient: round away from zero */
+      q--;
+    else
+      q++;
+  }
+  return q;
+}
+
+unsigned int rd_umod (unsigned int x, unsigned int y)
+{
+  unsigned int r = x % y;
+  if (r > ((y-1) >> 1))
+      r -= y;
+  return r;
+}
+
+unsigned int rd_udiv (unsigned int x, unsigned int y)
+{
+  unsigned int r = x % y;
+  unsigned int q = x / y;
+  if (r > ((y-1) >> 1))
+      q++;
+  return q;
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 74a9a1929ba..f3c737eb940 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4837,6 +4837,281 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo,
stmt_vec_info stmt_vinfo,
   return NULL;
 }
 
+
+/* Function add_code_for_floorceilround_divmod
+   A helper function to add compensation code for implementing FLOOR_MOD_EXPR,
+   FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and
+   ROUND_DIV_EXPR.
+   The quotient and remainder are needed for implementing these operators.
+   FLOOR cases
+   r = x %[fl] y; r = x/[fl] y;
+   is
+   r = x % y; if (r && (x ^ y) < 0) r += y;
+   r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; Respectively
+   Produce following sequence
+   v0 = x^y
+   v1 = -r
+   v2 = r | -r
+   v3 = v0 & v2
+   v4 = v3 < 0
+   if (floor_mod)
+     v5 = v4 ? y : 0
+     v6 = r + v5
+   if (floor_div)
+     v5 = v4 ? -1 : 0
+     v6 = d + v5
+   Similar sequences of vector instructions are produced for the cases below.
+   CEIL cases
+   r = x %[cl] y; r = x/[cl] y;
+   is
+   r = x % y; if (r && (x ^ y) >= 0) r -= y;
+   r = x % y; if (r) r -= y; (unsigned)
+   r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
+   r = x % y; d = x/y; if (r) d++; (unsigned)
+   ROUND cases
+   r = x %[rd] y; r = x/[rd] y;
+   is
+   r = x % y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) r -= y; else r +=
y;
+   r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned)
+   r = x % y; d = x/y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) d++; else
d--;
+   r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned)
+   Inputs:
+     VECTYPE: Vector type of the operands
+     STMT_VINFO: Statement where pattern begins
+     RHS_CODE: One of the FLOOR_*, CEIL_* or ROUND_* MOD/DIV tree codes
+     Q: The quotient of division
+     R: Remainder of division
+     OPRND0/OPRND1: Actual operands involved
+     ITYPE: tree type of oprnd0
+   Output:
+     NULL if vectorization not possible
+     Gimple statement based on rhs_code
+*/
+static gimple *
+add_code_for_floorceilround_divmod (tree vectype, vec_info *vinfo,
+                                   stmt_vec_info stmt_vinfo,
+                                   enum tree_code rhs_code, tree q,
tree r,
+                                   tree oprnd0, tree oprnd1, tree
itype)
+{
+  gimple *def_stmt;
+  tree mask_vectype = truth_type_for (vectype);
+  if (!mask_vectype)
+    return NULL;
+  tree bool_cond;
+  bool unsigned_p = TYPE_UNSIGNED (itype);
+
+  switch (rhs_code)
+    {
+    case FLOOR_MOD_EXPR:
+    case FLOOR_DIV_EXPR:
+    case CEIL_MOD_EXPR:
+    case CEIL_DIV_EXPR:
+      {
+       if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
+           || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
+           || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
+           || !target_has_vecop_for_code (PLUS_EXPR, vectype)
+           || !target_has_vecop_for_code (MINUS_EXPR, vectype)
+           || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
+           || !expand_vec_cond_expr_p (vectype, mask_vectype))
+         return NULL;
+       if (unsigned_p)
+         {
+           gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code ==
CEIL_DIV_EXPR);
+
+           if (!expand_vec_cmp_expr_p (vectype, mask_vectype,
GT_EXPR))
+             return NULL;
+           bool is_mod = rhs_code == CEIL_MOD_EXPR;
+           // r > 0
+           bool_cond = vect_recog_temp_ssa_var (boolean_type_node,
NULL);
+           def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r,
+                                           build_int_cst (itype, 0));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
mask_vectype,
+                                   itype);
+
+           // (r > 0) ? y : 0 (mod)
+           // (r > 0) ? 1 : 0 (div)
+           tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt
+             = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
+                                    is_mod ? oprnd1 : build_int_cst
(itype, 1),
+                                    build_int_cst (itype, 0));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           // r -= (r > 0) ? y : 0 (mod)
+           // d += (r > 0) ? 1 : 0 (div)
+           tree result = vect_recog_temp_ssa_var (itype, NULL);
+           return gimple_build_assign (result, is_mod ? MINUS_EXPR :
PLUS_EXPR,
+                                       is_mod ? r : q, extr_cond);
+         }
+       else
+         {
+           bool ceil_p
+             = (rhs_code == CEIL_MOD_EXPR || rhs_code ==
CEIL_DIV_EXPR);
+           if (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR,
vectype))
+             return NULL;
+           // x ^ y
+           tree xort = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR,
oprnd0, oprnd1);
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           tree cond_reg = xort;
+           // ~(x ^ y) (ceil)
+           if (ceil_p)
+             {
+               cond_reg = vect_recog_temp_ssa_var (itype, NULL);
+               def_stmt = gimple_build_assign (cond_reg,
BIT_NOT_EXPR, xort);
+               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+             }
+
+           // -r
+           tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           // r | -r , sign bit is set if r!=0
+           tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt
+             = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r,
negate_r);
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           // (x ^ y) & (r | -r)
+           // ~(x ^ y) & (r | -r) (ceil)
+           tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype,
NULL);
+           def_stmt = gimple_build_assign (r_or_negr_and_xor,
BIT_AND_EXPR,
+                                           r_or_negr, cond_reg);
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           // (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0
&& r!=0)
+           bool_cond = vect_recog_temp_ssa_var (boolean_type_node,
NULL);
+           def_stmt
+             = gimple_build_assign (bool_cond, LT_EXPR,
r_or_negr_and_xor,
+                                    build_int_cst (itype, 0));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
mask_vectype,
+                                   itype);
+
+           // (x^y < 0 && r) ? y : 0 (mod)
+           // (x^y < 0 && r) ? -1 : 0 (div)
+           bool is_mod
+             = (rhs_code == FLOOR_MOD_EXPR || rhs_code ==
CEIL_MOD_EXPR);
+           tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt = gimple_build_assign (extr_cond, COND_EXPR,
bool_cond,
+                                           is_mod ? oprnd1
+                                                  : build_int_cst
(itype, -1),
+                                           build_int_cst (itype, 0));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           // r += (x ^ y < 0 && r) ? y : 0 (floor mod)
+           // d += (x^y < 0 && r) ? -1 : 0 (floor div)
+           // r -= (x ^ y >= 0 && r) ? y : 0 (ceil mod)
+           // d -= (x^y >= 0 && r) ? -1 : 0 (ceil div)
+           tree result = vect_recog_temp_ssa_var (itype, NULL);
+           return gimple_build_assign (result,
+                                       (rhs_code == FLOOR_MOD_EXPR
+                                        || rhs_code ==
FLOOR_DIV_EXPR)
+                                         ? PLUS_EXPR
+                                         : MINUS_EXPR,
+                                       is_mod ? r : q, extr_cond);
+         }
+      }
+    case ROUND_MOD_EXPR:
+    case ROUND_DIV_EXPR:
+      {
+       if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
+           || !target_has_vecop_for_code (PLUS_EXPR, vectype)
+           || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
+           || !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)
+           || !expand_vec_cond_expr_p (vectype, mask_vectype))
+         return NULL;
+
+       bool is_mod = rhs_code == ROUND_MOD_EXPR;
+       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
+       unsigned HOST_WIDE_INT abs_d
+         = (d >= 0 ? (unsigned HOST_WIDE_INT) d : -(unsigned
HOST_WIDE_INT) d);
+       unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1;
+       if (!unsigned_p)
+         {
+           // check availability of abs expression for vector
+           if (!target_has_vecop_for_code (ABS_EXPR, vectype))
+             return NULL;
+           // abs (r)
+           tree abs_r = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r);
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           // abs (r) > ((abs (y) - 1) >> 1)
+           tree round_p = vect_recog_temp_ssa_var (boolean_type_node,
NULL);
+           def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r,
+                                           build_int_cst (itype,
mid_d));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
mask_vectype,
+                                   itype);
+
+           // x ^ y
+           tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt
+             = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0,
oprnd1);
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           // x ^ y < 0
+           bool_cond = vect_recog_temp_ssa_var (boolean_type_node,
NULL);
+           def_stmt = gimple_build_assign (bool_cond, LT_EXPR,
cond_reg,
+                                           build_int_cst (itype, 0));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
mask_vectype,
+                                   itype);
+
+           // x ^ y < 0 ? y : -y (mod)
+           // x ^ y < 0 ? -1 : 1 (div)
+           tree val1 = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt
+             = gimple_build_assign (val1, COND_EXPR, bool_cond,
+                                    build_int_cst (itype, is_mod ? d
: -1),
+                                    build_int_cst (itype, is_mod ? -d
: 1));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+           int precision = TYPE_PRECISION (itype);
+           wide_int wmask = wi::mask (precision, false, precision);
+
+           // abs (r) > ((abs (y) - 1) >> 1) ? 0xffffffff : 0
+           tree val2 = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
+                                           wide_int_to_tree (itype,
wmask),
+                                           build_int_cst (itype, 0));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           tree fval = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1,
val2);
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           tree result = vect_recog_temp_ssa_var (itype, NULL);
+           return gimple_build_assign (result, PLUS_EXPR, is_mod ? r
: q,
+                                       fval);
+         }
+       else
+         {
+           // r > (y-1 >> 1)
+           tree round_p = vect_recog_temp_ssa_var (boolean_type_node,
NULL);
+           def_stmt = gimple_build_assign (round_p, GT_EXPR, r,
+                                           build_int_cst (itype,
mid_d));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
mask_vectype,
+                                   itype);
+
+           // (r > (y-1)>>1) ? (is_mod ? -d : 1) : 0
+           tree val2 = vect_recog_temp_ssa_var (itype, NULL);
+           def_stmt
+             = gimple_build_assign (val2, COND_EXPR, round_p,
+                                    build_int_cst (itype, is_mod ? -d
: 1),
+                                    build_int_cst (itype, 0));
+           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+           tree result = vect_recog_temp_ssa_var (itype, NULL);
+           return gimple_build_assign (result, PLUS_EXPR, is_mod ? r
: q,
+                                       val2);
+         }
+      }
+    default:
+      return NULL;
+    }
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
@@ -4881,7 +5156,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
 {
   gimple *last_stmt = stmt_vinfo->stmt;
   tree oprnd0, oprnd1, vectype, itype, cond;
-  gimple *pattern_stmt, *def_stmt;
+  gimple *pattern_stmt = NULL;
+  gimple *def_stmt = NULL;
   enum tree_code rhs_code;
   optab optab;
   tree q, cst;
@@ -4898,6 +5174,12 @@ vect_recog_divmod_pattern (vec_info *vinfo,
     case TRUNC_DIV_EXPR:
     case EXACT_DIV_EXPR:
     case TRUNC_MOD_EXPR:
+    case FLOOR_MOD_EXPR:
+    case FLOOR_DIV_EXPR:
+    case CEIL_MOD_EXPR:
+    case CEIL_DIV_EXPR:
+    case ROUND_MOD_EXPR:
+    case ROUND_DIV_EXPR:
       break;
     default:
       return NULL;
@@ -4929,9 +5211,16 @@ vect_recog_divmod_pattern (vec_info *vinfo,
     }
 
   prec = TYPE_PRECISION (itype);
+
+  bool is_flclrd_moddiv_p =
+    rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR
+    || rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR
+    || rhs_code == ROUND_MOD_EXPR || rhs_code == ROUND_DIV_EXPR;
   if (integer_pow2p (oprnd1))
     {
-      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
+      if ((TYPE_UNSIGNED (itype)
+          && (rhs_code == FLOOR_MOD_EXPR || rhs_code ==
FLOOR_DIV_EXPR))
+         || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;
 
       /* Pattern detected.  */
@@ -4948,18 +5237,27 @@ vect_recog_divmod_pattern (vec_info *vinfo,
          tree var_div = vect_recog_temp_ssa_var (itype, NULL);
          gimple *div_stmt = gimple_build_call_internal (ifn, 2,
oprnd0, shift);
          gimple_call_set_lhs (div_stmt, var_div);
-
-         if (rhs_code == TRUNC_MOD_EXPR)
+         if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
            {
              append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
+             tree t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
-               = gimple_build_assign (vect_recog_temp_ssa_var (itype,
NULL),
-                                      LSHIFT_EXPR, var_div, shift);
+               = gimple_build_assign (t1, LSHIFT_EXPR, var_div,
shift);
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
              pattern_stmt
                = gimple_build_assign (vect_recog_temp_ssa_var (itype,
NULL),
-                                      MINUS_EXPR, oprnd0,
-                                      gimple_assign_lhs (def_stmt));
+                                      MINUS_EXPR, oprnd0, t1);
+             if (is_flclrd_moddiv_p)
+               {
+                 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+                 /* R is the remainder oprnd0 - t1, not the multiple t1.  */
+                 tree rem = gimple_assign_lhs (pattern_stmt);
+                 pattern_stmt = add_code_for_floorceilround_divmod
+                   (vectype, vinfo, stmt_vinfo, rhs_code, var_div, rem,
+                    oprnd0, oprnd1, itype);
+                 if (pattern_stmt == NULL)
+                   return NULL;
+               }
            }
          else
            pattern_stmt = div_stmt;
@@ -4973,8 +5271,12 @@ vect_recog_divmod_pattern (vec_info *vinfo,
                                      build_int_cst (itype, 0));
       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
                              truth_type_for (vectype), itype);
+      tree div_result = NULL_TREE;
       if (rhs_code == TRUNC_DIV_EXPR
-         || rhs_code == EXACT_DIV_EXPR)
+         || rhs_code == EXACT_DIV_EXPR
+         || rhs_code == FLOOR_DIV_EXPR
+         || rhs_code == CEIL_DIV_EXPR
+         || rhs_code == ROUND_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
@@ -4991,12 +5293,17 @@ vect_recog_divmod_pattern (vec_info *vinfo,
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
 
          shift = build_int_cst (itype, tree_log2 (oprnd1));
+         div_result = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt
-           = gimple_build_assign (vect_recog_temp_ssa_var (itype,
NULL),
-                                  RSHIFT_EXPR, var, shift);
+           = gimple_build_assign (div_result, RSHIFT_EXPR, var,
shift);
        }
-      else
+      if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
        {
+         if (rhs_code == FLOOR_DIV_EXPR
+             || rhs_code == CEIL_DIV_EXPR
+             || rhs_code == ROUND_DIV_EXPR)
+           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+
          tree signmask;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
@@ -5041,10 +5348,21 @@ vect_recog_divmod_pattern (vec_info *vinfo,
                                                build_int_cst (itype,
1)));
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
 
+         tree r = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt
-           = gimple_build_assign (vect_recog_temp_ssa_var (itype,
NULL),
-                                  MINUS_EXPR, gimple_assign_lhs
(def_stmt),
+           = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs
(def_stmt),
                                   signmask);
+         if (is_flclrd_moddiv_p)
+           {
+             append_pattern_def_seq (vinfo, stmt_vinfo,
pattern_stmt);
+             pattern_stmt
+               = add_code_for_floorceilround_divmod (vectype, vinfo,
+                                                     stmt_vinfo,
rhs_code,
+                                                     div_result, r,
oprnd0,
+                                                     oprnd1, itype);
+             if (pattern_stmt == NULL)
+               return NULL;
+           }
        }
 
       return pattern_stmt;
@@ -5351,7 +5669,7 @@ vect_recog_divmod_pattern (vec_info *vinfo,
        }
     }
 
-  if (rhs_code == TRUNC_MOD_EXPR)
+  if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
     {
       tree r, t1;
 
@@ -5366,6 +5684,17 @@ vect_recog_divmod_pattern (vec_info *vinfo,
 
       r = vect_recog_temp_ssa_var (itype, NULL);
       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
+
+      if (is_flclrd_moddiv_p)
+       {
+       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+       pattern_stmt
+         = add_code_for_floorceilround_divmod (vectype, vinfo,
stmt_vinfo,
+                                               rhs_code, q, r,
oprnd0, oprnd1,
+                                               itype);
+       if (pattern_stmt == NULL)
+         return NULL;
+       }
     }
 
   /* Pattern detected.  */
-- 
2.51.0

Reply via email to