[PATCH] D151356: [OpenMP] Fix transformed loop's var privacy

2023-06-02 Thread Joel E. Denny via Phabricator via cfe-commits
jdenny added a comment.

Thanks for the review.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151356/new/

https://reviews.llvm.org/D151356

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D151356: [OpenMP] Fix transformed loop's var privacy

2023-06-02 Thread Joel E. Denny via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG19841e4dcaab: [OpenMP] Fix transformed loops var 
privacy (authored by jdenny).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151356/new/

https://reviews.llvm.org/D151356

Files:
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
  openmp/libomptarget/test/offloading/target-tile.c

Index: openmp/libomptarget/test/offloading/target-tile.c
===
--- /dev/null
+++ openmp/libomptarget/test/offloading/target-tile.c
@@ -0,0 +1,62 @@
+// Check that omp tile (introduced in OpenMP 5.1) is permitted and behaves when
+// strictly nested within omp target.
+
+// RUN: %libomptarget-compile-generic -fopenmp-version=51
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+#include 
+
+#define I_NTILES 8
+#define J_NTILES 9
+#define I_NELEMS 2
+#define J_NELEMS 3
+
+int main() {
+  int order[I_NTILES][J_NTILES][I_NELEMS][J_NELEMS];
+  int i, j;
+  #pragma omp target map(tofrom: i, j)
+  {
+int next = 0;
+#pragma omp tile sizes(I_NELEMS, J_NELEMS)
+for (i = 0; i < I_NTILES * I_NELEMS; ++i) {
+  for (j = 0; j < J_NTILES * J_NELEMS; ++j) {
+int iTile = i / I_NELEMS;
+int jTile = j / J_NELEMS;
+int iElem = i % I_NELEMS;
+int jElem = j % J_NELEMS;
+order[iTile][jTile][iElem][jElem] = next++;
+  }
+}
+  }
+  int expected = 0;
+  for (int iTile = 0; iTile < I_NTILES; ++iTile) {
+for (int jTile = 0; jTile < J_NTILES; ++jTile) {
+  for (int iElem = 0; iElem < I_NELEMS; ++iElem) {
+for (int jElem = 0; jElem < J_NELEMS; ++jElem) {
+  int actual = order[iTile][jTile][iElem][jElem];
+  if (expected != actual) {
+printf("error: order[%d][%d][%d][%d] = %d, expected %d\n",
+   iTile, jTile, iElem, jElem, actual, expected);
+return 1;
+  }
+  ++expected;
+}
+  }
+}
+  }
+  // Tiling leaves the loop variables with their values from the final iteration
+  // rather than with the usual +1.
+  expected = I_NTILES * I_NELEMS - 1;
+  if (i != expected) {
+printf("error: i = %d, expected %d\n", i, expected);
+return 1;
+  }
+  expected = J_NTILES * J_NELEMS - 1;
+  if (j != expected) {
+printf("error: j = %d, expected %d\n", j, expected);
+return 1;
+  }
+  // CHECK: success
+  printf("success\n");
+  return 0;
+}
Index: clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
===
--- clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
+++ clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
@@ -21,7 +21,7 @@
 // IR-NEXT:store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4
 // IR-NEXT:store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4
 // IR-NEXT:store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4
-// IR-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @2, i32 3, ptr @func.omp_outlined, ptr %[[END_ADDR]], ptr %[[STEP_ADDR]], ptr %[[START_ADDR]])
+// IR-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @2, i32 3, ptr @func.omp_outlined, ptr %[[START_ADDR]], ptr %[[END_ADDR]], ptr %[[STEP_ADDR]])
 // IR-NEXT:ret void
 // IR-NEXT:  }
 extern "C" void func(int start, int end, int step) {
@@ -36,9 +36,9 @@
 // IR-NEXT:  [[ENTRY:.*]]:
 // IR-NEXT:%[[DOTGLOBAL_TID__ADDR:.+]] = alloca ptr, align 8
 // IR-NEXT:%[[DOTBOUND_TID__ADDR:.+]] = alloca ptr, align 8
+// IR-NEXT:%[[START_ADDR:.+]] = alloca ptr, align 8
 // IR-NEXT:%[[END_ADDR:.+]] = alloca ptr, align 8
 // IR-NEXT:%[[STEP_ADDR:.+]] = alloca ptr, align 8
-// IR-NEXT:%[[START_ADDR:.+]] = alloca ptr, align 8
 // IR-NEXT:%[[DOTOMP_IV:.+]] = alloca i32, align 4
 // IR-NEXT:%[[TMP:.+]] = alloca i32, align 4
 // IR-NEXT:%[[I:.+]] = alloca i32, align 4
@@ -57,12 +57,12 @@
 // IR-NEXT:%[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
 // IR-NEXT:store ptr %[[DOTGLOBAL_TID_:.+]], ptr %[[DOTGLOBAL_TID__ADDR]], align 8
 // IR-NEXT:store ptr %[[DOTBOUND_TID_:.+]], ptr %[[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT:store ptr %[[START:.+]], ptr %[[START_ADDR]], align 8
 // IR-NEXT:store ptr %[[END:.+]], ptr %[[END_ADDR]], align 8
 // IR-NEXT:store ptr %[[STEP:.+]], ptr %[[STEP_ADDR]], align 8
-// IR-NEXT:store ptr %[[START:.+]], ptr %[[START_ADDR]], align 8
+// IR-NEXT:%[[TMP2:.+]] = load ptr, ptr %[[START_ADDR]], align 8
 // IR-NEXT:%[[TMP0:.+]] = load ptr, ptr %[[END_ADDR]], align 8
 // IR-NEXT:%[[TMP1:.+]] = load ptr, ptr %[[STEP_ADDR]], align 8
-// IR-NEXT:%[[TMP2:.+]] = load ptr, ptr %[[START_ADDR]], align 8
 // IR-NEXT:%[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4
 // IR-NEXT:store i32 %[[TMP3]], ptr %[[I]], align 4
 // 

[PATCH] D151356: [OpenMP] Fix transformed loop's var privacy

2023-05-24 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev accepted this revision.
ABataev added a comment.
This revision is now accepted and ready to land.

LG


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D151356/new/

https://reviews.llvm.org/D151356

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D151356: [OpenMP] Fix transformed loop's var privacy

2023-05-24 Thread Joel E. Denny via Phabricator via cfe-commits
jdenny created this revision.
jdenny added reviewers: Meinersbur, ABataev.
jdenny added a project: OpenMP.
Herald added subscribers: sunshaoce, zzheng, guansong, yaxunl.
Herald added a project: All.
jdenny requested review of this revision.
Herald added a reviewer: jdoerfert.
Herald added subscribers: openmp-commits, cfe-commits, jplehr, sstefan1.
Herald added a project: clang.

Without this patch, the following example crashes Clang:

  #pragma omp target map(i)
  #pragma omp tile sizes(2)
  for (i = 0; i < N; ++i)
;

This patch fixes the crash by changing `Sema::isOpenMPPrivateDecl` not 
to identify `i` as private just because it's the loop variable of a
`tile` construct.

While OpenMP TR11 and earlier do specify privacy for loop variables of
loops *generated* from a `tile` construct, I haven't found text
stating that the original loop variable must be private in the above
example, so this patch leaves it shared.  Even so, it is a bit 
unexpected that value of `i` after the loop is `N - 1` instead of `N`.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D151356

Files:
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
  openmp/libomptarget/test/offloading/target-tile.c

Index: openmp/libomptarget/test/offloading/target-tile.c
===
--- /dev/null
+++ openmp/libomptarget/test/offloading/target-tile.c
@@ -0,0 +1,62 @@
+// Check that omp tile (introduced in OpenMP 5.1) is permitted and behaves when
+// strictly nested within omp target.
+
+// RUN: %libomptarget-compile-generic -fopenmp-version=51
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+#include 
+
+#define I_NTILES 8
+#define J_NTILES 9
+#define I_NELEMS 2
+#define J_NELEMS 3
+
+int main() {
+  int order[I_NTILES][J_NTILES][I_NELEMS][J_NELEMS];
+  int i, j;
+  #pragma omp target map(tofrom: i, j)
+  {
+int next = 0;
+#pragma omp tile sizes(I_NELEMS, J_NELEMS)
+for (i = 0; i < I_NTILES * I_NELEMS; ++i) {
+  for (j = 0; j < J_NTILES * J_NELEMS; ++j) {
+int iTile = i / I_NELEMS;
+int jTile = j / J_NELEMS;
+int iElem = i % I_NELEMS;
+int jElem = j % J_NELEMS;
+order[iTile][jTile][iElem][jElem] = next++;
+  }
+}
+  }
+  int expected = 0;
+  for (int iTile = 0; iTile < I_NTILES; ++iTile) {
+for (int jTile = 0; jTile < J_NTILES; ++jTile) {
+  for (int iElem = 0; iElem < I_NELEMS; ++iElem) {
+for (int jElem = 0; jElem < J_NELEMS; ++jElem) {
+  int actual = order[iTile][jTile][iElem][jElem];
+  if (expected != actual) {
+printf("error: order[%d][%d][%d][%d] = %d, expected %d\n",
+   iTile, jTile, iElem, jElem, actual, expected);
+return 1;
+  }
+  ++expected;
+}
+  }
+}
+  }
+  // Tiling leaves the loop variables with their values from the final iteration
+  // rather than with the usual +1.
+  expected = I_NTILES * I_NELEMS - 1;
+  if (i != expected) {
+printf("error: i = %d, expected %d\n", i, expected);
+return 1;
+  }
+  expected = J_NTILES * J_NELEMS - 1;
+  if (j != expected) {
+printf("error: j = %d, expected %d\n", j, expected);
+return 1;
+  }
+  // CHECK: success
+  printf("success\n");
+  return 0;
+}
Index: clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
===
--- clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
+++ clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
@@ -21,7 +21,7 @@
 // IR-NEXT:store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4
 // IR-NEXT:store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4
 // IR-NEXT:store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4
-// IR-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @2, i32 3, ptr @func.omp_outlined, ptr %[[END_ADDR]], ptr %[[STEP_ADDR]], ptr %[[START_ADDR]])
+// IR-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @2, i32 3, ptr @func.omp_outlined, ptr %[[START_ADDR]], ptr %[[END_ADDR]], ptr %[[STEP_ADDR]])
 // IR-NEXT:ret void
 // IR-NEXT:  }
 extern "C" void func(int start, int end, int step) {
@@ -36,9 +36,9 @@
 // IR-NEXT:  [[ENTRY:.*]]:
 // IR-NEXT:%[[DOTGLOBAL_TID__ADDR:.+]] = alloca ptr, align 8
 // IR-NEXT:%[[DOTBOUND_TID__ADDR:.+]] = alloca ptr, align 8
+// IR-NEXT:%[[START_ADDR:.+]] = alloca ptr, align 8
 // IR-NEXT:%[[END_ADDR:.+]] = alloca ptr, align 8
 // IR-NEXT:%[[STEP_ADDR:.+]] = alloca ptr, align 8
-// IR-NEXT:%[[START_ADDR:.+]] = alloca ptr, align 8
 // IR-NEXT:%[[DOTOMP_IV:.+]] = alloca i32, align 4
 // IR-NEXT:%[[TMP:.+]] = alloca i32, align 4
 // IR-NEXT:%[[I:.+]] = alloca i32, align 4
@@ -57,12 +57,12 @@
 // IR-NEXT:%[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
 // IR-NEXT:store ptr %[[DOTGLOBAL_TID_:.+]], ptr %[[DOTGLOBAL_TID__ADDR]], align 8
 // IR-NEXT:store ptr