[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-19 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 added a comment.

Thank you so much for Alex's review!!!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-19 Thread Jennifer Yu via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc274b1986680: Add implicit map for a list item appears in a 
reduction clause. (authored by jyu2).

Changed prior to commit:
  https://reviews.llvm.org/D108132?vs=366991=367597#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaOpenMP.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/OpenMP/reduction_implicit_map.cpp
  openmp/libomptarget/test/mapping/reduction_implicit_map.cpp

Index: openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
===
--- /dev/null
+++ openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
@@ -0,0 +1,28 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// amdgcn does not have printf definition
+// UNSUPPORTED: amdgcn-amd-amdhsa
+
+#include 
+
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+:output[0]) \
+ map(to:input[0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+}
+int main()
+{
+  const int size = 100;
+  int *array = new int[size];
+  int result = 0;
+  for (int i = 0; i < size; i++)
+array[i] = i + 1;
+  sum(array, size, );
+  // CHECK: Result=5050
+  printf("Result=%d\n", result);
+  delete[] array;
+  return 0;
+}
+
Index: clang/test/OpenMP/reduction_implicit_map.cpp
===
--- /dev/null
+++ clang/test/OpenMP/reduction_implicit_map.cpp
@@ -0,0 +1,122 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple powerpc64le-unknown-unknown -DCUDA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
+// RUN:  %t-ppc-host.bc
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple nvptx64-unknown-unknown -DCUA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \
+// RUN:  -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
+// RUN:  -o - | FileCheck %s --check-prefix CHECK
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple powerpc64le-unknown-unknown -DDIAG\
+// RUN:   -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK1
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple i386-unknown-unknown \
+// RUN:   -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK2
+
+
+#if defined(CUDA)
+// expected-no-diagnostics
+
+int foo(int n) {
+  double *e;
+  //no error and no implicit map generated for e[:1]
+  #pragma omp target parallel reduction(+: e[:1])
+*e=10;
+  ;
+  return 0;
+}
+// CHECK-NOT @.offload_maptypes
+// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(
+#elif defined(DIAG)
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+  S2(S2 ):a(s2.a) { }
+  S2  +(S2 );
+};
+int bar() {
+ S2 o[5];
+  //warnig "copyable and not guaranteed to be mapped correctly" and
+  //implicit map generated.
+#pragma omp target parallel reduction(+:o[0]) //expected-warning {{Type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}}
+  for (int i = 0; i < 10; i++);
+  double b[10][10][10];
+  //no error no implicit map generated, the map for b is generated but not
+  //for b[0:2][2:4][1].
+#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
+  for (long long i = 0; i < 10; ++i);
+  return 0;
+}
+// map for variable o
+// CHECK1: offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK1: offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+// map for b:
+// CHECK1: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8000]
+// CHECK1: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547]
+#else
+// expected-no-diagnostics
+
+// generate implicit map for array elements or array sections in reduction
+// clause. In following case: the implicit map is generate for output[0]
+// with map size 4 and output[:3] with map size 12.
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+: output[0]) \
+ map(to: input [0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+#pragma omp target teams distribute parallel for reduction(+: output[:3])  \
+ map(to: input [0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+  int a[10];
+#pragma omp target parallel reduction(+: a[:2])
+  for (int i = 0; i < size; i++)
+;
+#pragma omp target parallel reduction(+: a[3])
+  for (int i = 0; i < size; i++)
+;
+}
+//CHECK2: 

[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev accepted this revision.
ABataev added a comment.
This revision is now accepted and ready to land.

LG


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 updated this revision to Diff 366991.
jyu2 added a comment.

Address Alex's comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaOpenMP.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/OpenMP/reduction_implicit_map.cpp
  openmp/libomptarget/test/mapping/reduction_implicit_map.cpp

Index: openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
===
--- /dev/null
+++ openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
@@ -0,0 +1,28 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// amdgcn does not have printf definition
+// UNSUPPORTED: amdgcn-amd-amdhsa
+
+#include 
+
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+:output[0]) \
+ map(to:input[0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+}
+int main()
+{
+  const int size = 100;
+  int *array = new int[size];
+  int result = 0;
+  for (int i = 0; i < size; i++)
+array[i] = i + 1;
+  sum(array, size, );
+  // CHECK: Result=5050
+  printf("Result=%d\n", result);
+  delete[] array;
+  return 0;
+}
+
Index: clang/test/OpenMP/reduction_implicit_map.cpp
===
--- /dev/null
+++ clang/test/OpenMP/reduction_implicit_map.cpp
@@ -0,0 +1,122 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple powerpc64le-unknown-unknown -DCUDA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
+// RUN:  %t-ppc-host.bc
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple nvptx64-unknown-unknown -DCUA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \
+// RUN:  -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
+// RUN:  -o - | FileCheck %s --check-prefix CHECK
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple powerpc64le-unknown-unknown -DDIAG\
+// RUN:   -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK1
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple i386-unknown-unknown \
+// RUN:   -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK2
+
+
+#if defined(CUDA)
+// expected-no-diagnostics
+
+int foo(int n) {
+  double *e;
+  //no error and no implicit map generated for e[:1]
+  #pragma omp target parallel reduction(+: e[:1])
+*e=10;
+  ;
+  return 0;
+}
+// CHECK-NOT @.offload_maptypes
+// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(
+#elif defined(DIAG)
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+  S2(S2 ):a(s2.a) { }
+  S2  +(S2 );
+};
+int bar() {
+ S2 o[5];
+  //warnig "copyable and not guaranteed to be mapped correctly" and
+  //implicit map generated.
+#pragma omp target parallel reduction(+:o[0]) //expected-warning {{Type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}}
+  for (int i = 0; i < 10; i++);
+  double b[10][10][10];
+  //no error no implicit map generated, the map for b is generated but not
+  //for b[0:2][2:4][1].
+#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
+  for (long long i = 0; i < 10; ++i);
+  return 0;
+}
+// map for variable o
+// CHECK1: offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK1: offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+// map for b:
+// CHECK1: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8000]
+// CHECK1: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547]
+#else
+// expected-no-diagnostics
+
+// generate implicit map for array elements or array sections in reduction
+// clause. In following case: the implicit map is generate for output[0]
+// with map size 4 and output[:3] with map size 12.
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+: output[0]) \
+ map(to: input [0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+#pragma omp target teams distribute parallel for reduction(+: output[:3])  \
+ map(to: input [0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+  int a[10];
+#pragma omp target parallel reduction(+: a[:2])
+  for (int i = 0; i < size; i++)
+;
+#pragma omp target parallel reduction(+: a[3])
+  for (int i = 0; i < size; i++)
+;
+}
+//CHECK2: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
+//CHECK2: @.offload_maptypes.10 = private unnamed_addr constant [2 x i64] [i64 800, i64 547]
+//CHECK2: 

[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added inline comments.



Comment at: clang/lib/Sema/SemaOpenMP.cpp:19481-19482
 if (VD && DSAS->isThreadPrivate(VD)) {
+  if (NoDiagnose)
+continue;
   DSAStackTy::DSAVarData DVar = DSAS->getTopDSA(VD, /*FromParent=*/false);

jyu2 wrote:
> ABataev wrote:
> > jyu2 wrote:
> > > ABataev wrote:
> > > > Hmm, should not we still diagnose this case?
> > > The rule is skip the error as well as skip adding implicit map clause.  
> > > So that we don't get regression for old code.
> > I think we already have the check for it for the reduction clause, so I 
> > think we can skip this check here.
> In general yes.  But not for some special case, please look 
> clang/test/OpenMP/reduction_implicit_map.cpp, in the first test,  if I remove 
> this change the error will be emit.  In CUDA device code, the local variable 
> is  implicitly threat as thridprivate.
> 
> 
> #if defined(CUDA)
> // expected-no-diagnostics
> 
> int foo(int n) {
>   double *e;
>   //no error and no implicit map generated for e[:1]
>   #pragma omp target parallel reduction(+: e[:1])
> *e=10;
>   ;
>   return 0;
> }
I just was not quite clear, I agree with your previous answer ;)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 added inline comments.



Comment at: clang/lib/Sema/SemaOpenMP.cpp:5825
+  for (Expr *E : RC->varlists())
+if (!dyn_cast(E))
+  ImplicitExprs.emplace_back(E);

ABataev wrote:
> jyu2 wrote:
> > ABataev wrote:
> > > `isa`. Also, what if this is a `MemberExpr`?
> > Yes isa.   Changed.  Thanks.
> > 
> > For reduction MemeberExpr is not allowed, it is only allowed variable name, 
> > array element or array section.
> I would also do this check if it is a not template parsing/analysis mode + 
> also would do this for `E->IgnoreParensImpCasts()`
The code already under non DependentContext().  I add IgnoreParenImpCasts().  
Thanks.

 if (AStmt && !CurContext->isDependentContext() && Kind != OMPD_atomic &&
  Kind != OMPD_critical && Kind != OMPD_section && Kind != OMPD_master &&
  Kind != OMPD_masked && !isOpenMPLoopTransformationDirective(Kind))



Comment at: clang/lib/Sema/SemaOpenMP.cpp:19481-19482
 if (VD && DSAS->isThreadPrivate(VD)) {
+  if (NoDiagnose)
+continue;
   DSAStackTy::DSAVarData DVar = DSAS->getTopDSA(VD, /*FromParent=*/false);

ABataev wrote:
> jyu2 wrote:
> > ABataev wrote:
> > > Hmm, should not we still diagnose this case?
> > The rule is skip the error as well as skip adding implicit map clause.  So 
> > that we don't get regression for old code.
> I think we already have the check for it for the reduction clause, so I think 
> we can skip this check here.
In general yes.  But not for some special case, please look 
clang/test/OpenMP/reduction_implicit_map.cpp, in the first test,  if I remove 
this change the error will be emit.  In CUDA device code, the local variable is 
 implicitly threat as thridprivate.


#if defined(CUDA)
// expected-no-diagnostics

int foo(int n) {
  double *e;
  //no error and no implicit map generated for e[:1]
  #pragma omp target parallel reduction(+: e[:1])
*e=10;
  ;
  return 0;
}


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added inline comments.



Comment at: clang/lib/Sema/SemaOpenMP.cpp:5825
+  for (Expr *E : RC->varlists())
+if (!dyn_cast(E))
+  ImplicitExprs.emplace_back(E);

jyu2 wrote:
> ABataev wrote:
> > `isa`. Also, what if this is a `MemberExpr`?
> Yes isa.   Changed.  Thanks.
> 
> For reduction MemeberExpr is not allowed, it is only allowed variable name, 
> array element or array section.
I would also do this check if it is a not template parsing/analysis mode + also 
would do this for `E->IgnoreParensImpCasts()`



Comment at: clang/lib/Sema/SemaOpenMP.cpp:19481-19482
 if (VD && DSAS->isThreadPrivate(VD)) {
+  if (NoDiagnose)
+continue;
   DSAStackTy::DSAVarData DVar = DSAS->getTopDSA(VD, /*FromParent=*/false);

jyu2 wrote:
> ABataev wrote:
> > Hmm, should not we still diagnose this case?
> The rule is skip the error as well as skip adding implicit map clause.  So 
> that we don't get regression for old code.
I think we already have the check for it for the reduction clause, so I think 
we can skip this check here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 marked 4 inline comments as not done.
jyu2 added a comment.






Comment at: clang/lib/Sema/SemaOpenMP.cpp:5825
+  for (Expr *E : RC->varlists())
+if (!dyn_cast(E))
+  ImplicitExprs.emplace_back(E);

ABataev wrote:
> `isa`. Also, what if this is a `MemberExpr`?
Yes isa.   Changed.  Thanks.

For reduction MemeberExpr is not allowed, it is only allowed variable name, 
array element or array section.



Comment at: clang/lib/Sema/SemaOpenMP.cpp:18475-18476
+  !QTy.isTriviallyCopyableType(SemaRef.Context)) {
+if (NoDiagnose)
+  return true;
 SemaRef.Diag(SL, diag::warn_omp_non_trivial_type_mapped) << QTy << SR;

ABataev wrote:
> It still might be good to emit the warning if the reduction type is mappable. 
> Also, need to extend this check in general and avoid emitting it if the user 
> defined mapper for the type is provided (in a separate patch, not here).
Thanks.  Make sense, in this case, implicit map will be add, so warning should 
be emit. Changed.



Comment at: clang/lib/Sema/SemaOpenMP.cpp:19435
 SemaRef, SimpleExpr, CurComponents, CKind, DSAS->getCurrentDirective(),
-/*NoDiagnose=*/false);
+/*NoDiagnose=*/NoDiagnose);
 if (!BE)

ABataev wrote:
> You can remove `/*NoDiagnose=*/` here.
Changed.  Thanks.



Comment at: clang/lib/Sema/SemaOpenMP.cpp:19481-19482
 if (VD && DSAS->isThreadPrivate(VD)) {
+  if (NoDiagnose)
+continue;
   DSAStackTy::DSAVarData DVar = DSAS->getTopDSA(VD, /*FromParent=*/false);

ABataev wrote:
> Hmm, should not we still diagnose this case?
The rule is skip the error as well as skip adding implicit map clause.  So that 
we don't get regression for old code.



Comment at: openmp/libomptarget/test/mapping/reduction_implicit_map.cpp:4
+// amdgcn does not have printf definition
+// XFAIL: amdgcn-amd-amdhsa
+

ABataev wrote:
> Can we make UNSUPPORTED instead of XFAIL?
changed.  Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 updated this revision to Diff 366945.
jyu2 edited the summary of this revision.
jyu2 added a comment.

Address Alex's comments.  Thanks Alex.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaOpenMP.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/OpenMP/reduction_implicit_map.cpp
  openmp/libomptarget/test/mapping/reduction_implicit_map.cpp

Index: openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
===
--- /dev/null
+++ openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
@@ -0,0 +1,28 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// amdgcn does not have printf definition
+// UNSUPPORTED: amdgcn-amd-amdhsa
+
+#include 
+
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+:output[0]) \
+ map(to:input[0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+}
+int main()
+{
+  const int size = 100;
+  int *array = new int[size];
+  int result = 0;
+  for (int i = 0; i < size; i++)
+array[i] = i + 1;
+  sum(array, size, );
+  // CHECK: Result=5050
+  printf("Result=%d\n", result);
+  delete[] array;
+  return 0;
+}
+
Index: clang/test/OpenMP/reduction_implicit_map.cpp
===
--- /dev/null
+++ clang/test/OpenMP/reduction_implicit_map.cpp
@@ -0,0 +1,122 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple powerpc64le-unknown-unknown -DCUDA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
+// RUN:  %t-ppc-host.bc
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple nvptx64-unknown-unknown -DCUA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \
+// RUN:  -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
+// RUN:  -o - | FileCheck %s --check-prefix CHECK
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple powerpc64le-unknown-unknown -DDIAG\
+// RUN:   -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK1
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple i386-unknown-unknown \
+// RUN:   -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK2
+
+
+#if defined(CUDA)
+// expected-no-diagnostics
+
+int foo(int n) {
+  double *e;
+  //no error and no implicit map generated for e[:1]
+  #pragma omp target parallel reduction(+: e[:1])
+*e=10;
+  ;
+  return 0;
+}
+// CHECK-NOT @.offload_maptypes
+// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(
+#elif defined(DIAG)
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+  S2(S2 ):a(s2.a) { }
+  S2  +(S2 );
+};
+int bar() {
+ S2 o[5];
+  //warnig "copyable and not guaranteed to be mapped correctly" and
+  //implicit map generated.
+#pragma omp target parallel reduction(+:o[0]) //expected-warning {{Type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}}
+  for (int i = 0; i < 10; i++);
+  double b[10][10][10];
+  //no error no implicit map generated, the map for b is generated but not
+  //for b[0:2][2:4][1].
+#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
+  for (long long i = 0; i < 10; ++i);
+  return 0;
+}
+// map for variable o
+// CHECK1: offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK1: offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+// map for b:
+// CHECK1: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8000]
+// CHECK1: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547]
+#else
+// expected-no-diagnostics
+
+// generate implicit map for array elements or array sections in reduction
+// clause. In following case: the implicit map is generate for output[0]
+// with map size 4 and output[:3] with map size 12.
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+: output[0]) \
+ map(to: input [0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+#pragma omp target teams distribute parallel for reduction(+: output[:3])  \
+ map(to: input [0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+  int a[10];
+#pragma omp target parallel reduction(+: a[:2])
+  for (int i = 0; i < size; i++)
+;
+#pragma omp target parallel reduction(+: a[3])
+  for (int i = 0; i < size; i++)
+;
+}
+//CHECK2: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
+//CHECK2: @.offload_maptypes.10 = private unnamed_addr 

[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added inline comments.



Comment at: clang/lib/Sema/SemaOpenMP.cpp:5825
+  for (Expr *E : RC->varlists())
+if (!dyn_cast(E))
+  ImplicitExprs.emplace_back(E);

`isa`. Also, what if this is a `MemberExpr`?



Comment at: clang/lib/Sema/SemaOpenMP.cpp:18475-18476
+  !QTy.isTriviallyCopyableType(SemaRef.Context)) {
+if (NoDiagnose)
+  return true;
 SemaRef.Diag(SL, diag::warn_omp_non_trivial_type_mapped) << QTy << SR;

It still might be good to emit the warning if the reduction type is mappable. 
Also, need to extend this check in general and avoid emitting it if the user 
defined mapper for the type is provided (in a separate patch, not here).



Comment at: clang/lib/Sema/SemaOpenMP.cpp:19435
 SemaRef, SimpleExpr, CurComponents, CKind, DSAS->getCurrentDirective(),
-/*NoDiagnose=*/false);
+/*NoDiagnose=*/NoDiagnose);
 if (!BE)

You can remove `/*NoDiagnose=*/` here.



Comment at: clang/lib/Sema/SemaOpenMP.cpp:19481-19482
 if (VD && DSAS->isThreadPrivate(VD)) {
+  if (NoDiagnose)
+continue;
   DSAStackTy::DSAVarData DVar = DSAS->getTopDSA(VD, /*FromParent=*/false);

Hmm, should not we still diagnose this case?



Comment at: openmp/libomptarget/test/mapping/reduction_implicit_map.cpp:4
+// amdgcn does not have printf definition
+// XFAIL: amdgcn-amd-amdhsa
+

Can we make UNSUPPORTED instead of XFAIL?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-17 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 updated this revision to Diff 366894.
jyu2 added a comment.
Herald added a project: OpenMP.
Herald added a subscriber: openmp-commits.

Fix format and add new runtime test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaOpenMP.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/OpenMP/reduction_implicit_map.cpp
  openmp/libomptarget/test/mapping/reduction_implicit_map.cpp

Index: openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
===
--- /dev/null
+++ openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
@@ -0,0 +1,28 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// amdgcn does not have printf definition
+// XFAIL: amdgcn-amd-amdhsa
+
+#include 
+
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+:output[0]) \
+ map(to:input[0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+}
+int main()
+{
+  const int size = 100;
+  int *array = new int[size];
+  int result = 0;
+  for (int i = 0; i < size; i++)
+array[i] = i + 1;
+  sum(array, size, );
+  // CHECK: Result=5050
+  printf("Result=%d\n", result);
+  delete[] array;
+  return 0;
+}
+
Index: clang/test/OpenMP/reduction_implicit_map.cpp
===
--- /dev/null
+++ clang/test/OpenMP/reduction_implicit_map.cpp
@@ -0,0 +1,119 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple powerpc64le-unknown-unknown -DCUDA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
+// RUN:  %t-ppc-host.bc
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple nvptx64-unknown-unknown -DCUA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \
+// RUN:  -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
+// RUN:  -o - | FileCheck %s --check-prefix CHECK
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple powerpc64le-unknown-unknown -DDIAG\
+// RUN:   -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK1
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple i386-unknown-unknown \
+// RUN:   -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK2
+
+// expected-no-diagnostics
+
+#if defined(CUDA)
+int foo(int n) {
+  double *e;
+  //no error and no implicit map generated for e[:1]
+  #pragma omp target parallel reduction(+: e[:1])
+*e=10;
+  ;
+  return 0;
+}
+// CHECK-NOT @.offload_maptypes
+// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(
+#elif defined(DIAG)
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+  S2(S2 ):a(s2.a) { }
+  S2  +(S2 );
+};
+int bar() {
+ S2 o[5];
+  //no warnig "copyable and not guaranteed to be mapped correctly" and
+  //implicit map generated.
+#pragma omp target reduction(+:o[0])
+  for (int i = 0; i < 10; i++);
+  double b[10][10][10];
+  //no error no implicit map generated, the map for b is generated but not
+  //for b[0:2][2:4][1].
+#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
+  for (long long i = 0; i < 10; ++i);
+  return 0;
+}
+// map for variable o
+// CHECK1: offload_sizes = private unnamed_addr constant [1 x i64] [i64 20]
+// CHECK1: offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+// map for b:
+// CHECK1: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 8000]
+// CHECK1: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 547]
+#else
+// generate implicit map for array elements or array sections in reduction
+// clause. In following case: the implicit map is generate for output[0]
+// with map size 4 and output[:3] with map size 12.
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+: output[0]) \
+ map(to: input [0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+#pragma omp target teams distribute parallel for reduction(+: output[:3])  \
+ map(to: input [0:size])
+  for (int i = 0; i < size; i++)
+output[0] += input[i];
+  int a[10];
+#pragma omp target parallel reduction(+: a[:2])
+  for (int i = 0; i < size; i++)
+;
+#pragma omp target parallel reduction(+: a[3])
+  for (int i = 0; i < size; i++)
+;
+}
+//CHECK2: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
+//CHECK2: @.offload_maptypes.10 = private unnamed_addr constant [2 x i64] [i64 800, i64 547]
+//CHECK2: @.offload_sizes.13 = private unnamed_addr constant [2 x i64] [i64 4, i64 

[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-16 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added a comment.

In D108132#2947080 , @jyu2 wrote:

>>> I am not sure I can do that. Do you mean when generate map adding coding 
>>> code to look though reduction clause and generate map for it?
>
>
>
>> Yes, exactly.
>
> We are missing mappable checking for example:
>
> #pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
>
> In this, we should not add map clause, since the section is not contiguous 
> storage.

.
OK, and what's the problem to check for this in codegen? Also, we can map 
non-contiguous storage, at least in some cases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-16 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 added a comment.

>> I am not sure I can do that. Do you mean when generate map adding coding 
>> code to look though reduction clause and generate map for it?



> Yes, exactly.

We are missing mappable checking for example:

#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])

In this, we should not add map clause, since the section is not contiguous 
storage.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-16 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added a comment.

In D108132#2947053 , @jyu2 wrote:

> Hi ABataev,
> Thanks for  reviedw.
>
> In D108132#2946927 , @ABataev wrote:
>
>> Why it can not be performed in codegen?
>
> I am not sure I can do that.  Do you mean when generate map adding coding 
> code to look though reduction clause and generate map for it?

Yes, exactly.

> Here is the runtime test, I am trying to find way on how to add runtime test 
> in clang.  But in my added test reduction_implicit_map.cpp, I did checked IR 
> for this.

Add it to libomptarget.

> The command line: without may change:
> cmplrllvm-25845>clang -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu o.cpp -g
> cmplrllvm-25845>./a.out
> Segmentation fault (core dumped)
> with may change:
> cmplrllvm-25845> ./a.out
> Result=5050
>
> test
> 
>
> extern "C" int printf(const char *,...);
> void sum(int* input, int size, int* output)
> {
>
>   #pragma omp target teams distribute parallel for reduction(+:output[0]) 
> map(to:input[0:size]) //map(output[0])
>   for(int i=0; i   output[0] += input[i];
>
> }
> int main()
> {
>
>   const int size = 100;
>   int *array = new int[size];
>   int result = 0;
>   for (int i = 0; i < size; i++)
>   array[i] = i + 1;
>   sum(array, size, );
>   printf("Result=%d\n", result);
>   delete[] array;
>   return 0;
>
> }




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-16 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 added a comment.

Hi ABataev,
Thanks for  reviedw.

In D108132#2946927 , @ABataev wrote:

> Why it can not be performed in codegen?

I am not sure I can do that.  Do you mean when generate map adding coding code 
to look though reduction clause and generate map for it?

Here is the runtime test, I am trying to find way on how to add runtime test in 
clang.  But in my added test reduction_implicit_map.cpp, I did checked IR for 
this.

The command line: without may change:
cmplrllvm-25845>clang -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu o.cpp -g
cmplrllvm-25845>./a.out
Segmentation fault (core dumped)
with may change:
cmplrllvm-25845> ./a.out
Result=5050

test


extern "C" int printf(const char *,...);
void sum(int* input, int size, int* output)
{

  #pragma omp target teams distribute parallel for reduction(+:output[0]) 
map(to:input[0:size]) //map(output[0])
  for(int i=0; ihttps://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-16 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added a comment.

Also, would be good to see a runtime test, which reveals the issue.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-16 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added a comment.

Why it can not be performed in codegen?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108132/new/

https://reviews.llvm.org/D108132

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108132: Add implicit map for a list item appears in a reduction clause.

2021-08-16 Thread Jennifer Yu via Phabricator via cfe-commits
jyu2 created this revision.
jyu2 added reviewers: ABataev, mikerice, jdoerfert.
jyu2 requested review of this revision.
Herald added a subscriber: sstefan1.
Herald added a project: clang.

A new rule is added in 5.0:
If a list item appears in a reduction, lastprivate or linear clause
on a combined target construct then it is treated as if it also appears
in a map clause with a map-type of tofrom.

Currently map clauses for all capture variables are added implicitly.
But missing for list item of expression for array elements or array
sections.

The change is to add implicit map clause for array of elements used in
reduction clause. Skip adding map clause if the expression is not
mappable.
Noted: For linear and lastprivate, since only variable name is
accepted, the map has been added though capture variables.

To do so:
During the mappable checking, if error, ignore diagnose and skip
adding implicit map clause.

The changes:
1> Add code to generate implicit map in ActOnOpenMPExecutableDirective,

  for omp 5.0 and up.

2> Add extra default parameter NoDiagnose in ActOnOpenMPMapClause:
Use that to skip error as well as skip adding implicit map during the
mappable checking.

Note: there are only three places need to be check for NoDiagnose. Rest
of them either the check is for < omp 5.0 or the error already generated for
reduction clause.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D108132

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaOpenMP.cpp
  clang/lib/Sema/TreeTransform.h
  clang/test/OpenMP/reduction_implicit_map.cpp

Index: clang/test/OpenMP/reduction_implicit_map.cpp
===
--- /dev/null
+++ clang/test/OpenMP/reduction_implicit_map.cpp
@@ -0,0 +1,115 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple powerpc64le-unknown-unknown -DCUDA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
+// RUN:  %t-ppc-host.bc
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
+// RUN:  -triple nvptx64-unknown-unknown -DCUA \
+// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \
+// RUN:  -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
+// RUN:  -o - | FileCheck %s --check-prefix CHECK
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple powerpc64le-unknown-unknown -DDIAG\
+// RUN:   -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK1
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ \
+// RUN:   -triple i386-unknown-unknown \
+// RUN:   -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
+// RUN:   %s -o - | FileCheck  %s \
+// RUN:   --check-prefix=CHECK2
+
+// expected-no-diagnostics
+
+#if defined(CUDA)
+int foo(int n) {
+  double *e;
+  //no error and no implicit map generated for e[:1]
+  #pragma omp target parallel reduction(+: e[:1])
+*e=10;
+  ;
+  return 0;
+}
+// CHECK-NOT @.offload_maptypes
+// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(
+#elif defined(DIAG)
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+  S2(S2 ):a(s2.a) { }
+  S2  +(S2 );
+};
+int bar() {
+ S2 o[5];
+  //no warnig "copyable and not guaranteed to be mapped correctly" and
+  //implicit map generated.
+#pragma omp target reduction(+:o[0])
+  for (int i = 0; i < 10; i++);
+  double b[10][10][10];
+  //no error no implicit map generated, the map for b is generated but not
+  //for b[0:2][2:4][1].
+#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
+  for (long long i = 0; i < 10; ++i);
+  return 0;
+}
+// map for variable o
+// CHECK1: offload_sizes = private unnamed_addr constant [1 x i64] [i64 20]
+// CHECK1: offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
+// map for b:
+// CHECK1: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 8000]
+// CHECK1: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 547]
+#else
+// generate implicit map for array elements or array sections in reduction
+// clause. In following case: the implicit map is generate for output[0]
+// with map size 4 and output[:3] with map size 12.
+void sum(int* input, int size, int* output)
+{
+#pragma omp target teams distribute parallel for reduction(+:output[0]) map(to:input[0:size])
+for(int i=0; i VarList,
   const OMPVarListLocTy , ArrayRef UnresolvedMappers) {
-return getSema().ActOnOpenMPMapClause(MapTypeModifiers, MapTypeModifiersLoc,
-  MapperIdScopeSpec, MapperId, MapType,
-  IsMapTypeImplicit, MapLoc, ColonLoc,
-  VarList, Locs, UnresolvedMappers);
+return getSema().ActOnOpenMPMapClause(
+MapTypeModifiers, MapTypeModifiersLoc, MapperIdScopeSpec, MapperId,
+MapType, IsMapTypeImplicit, MapLoc, ColonLoc, VarList, Locs,
+/*NoDiagnose=*/false,