[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-30 Thread Alexey Bataev via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG142d0d3ed8e0: [OPENMP]Fix PR46824: Global declare target 
pointer cannot be accessed in target… (authored by ABataev).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/test/OpenMP/target_data_codegen.cpp
  clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
  clang/test/OpenMP/target_map_codegen.cpp
  clang/test/OpenMP/target_update_codegen.cpp
  openmp/libomptarget/src/omptarget.cpp
  openmp/libomptarget/test/env/base_ptr_ref_count.c

Index: openmp/libomptarget/test/env/base_ptr_ref_count.c
===
--- /dev/null
+++ openmp/libomptarget/test/env/base_ptr_ref_count.c
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda
+// REQUIRES: libomptarget-debug
+
+#include 
+
+int *allocate(size_t n) {
+  int *ptr = malloc(sizeof(int) * n);
+#pragma omp target enter data map(to : ptr[:n])
+  return ptr;
+}
+
+void deallocate(int *ptr, size_t n) {
+#pragma omp target exit data map(delete : ptr[:n])
+  free(ptr);
+}
+
+#pragma omp declare target
+int *cnt;
+void foo() {
+  ++(*cnt);
+}
+#pragma omp end declare target
+
+int main(void) {
+  int *A = allocate(10);
+  int *V = allocate(10);
+  deallocate(A, 10);
+  deallocate(V, 10);
+// CHECK-NOT: RefCount=2
+  cnt = malloc(sizeof(int));
+  *cnt = 0;
+#pragma omp target map(cnt[:1])
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  *cnt = 0;
+#pragma omp target data map(cnt[:1])
+#pragma omp target
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  free(cnt);
+
+  return 0;
+}
+
+
Index: openmp/libomptarget/src/omptarget.cpp
===
--- openmp/libomptarget/src/omptarget.cpp
+++ openmp/libomptarget/src/omptarget.cpp
@@ -880,14 +880,9 @@
   return OFFLOAD_FAIL;
 }
   }
-} else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
-  TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
-  false, IsHostPtr);
-  TgtBaseOffset = 0; // no offset for ptrs.
-  DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
- "object " DPxMOD "\n",
- DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), DPxPTR(HstPtrBase));
 } else {
+  if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
+HstPtrBase = *reinterpret_cast(HstPtrBase);
   TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
   false, IsHostPtr);
   TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Index: clang/test/OpenMP/target_update_codegen.cpp
===
--- clang/test/OpenMP/target_update_codegen.cpp
+++ clang/test/OpenMP/target_update_codegen.cpp
@@ -310,22 +310,23 @@
 
 #ifdef CK5
 
-// CK5: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4]
-// CK5: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
+// CK5: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// CK5: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33]
 
 // CK5-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK5-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK5-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK5-DAG: [[P0:%.+]] = getelementptr 

[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-29 Thread Ye Luo via Phabricator via cfe-commits
ye-luo accepted this revision.
ye-luo added a comment.
This revision is now accepted and ready to land.

LGTM. My applications run as expected now. PR46824, PR46012, PR46868 all work 
fine.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-29 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev updated this revision to Diff 281690.
ABataev added a comment.

Fixed comment in test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/test/OpenMP/target_data_codegen.cpp
  clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
  clang/test/OpenMP/target_map_codegen.cpp
  clang/test/OpenMP/target_update_codegen.cpp
  openmp/libomptarget/src/omptarget.cpp
  openmp/libomptarget/test/env/base_ptr_ref_count.c

Index: openmp/libomptarget/test/env/base_ptr_ref_count.c
===
--- /dev/null
+++ openmp/libomptarget/test/env/base_ptr_ref_count.c
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda
+// REQUIRES: libomptarget-debug
+
+#include 
+
+int *allocate(size_t n) {
+  int *ptr = malloc(sizeof(int) * n);
+#pragma omp target enter data map(to : ptr[:n])
+  return ptr;
+}
+
+void deallocate(int *ptr, size_t n) {
+#pragma omp target exit data map(delete : ptr[:n])
+  free(ptr);
+}
+
+#pragma omp declare target
+int *cnt;
+void foo() {
+  ++(*cnt);
+}
+#pragma omp end declare target
+
+int main(void) {
+  int *A = allocate(10);
+  int *V = allocate(10);
+  deallocate(A, 10);
+  deallocate(V, 10);
+// CHECK-NOT: RefCount=2
+  cnt = malloc(sizeof(int));
+  *cnt = 0;
+#pragma omp target map(cnt[:1])
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  *cnt = 0;
+#pragma omp target data map(cnt[:1])
+#pragma omp target
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  free(cnt);
+
+  return 0;
+}
+
+
Index: openmp/libomptarget/src/omptarget.cpp
===
--- openmp/libomptarget/src/omptarget.cpp
+++ openmp/libomptarget/src/omptarget.cpp
@@ -880,14 +880,9 @@
   return OFFLOAD_FAIL;
 }
   }
-} else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
-  TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
-  false, IsHostPtr);
-  TgtBaseOffset = 0; // no offset for ptrs.
-  DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
- "object " DPxMOD "\n",
- DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), DPxPTR(HstPtrBase));
 } else {
+  if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
+HstPtrBase = *reinterpret_cast(HstPtrBase);
   TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
   false, IsHostPtr);
   TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Index: clang/test/OpenMP/target_update_codegen.cpp
===
--- clang/test/OpenMP/target_update_codegen.cpp
+++ clang/test/OpenMP/target_update_codegen.cpp
@@ -310,22 +310,23 @@
 
 #ifdef CK5
 
-// CK5: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4]
-// CK5: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
+// CK5: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// CK5: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33]
 
 // CK5-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK5-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK5-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK5-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK5-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32***
+  // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], 

[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-29 Thread George Rokos via Phabricator via cfe-commits
grokos added inline comments.



Comment at: clang/test/OpenMP/target_data_codegen.cpp:659-660
   // PRESENT=0x1000 | TARGET_PARAM=0x20 = 0x1020
-  // MEMBER_OF_9=0x9 | PRESENT=0x1000 | FROM=0x2 | TO=0x1 = 
0x91003
-  // MEMBER_OF_9=0x9 | PRESENT=0x1000 | PTR_AND_OBJ=0x10 | 
FROM=0x2 | TO=0x1 = 0x91013
-  // MEMBER_OF_9=0x9 | FROM=0x2 | TO=0x1 = 0x90003
-  // MEMBER_OF_9=0x9 | PTR_AND_OBJ=0x10 = 0x90010
+  // MEMBER_OF_7=0x9 | PRESENT=0x1000 | FROM=0x2 | TO=0x1 = 
0x71003
+  // MEMBER_OF_7=0x9 | PTR_AND_OBJ=0x10 = 0x70010
   // PTR_AND_OBJ=0x10 = 0x10

MEMBER_OF_7=0x9 --> MEMBER_OF_7=0x7


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-29 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev updated this revision to Diff 281665.
ABataev added a comment.

Rebase + apply to global pointers only.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/test/OpenMP/target_data_codegen.cpp
  clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
  clang/test/OpenMP/target_map_codegen.cpp
  clang/test/OpenMP/target_update_codegen.cpp
  openmp/libomptarget/src/omptarget.cpp
  openmp/libomptarget/test/env/base_ptr_ref_count.c

Index: openmp/libomptarget/test/env/base_ptr_ref_count.c
===
--- /dev/null
+++ openmp/libomptarget/test/env/base_ptr_ref_count.c
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda
+// REQUIRES: libomptarget-debug
+
+#include 
+
+int *allocate(size_t n) {
+  int *ptr = malloc(sizeof(int) * n);
+#pragma omp target enter data map(to : ptr[:n])
+  return ptr;
+}
+
+void deallocate(int *ptr, size_t n) {
+#pragma omp target exit data map(delete : ptr[:n])
+  free(ptr);
+}
+
+#pragma omp declare target
+int *cnt;
+void foo() {
+  ++(*cnt);
+}
+#pragma omp end declare target
+
+int main(void) {
+  int *A = allocate(10);
+  int *V = allocate(10);
+  deallocate(A, 10);
+  deallocate(V, 10);
+// CHECK-NOT: RefCount=2
+  cnt = malloc(sizeof(int));
+  *cnt = 0;
+#pragma omp target map(cnt[:1])
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  *cnt = 0;
+#pragma omp target data map(cnt[:1])
+#pragma omp target
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  free(cnt);
+
+  return 0;
+}
+
+
Index: openmp/libomptarget/src/omptarget.cpp
===
--- openmp/libomptarget/src/omptarget.cpp
+++ openmp/libomptarget/src/omptarget.cpp
@@ -880,16 +880,11 @@
   return OFFLOAD_FAIL;
 }
   }
-} else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
-  TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
-  false, IsHostPtr);
-  TgtBaseOffset = 0; // no offset for ptrs.
-  DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
- "object " DPxMOD "\n",
- DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), DPxPTR(HstPtrBase));
 } else {
+  if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
+HstPtrBase = *reinterpret_cast(HstPtrBase);
   TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
-  false, IsHostPtr);
+  false, IsHostPtr);
   TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
 #ifdef OMPTARGET_DEBUG
   void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
Index: clang/test/OpenMP/target_update_codegen.cpp
===
--- clang/test/OpenMP/target_update_codegen.cpp
+++ clang/test/OpenMP/target_update_codegen.cpp
@@ -310,22 +310,23 @@
 
 #ifdef CK5
 
-// CK5: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4]
-// CK5: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
+// CK5: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// CK5: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33]
 
 // CK5-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK5-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK5-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK5-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], 

[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-29 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added a comment.

In D84767#2180433 , @ye-luo wrote:

> In D84767#2180280 , @ye-luo wrote:
>
>> This patch
>> GPU activities:   96.99%  350.05ms10  35.005ms  1.5680us  350.00ms  
>> [CUDA memcpy HtoD]
>> before the July21 change
>> GPU activities:   95.33%  20.317ms 4  5.0793ms  1.6000us  20.305ms  
>> [CUDA memcpy HtoD]
>> Still more transfer than it should.
>
> @ABataev could you have a look? My July 21 compiler was built before 
> "[OPENMP]Fix PR46012: declare target pointer cannot be accessed in target 
> region." gets in.

Are you talking about the `number of calls` value? The total number of calls 
will increase after the patch anyway, PTR_AND_OBJ adds 1 extra mem transfer for 
transferring translated pointer address.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-28 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

In D84767#2180280 , @ye-luo wrote:

> This patch
> GPU activities:   96.99%  350.05ms10  35.005ms  1.5680us  350.00ms  
> [CUDA memcpy HtoD]
> before the July21 change
> GPU activities:   95.33%  20.317ms 4  5.0793ms  1.6000us  20.305ms  
> [CUDA memcpy HtoD]
> Still more transfer than it should.

@ABataev could you have a look. My July 21 compiler was built before 
"[OPENMP]Fix PR46012: declare target pointer cannot be accessed in target 
region." gets in.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-28 Thread Ye Luo via Phabricator via cfe-commits
ye-luo added a comment.

This patch
GPU activities:   96.99%  350.05ms10  35.005ms  1.5680us  350.00ms  
[CUDA memcpy HtoD]
before the July21 change
GPU activities:   95.33%  20.317ms 4  5.0793ms  1.6000us  20.305ms  
[CUDA memcpy HtoD]
Still more transfer than it should.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-28 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev updated this revision to Diff 281369.
ABataev added a comment.

Fixed ref count for base pointer + extra test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/test/OpenMP/target_data_codegen.cpp
  clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
  clang/test/OpenMP/target_map_codegen.cpp
  clang/test/OpenMP/target_update_codegen.cpp
  openmp/libomptarget/src/omptarget.cpp
  openmp/libomptarget/test/env/base_ptr_ref_count.c

Index: openmp/libomptarget/test/env/base_ptr_ref_count.c
===
--- /dev/null
+++ openmp/libomptarget/test/env/base_ptr_ref_count.c
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda
+// REQUIRES: libomptarget-debug
+
+#include 
+
+int *allocate(size_t n) {
+  int *ptr = malloc(sizeof(int) * n);
+#pragma omp target enter data map(to : ptr[:n])
+  return ptr;
+}
+
+void deallocate(int *ptr, size_t n) {
+#pragma omp target exit data map(delete : ptr[:n])
+  free(ptr);
+}
+
+#pragma omp declare target
+int *cnt;
+void foo() {
+  ++(*cnt);
+}
+#pragma omp end declare target
+
+int main(void) {
+  int *A = allocate(10);
+  int *V = allocate(10);
+  deallocate(A, 10);
+  deallocate(V, 10);
+// CHECK-NOT: RefCount=2
+  cnt = malloc(sizeof(int));
+  *cnt = 0;
+#pragma omp target map(cnt[:1])
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  *cnt = 0;
+#pragma omp target data map(cnt[:1])
+#pragma omp target
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  free(cnt);
+
+  return 0;
+}
+
+
Index: openmp/libomptarget/src/omptarget.cpp
===
--- openmp/libomptarget/src/omptarget.cpp
+++ openmp/libomptarget/src/omptarget.cpp
@@ -316,6 +316,11 @@
 // may be considered a hack, we could revise the scheme in the future.
 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
+  // Without OMP_TGT_MAPTYPE_MEMBER_OF, OMP_TGT_MAPTYPE_PTR_AND_OBJ can be
+  // used only if complex data with base pointer is mapped. No need to
+  // update ref counter for the base pointer in this case.
+  UpdateRef =
+  UpdateRef && (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY);
   DP("Has a pointer entry: \n");
   // Base is address of pointer.
   //
@@ -872,14 +877,9 @@
   return OFFLOAD_FAIL;
 }
   }
-} else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
-  TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
-  false, IsHostPtr);
-  TgtBaseOffset = 0; // no offset for ptrs.
-  DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
- "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
- DPxPTR(HstPtrBase));
 } else {
+  if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
+HstPtrBase = *reinterpret_cast(HstPtrBase);
   TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
   false, IsHostPtr);
   TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Index: clang/test/OpenMP/target_update_codegen.cpp
===
--- clang/test/OpenMP/target_update_codegen.cpp
+++ clang/test/OpenMP/target_update_codegen.cpp
@@ -310,18 +310,18 @@
 
 #ifdef CK5
 
-// CK5: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4]
-// CK5: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
+// CK5: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// CK5: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 49]
 
 // CK5-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], 

[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-28 Thread Ye Luo via Phabricator via cfe-commits
ye-luo requested changes to this revision.
ye-luo added a comment.
This revision now requires changes to proceed.

Please check the reproducer in https://bugs.llvm.org/show_bug.cgi?id=46868 with 
LIBOMPTARGET_DEBUG=1.
The reference counting on the base pointer variable has side effects. It was 
not cleaned up when these variables leave its scope.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84767/new/

https://reviews.llvm.org/D84767

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D84767: [OPENMP]Fix PR46824: Global declare target pointer cannot be accessed in target region.

2020-07-28 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev created this revision.
ABataev added reviewers: jdoerfert, ye-luo, RaviNarayanaswamy, grokos.
Herald added subscribers: openmp-commits, guansong, yaxunl.
Herald added projects: clang, OpenMP.
ABataev requested review of this revision.
Herald added a subscriber: sstefan1.

Need to map the base pointer for all directives, not only target
data-based ones.
The base pointer is mapped for array sections, array subscript, array
shaping and other array-like constructs with the base pointer. Also,
codegen for use_device_ptr clause was modified to correctly handle
mapping combination of array like constructs + use_device_ptr clause.
The data for use_device_ptr clause is emitted as the last records in the
data mapping array.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D84767

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/test/OpenMP/target_data_codegen.cpp
  clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp
  clang/test/OpenMP/target_map_codegen.cpp
  clang/test/OpenMP/target_update_codegen.cpp
  openmp/libomptarget/src/omptarget.cpp

Index: openmp/libomptarget/src/omptarget.cpp
===
--- openmp/libomptarget/src/omptarget.cpp
+++ openmp/libomptarget/src/omptarget.cpp
@@ -872,14 +872,9 @@
   return OFFLOAD_FAIL;
 }
   }
-} else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
-  TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
-  false, IsHostPtr);
-  TgtBaseOffset = 0; // no offset for ptrs.
-  DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
- "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
- DPxPTR(HstPtrBase));
 } else {
+  if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
+HstPtrBase = *reinterpret_cast(HstPtrBase);
   TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
   false, IsHostPtr);
   TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Index: clang/test/OpenMP/target_update_codegen.cpp
===
--- clang/test/OpenMP/target_update_codegen.cpp
+++ clang/test/OpenMP/target_update_codegen.cpp
@@ -310,18 +310,18 @@
 
 #ifdef CK5
 
-// CK5: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4]
-// CK5: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
+// CK5: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// CK5: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 49]
 
 // CK5-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+  // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK5-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK5-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK5-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
+  // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
+  // CK5-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
   // CK5-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32***
   // CK5-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to i32**
   // CK5-DAG: store i32** [[B_ADDR:%.+]], i32*** [[BPC0]]
@@ -351,18 +351,18 @@
 
 #ifdef CK6
 
-// CK6: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4]
-// CK6: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17]
+// CK6: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4]
+// CK6: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 49]
 
 // CK6-LABEL: lvalue
 void lvalue(int *B, int l, int e) {
 
-  // CK6-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+  // CK6-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
   // CK6-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK6-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
 
-  // CK6-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
-  // CK6-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}}