ABataev updated this revision to Diff 268587.
ABataev added a comment.

Rebase and fixes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80730/new/

https://reviews.llvm.org/D80730

Files:
  clang/lib/AST/OpenMPClause.cpp
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/target_data_use_device_addr_codegen.cpp

Index: clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
@@ -0,0 +1,224 @@
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[SIZES1:@.+]] = private unnamed_addr constant [5 x i64] zeroinitializer
+// 96 = 0x60 = OMP_MAP_TARGET_PARAM | OMP_MAP_RETURN_PARAM
+// CHECK-DAG: [[MAPTYPES1:@.+]] = private unnamed_addr constant [5 x i64] [i64 96, i64 96, i64 96, i64 96, i64 96]
+// 32 = 0x20 = OMP_MAP_TARGET_PARAM
+// 281474976710720 = 0x1000000000040 = OMP_MAP_MEMBER_OF | OMP_MAP_RETURN_PARAM
+// CHECK-DAG: [[MAPTYPES2:@.+]] = private unnamed_addr constant [5 x i64] [i64 32, i64 281474976710720, i64 281474976710720, i64 281474976710720, i64 281474976710720]
+struct S {
+  int a = 0;
+  int *ptr = &a;
+  int &ref = a;
+  int arr[4];
+  S() {}
+  void foo() {
+#pragma omp target data use_device_addr(a, ptr [3:4], ref, ptr[0], arr[:a])
+    ++a, ++*ptr, ++ref, ++arr[0];
+  }
+};
+
+int main() {
+  float a = 0;
+  float *ptr = &a;
+  float &ref = a;
+  float arr[4];
+  float vla[(int)a];
+  S s;
+  s.foo();
+#pragma omp target data use_device_addr(a, ptr [3:4], ref, ptr[0], arr[:(int)a], vla[0])
+  ++a, ++*ptr, ++ref, ++arr[0], ++vla[0];
+  return a;
+}
+
+// CHECK-LABEL: @main()
+// CHECK: [[A_ADDR:%.+]] = alloca float,
+// CHECK: [[PTR_ADDR:%.+]] = alloca float*,
+// CHECK: [[REF_ADDR:%.+]] = alloca float*,
+// CHECK: [[ARR_ADDR:%.+]] = alloca [4 x float],
+// CHECK: [[BPTRS:%.+]] = alloca [5 x i8*],
+// CHECK: [[PTRS:%.+]] = alloca [5 x i8*],
+// CHECK: [[VLA_ADDR:%.+]] = alloca float, i64 %{{.+}},
+// CHECK: [[PTR:%.+]] = load float*, float** [[PTR_ADDR]],
+// CHECK: [[REF:%.+]] = load float*, float** [[REF_ADDR]],
+// CHECK: [[ARR:%.+]] = getelementptr inbounds [4 x float], [4 x float]* [[ARR_ADDR]], i64 0, i64 0
+// CHECK: [[BPTR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
+// CHECK: [[BPTR0_A_ADDR:%.+]] = bitcast i8** [[BPTR0]] to float**
+// CHECK: store float* [[A_ADDR]], float** [[BPTR0_A_ADDR]],
+// CHECK: [[PTR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
+// CHECK: [[PTR0_A_ADDR:%.+]] = bitcast i8** [[PTR0]] to float**
+// CHECK: store float* [[A_ADDR]], float** [[PTR0_A_ADDR]],
+// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 1
+// CHECK: [[BPTR1_PTR_ADDR:%.+]] = bitcast i8** [[BPTR1]] to float**
+// CHECK: store float* [[PTR]], float** [[BPTR1_PTR_ADDR]],
+// CHECK: [[PTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 1
+// CHECK: [[PTR1_PTR_ADDR:%.+]] = bitcast i8** [[PTR1]] to float**
+// CHECK: store float* [[PTR]], float** [[PTR1_PTR_ADDR]],
+// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 2
+// CHECK: [[BPTR2_REF_ADDR:%.+]] = bitcast i8** [[BPTR2]] to float**
+// CHECK: store float* [[REF]], float** [[BPTR2_REF_ADDR]],
+// CHECK: [[PTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 2
+// CHECK: [[PTR2_REF_ADDR:%.+]] = bitcast i8** [[PTR2]] to float**
+// CHECK: store float* [[REF]], float** [[PTR2_REF_ADDR]],
+// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 3
+// CHECK: [[BPTR3_ARR_ADDR:%.+]] = bitcast i8** [[BPTR3]] to float**
+// CHECK: store float* [[ARR]], float** [[BPTR3_ARR_ADDR]],
+// CHECK: [[PTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 3
+// CHECK: [[PTR3_ARR_ADDR:%.+]] = bitcast i8** [[PTR3]] to float**
+// CHECK: store float* [[ARR]], float** [[PTR3_ARR_ADDR]],
+// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 4
+// CHECK: [[BPTR4_VLA_ADDR:%.+]] = bitcast i8** [[BPTR4]] to float**
+// CHECK: store float* [[VLA_ADDR]], float** [[BPTR4_VLA_ADDR]],
+// CHECK: [[PTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 4
+// CHECK: [[PTR4_VLA_ADDR:%.+]] = bitcast i8** [[PTR4]] to float**
+// CHECK: store float* [[VLA_ADDR]], float** [[PTR4_VLA_ADDR]],
+// CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
+// CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
+// CHECK: call void @__tgt_target_data_begin(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZES1]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES1]], i32 0, i32 0))
+// CHECK: [[A_REF:%.+]] = load float*, float** [[BPTR0_A_ADDR]],
+// CHECK: [[REF_REF:%.+]] = load float*, float** [[BPTR2_REF_ADDR]],
+// CHECK: store float* [[REF_REF]], float** [[TMP_REF_ADDR:%.+]],
+// CHECK: [[ARR:%.+]] = load float*, float** [[BPTR3_ARR_ADDR]],
+// CHECK: [[ARR_REF:%.+]] = bitcast float* [[ARR]] to [4 x float]*
+// CHECK: [[VLA_REF:%.+]] = load float*, float** [[BPTR4_VLA_ADDR]],
+// CHECK: [[A:%.+]] = load float, float* [[A_REF]],
+// CHECK: [[INC:%.+]] = fadd float [[A]], 1.000000e+00
+// CHECK: store float [[INC]], float* [[A_REF]],
+// CHECK: [[PTR_ADDR:%.+]] = load float*, float** [[BPTR1_PTR_ADDR]],
+// CHECK: [[VAL:%.+]] = load float, float* [[PTR_ADDR]],
+// CHECK: [[INC:%.+]] = fadd float [[VAL]], 1.000000e+00
+// CHECK: store float [[INC]], float* [[PTR_ADDR]],
+// CHECK: [[REF_ADDR:%.+]] = load float*, float** [[TMP_REF_ADDR]],
+// CHECK: [[REF:%.+]] = load float, float* [[REF_ADDR]],
+// CHECK: [[INC:%.+]] = fadd float [[REF]], 1.000000e+00
+// CHECK: store float [[INC]], float* [[REF_ADDR]],
+// CHECK: [[ARR0_ADDR:%.+]] = getelementptr inbounds [4 x float], [4 x float]* [[ARR_REF]], i64 0, i64 0
+// CHECK: [[ARR0:%.+]] = load float, float* [[ARR0_ADDR]],
+// CHECK: [[INC:%.+]] = fadd float [[ARR0]], 1.000000e+00
+// CHECK: store float [[INC]], float* [[ARR0_ADDR]],
+// CHECK: [[VLA0_ADDR:%.+]] = getelementptr inbounds float, float* [[VLA_REF]], i64 0
+// CHECK: [[VLA0:%.+]] = load float, float* [[VLA0_ADDR]],
+// CHECK: [[INC:%.+]] = fadd float [[VLA0]], 1.000000e+00
+// CHECK: store float [[INC]], float* [[VLA0_ADDR]],
+// CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
+// CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
+// CHECK: call void @__tgt_target_data_end(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[SIZES1]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES1]], i32 0, i32 0))
+
+// CHECK: foo
+// %this.addr = alloca %struct.S*, align 8
+// CHECK: [[BPTRS:%.+]] = alloca [5 x i8*],
+// CHECK: [[PTRS:%.+]] = alloca [5 x i8*],
+// CHECK: [[SIZES:%.+]] = alloca [5 x i64],
+// %tmp = alloca i32*, align 8
+// %tmp6 = alloca i32**, align 8
+// %tmp7 = alloca i32*, align 8
+// %tmp8 = alloca i32**, align 8
+// %tmp9 = alloca [4 x i32]*, align 8
+// store %struct.S* %this, %struct.S** %this.addr, align 8
+// %this1 = load %struct.S*, %struct.S** %this.addr, align 8
+// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS:%.+]], i32 0, i32 0
+// %ptr = getelementptr inbounds %struct.S, %struct.S* %this1, i32 0, i32 1
+// %ref = getelementptr inbounds %struct.S, %struct.S* %this1, i32 0, i32 2
+// %0 = load i32*, i32** %ref, align 8
+// CHECK: [[ARR_ADDR:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 3
+// CHECK: [[A_ADDR2:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 0
+// CHECK: [[PTR_ADDR:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 1
+// CHECK: [[REF_REF:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 2
+// CHECK: [[REF_PTR:%.+]] = load i32*, i32** [[REF_REF]],
+// CHECK: [[ARR_ADDR2:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 3
+// CHECK: [[ARR_END:%.+]] = getelementptr [4 x i32], [4 x i32]* [[ARR_ADDR]], i32 1
+// CHECK: [[BEGIN:%.+]] = bitcast i32* [[A_ADDR]] to i8*
+// CHECK: [[END:%.+]] = bitcast [4 x i32]* [[ARR_END]] to i8*
+// CHECK: [[E:%.+]] = ptrtoint i8* [[END]] to i64
+// CHECK: [[B:%.+]] = ptrtoint i8* [[BEGIN]] to i64
+// CHECK: [[DIFF:%.+]] = sub i64 [[E]], [[B]]
+// CHECK: [[SZ:%.+]] = sdiv exact i64 [[DIFF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
+// CHECK: [[BPTR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
+// CHECK: [[BPTR0_S:%.+]] = bitcast i8** [[BPTR0]] to %struct.S**
+// CHECK: store %struct.S* [[THIS]], %struct.S** [[BPTR0_S]],
+// CHECK: [[PTR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
+// CHECK: [[PTR0_BEGIN:%.+]] = bitcast i8** [[PTR0]] to i32**
+// CHECK: store i32* [[A_ADDR]], i32** [[PTR0_BEGIN]],
+// CHECK: [[SIZE0:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0
+// CHECK: store i64 [[SZ]], i64* [[SIZE0]],
+// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 1
+// CHECK: [[BPTR1_A_ADDR:%.+]] = bitcast i8** [[BPTR1]] to i32**
+// CHECK: store i32* [[A_ADDR2]], i32** [[BPTR1_A_ADDR]],
+// CHECK: [[PTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 1
+// CHECK: [[PTR1_A_ADDR:%.+]] = bitcast i8** [[PTR1]] to i32**
+// CHECK: store i32* [[A_ADDR2]], i32** [[PTR1_A_ADDR]],
+// CHECK: [[SIZE1:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 1
+// CHECK: store i64 0, i64* [[SIZE1]],
+// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 2
+// CHECK: [[BPTR2_PTR_ADDR:%.+]] = bitcast i8** [[BPTR2]] to i32***
+// CHECK: store i32** [[PTR_ADDR]], i32*** [[BPTR2_PTR_ADDR]],
+// CHECK: [[PTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 2
+// CHECK: [[PTR2_PTR_ADDR:%.+]] = bitcast i8** [[PTR2]] to i32***
+// CHECK: store i32** [[PTR_ADDR]], i32*** [[PTR2_PTR_ADDR]],
+// CHECK: [[SIZE2:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 2
+// CHECK: store i64 0, i64* [[SIZE2]],
+// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 3
+// CHECK: [[BPTR3_REF_PTR:%.+]] = bitcast i8** [[BPTR3]] to i32**
+// CHECK: store i32* [[REF_PTR]], i32** [[BPTR3_REF_PTR]],
+// CHECK: [[PTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 3
+// CHECK: [[PTR3_REF_PTR:%.+]] = bitcast i8** [[PTR3]] to i32**
+// CHECK: store i32* [[REF_PTR]], i32** [[PTR3_REF_PTR]],
+// CHECK: [[SIZE3:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 3
+// CHECK: store i64 0, i64* [[SIZE3]],
+// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 4
+// CHECK: [[BPTR4_ARR_ADDR:%.+]] = bitcast i8** [[BPTR4]] to [4 x i32]**
+// CHECK: store [4 x i32]* [[ARR_ADDR2]], [4 x i32]** [[BPTR4_ARR_ADDR]],
+// CHECK: [[PTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 4
+// CHECK: [[PTR4_ARR_ADDR:%.+]] = bitcast i8** [[PTR4]] to [4 x i32]**
+// CHECK: store [4 x i32]* [[ARR_ADDR2]], [4 x i32]** [[PTR4_ARR_ADDR]],
+// CHECK: [[SIZE4:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 4
+// CHECK: store i64 0, i64* [[SIZE4]],
+// CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
+// CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
+// CHECK: [[SIZE:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0
+// CHECK: call void @__tgt_target_data_begin(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES2]], i32 0, i32 0))
+// CHECK: [[A_ADDR:%.+]] = load i32*, i32** [[BPTR1_A_ADDR]],
+// CHECK: store i32* [[A_ADDR]], i32** [[A_REF:%.+]],
+// CHECK: [[PTR_ADDR:%.+]] = load i32**, i32*** [[BPTR2_PTR_ADDR]],
+// CHECK: store i32** [[PTR_ADDR]], i32*** [[PTR_REF:%.+]],
+// CHECK: [[REF_PTR:%.+]] = load i32*, i32** [[BPTR3_REF_PTR]],
+// CHECK: store i32* [[REF_PTR]], i32** [[REF_REF:%.+]],
+// CHECK: [[PTR_ADDR:%.+]] = load i32**, i32*** [[BPTR2_PTR_ADDR]],
+// CHECK: store i32** [[PTR_ADDR]], i32*** [[PTR_REF2:%.+]],
+// CHECK: [[ARR_ADDR:%.+]] = load [4 x i32]*, [4 x i32]** [[BPTR4_ARR_ADDR]],
+// CHECK: store [4 x i32]* [[ARR_ADDR]], [4 x i32]** [[ARR_REF:%.+]],
+// CHECK: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]],
+// CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]],
+// CHECK: [[INC:%.+]] = add nsw i32 [[A]], 1
+// CHECK: store i32 [[INC]], i32* [[A_ADDR]],
+// CHECK: [[PTR_PTR:%.+]] = load i32**, i32*** [[PTR_REF2]],
+// CHECK: [[PTR:%.+]] = load i32*, i32** [[PTR_PTR]],
+// CHECK: [[VAL:%.+]] = load i32, i32* [[PTR]],
+// CHECK: [[INC:%.+]] = add nsw i32 [[VAL]], 1
+// CHECK: store i32 [[INC]], i32* [[PTR]],
+// CHECK: [[REF_PTR:%.+]] = load i32*, i32** [[REF_REF]],
+// CHECK: [[VAL:%.+]] = load i32, i32* [[REF_PTR]],
+// CHECK: [[INC:%.+]] = add nsw i32 [[VAL]], 1
+// CHECK: store i32 [[INC]], i32* [[REF_PTR]],
+// CHECK: [[ARR_ADDR:%.+]] = load [4 x i32]*, [4 x i32]** [[ARR_REF]],
+// CHECK: [[ARR0_ADDR:%.+]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR_ADDR]], i64 0, i64 0
+// CHECK: [[VAL:%.+]] = load i32, i32* [[ARR0_ADDR]],
+// CHECK: [[INC:%.+]] = add nsw i32 [[VAL]], 1
+// CHECK: store i32 [[INC]], i32* [[ARR0_ADDR]],
+// CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
+// CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
+// CHECK: [[SIZE:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0
+// CHECK: call void @__tgt_target_data_end(i64 -1, i32 5, i8** [[BPTR]], i8** [[PTR]], i64* [[SIZE]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPTYPES2]], i32 0, i32 0))
+
+#endif
Index: clang/lib/Sema/SemaOpenMP.cpp
===================================================================
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -18500,8 +18500,15 @@
     // only need a component.
     MVLI.VarBaseDeclarations.push_back(D);
     MVLI.VarComponents.emplace_back();
+    Expr *Component = SimpleRefExpr;
+    if (VD && (isa<OMPArraySectionExpr>(RefExpr->IgnoreParenImpCasts()) ||
+               isa<ArraySubscriptExpr>(RefExpr->IgnoreParenImpCasts()))) {
+      Component = DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get();
+      // Component = CreateBuiltinUnaryOp(ELoc, UO_Deref, Component).get();
+      // Component = DefaultFunctionArrayLvalueConversion(Component).get();
+    }
     MVLI.VarComponents.back().push_back(
-        OMPClauseMappableExprCommon::MappableComponent(SimpleRefExpr, D));
+        OMPClauseMappableExprCommon::MappableComponent(Component, D));
   }
 
   if (MVLI.ProcessedVarList.empty())
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -76,6 +76,8 @@
 class ObjCAtThrowStmt;
 class ObjCAtSynchronizedStmt;
 class ObjCAutoreleasePoolStmt;
+class OMPUseDevicePtrClause;
+class OMPUseDeviceAddrClause;
 class ReturnsNonNullAttr;
 class SVETypeFlags;
 
@@ -3173,7 +3175,10 @@
   void EmitOMPPrivateClause(const OMPExecutableDirective &D,
                             OMPPrivateScope &PrivateScope);
   void EmitOMPUseDevicePtrClause(
-      const OMPClause &C, OMPPrivateScope &PrivateScope,
+      const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
+      const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
+  void EmitOMPUseDeviceAddrClause(
+      const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
       const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
   /// Emit code for copyin clause in \a D directive. The next code is
   /// generated at the start of outlined functions for directives:
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -31,6 +31,8 @@
 using namespace CodeGen;
 using namespace llvm::omp;
 
+static const VarDecl *getBaseDecl(const Expr *Ref);
+
 namespace {
 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
 /// for captured expressions.
@@ -220,6 +222,12 @@
           if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
             CGF.EmitVarDecl(*OED);
         }
+      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
+        for (const Expr *E : UDP->varlists()) {
+          const Decl *D = getBaseDecl(E);
+          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
+            CGF.EmitVarDecl(*OED);
+        }
       }
     }
     if (!isOpenMPSimdDirective(S.getDirectiveKind()))
@@ -5697,9 +5705,8 @@
 }
 
 void CodeGenFunction::EmitOMPUseDevicePtrClause(
-    const OMPClause &NC, OMPPrivateScope &PrivateScope,
+    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
     const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
-  const auto &C = cast<OMPUseDevicePtrClause>(NC);
   auto OrigVarIt = C.varlist_begin();
   auto InitIt = C.inits().begin();
   for (const Expr *PvtVarIt : C.private_copies()) {
@@ -5760,6 +5767,60 @@
   }
 }
 
+static const VarDecl *getBaseDecl(const Expr *Ref) {
+  const Expr *Base = Ref->IgnoreParenImpCasts();
+  while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
+    Base = OASE->getBase()->IgnoreParenImpCasts();
+  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
+    Base = ASE->getBase()->IgnoreParenImpCasts();
+  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
+}
+
+void CodeGenFunction::EmitOMPUseDeviceAddrClause(
+    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
+    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
+  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
+  for (const Expr *Ref : C.varlists()) {
+    const VarDecl *OrigVD = getBaseDecl(Ref);
+    if (!Processed.insert(OrigVD).second)
+      continue;
+    // In order to identify the right initializer we need to match the
+    // declaration used by the mapping logic. In some cases we may get
+    // OMPCapturedExprDecl that refers to the original declaration.
+    const ValueDecl *MatchingVD = OrigVD;
+    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
+      // OMPCapturedExprDecl are used to privative fields of the current
+      // structure.
+      const auto *ME = cast<MemberExpr>(OED->getInit());
+      assert(isa<CXXThisExpr>(ME->getBase()) &&
+             "Base should be the current struct!");
+      MatchingVD = ME->getMemberDecl();
+    }
+
+    // If we don't have information about the current list item, move on to
+    // the next one.
+    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
+    if (InitAddrIt == CaptureDeviceAddrMap.end())
+      continue;
+
+    Address PrivAddr = InitAddrIt->getSecond();
+    // For declrefs and variable length array need to load the pointer for
+    // correct mapping, since the pointer to the data was passed to the runtime.
+    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
+        MatchingVD->getType()->isArrayType())
+      PrivAddr =
+          EmitLoadOfPointer(PrivAddr, getContext()
+                                          .getPointerType(OrigVD->getType())
+                                          ->castAs<PointerType>());
+    llvm::Type *RealTy =
+        ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
+            ->getPointerTo();
+    PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
+
+    (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
+  }
+}
+
 // Generate the instructions for '#pragma omp target data' directive.
 void CodeGenFunction::EmitOMPTargetDataDirective(
     const OMPTargetDataDirective &S) {
@@ -5804,6 +5865,9 @@
         for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
           CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
+        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
+          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
+                                         Info.CaptureDeviceAddrMap);
         (void)PrivateScope.Privatize();
         RCG(CGF);
       } else {
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -7031,7 +7031,7 @@
     OMP_MAP_TARGET_PARAM = 0x20,
     /// Signal that the runtime library has to return the device pointer
     /// in the current position for the data being mapped. Used when we have the
-    /// use_device_ptr clause.
+    /// use_device_ptr or use_device_addr clause.
     OMP_MAP_RETURN_PARAM = 0x40,
     /// This flag signals that the reference being passed is a pointer to
     /// private data.
@@ -7099,26 +7099,30 @@
     ArrayRef<OpenMPMapModifierKind> MapModifiers;
     bool ReturnDevicePointer = false;
     bool IsImplicit = false;
+    bool ForDeviceAddr = false;
 
     MapInfo() = default;
     MapInfo(
         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
         OpenMPMapClauseKind MapType,
-        ArrayRef<OpenMPMapModifierKind> MapModifiers,
-        bool ReturnDevicePointer, bool IsImplicit)
+        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
+        bool IsImplicit, bool ForDeviceAddr = false)
         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
-          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
+          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
+          ForDeviceAddr(ForDeviceAddr) {}
   };
 
-  /// If use_device_ptr is used on a pointer which is a struct member and there
-  /// is no map information about it, then emission of that entry is deferred
-  /// until the whole struct has been processed.
+  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
+  /// member and there is no map information about it, then emission of that
+  /// entry is deferred until the whole struct has been processed.
   struct DeferredDevicePtrEntryTy {
     const Expr *IE = nullptr;
     const ValueDecl *VD = nullptr;
+    bool ForDeviceAddr = false;
 
-    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
-        : IE(IE), VD(VD) {}
+    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
+                             bool ForDeviceAddr)
+        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
   };
 
   /// The target directive from where the mappable clauses were extracted. It
@@ -7306,13 +7310,12 @@
   /// \a IsFirstComponent should be set to true if the provided set of
   /// components is the first associated with a capture.
   void generateInfoForComponentList(
-      OpenMPMapClauseKind MapType,
-      ArrayRef<OpenMPMapModifierKind> MapModifiers,
+      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
-      bool IsImplicit,
+      bool IsImplicit, bool ForDeviceAddr = false,
       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
           OverlappedElements = llvm::None) const {
     // The following summarizes what has to be generated for each map and the
@@ -7623,8 +7626,8 @@
         // If this component is a pointer inside the base struct then we don't
         // need to create any entry for it - it will be combined with the object
         // it is pointing to into a single PTR_AND_OBJ entry.
-        bool IsMemberPointer =
-            IsPointer && EncounteredME &&
+        bool IsMemberPointerOrAddr =
+            (IsPointer || ForDeviceAddr) && EncounteredME &&
             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
              EncounteredME);
         if (!OverlappedElements.empty()) {
@@ -7691,7 +7694,7 @@
           break;
         }
         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
-        if (!IsMemberPointer) {
+        if (!IsMemberPointerOrAddr) {
           BasePointers.push_back(BP.getPointer());
           Pointers.push_back(LB.getPointer());
           Sizes.push_back(
@@ -7952,17 +7955,18 @@
 
     // Helper function to fill the information map for the different supported
     // clauses.
-    auto &&InfoGen = [&Info](
-        const ValueDecl *D,
-        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
-        OpenMPMapClauseKind MapType,
-        ArrayRef<OpenMPMapModifierKind> MapModifiers,
-        bool ReturnDevicePointer, bool IsImplicit) {
-      const ValueDecl *VD =
-          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
-      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
-                            IsImplicit);
-    };
+    auto &&InfoGen =
+        [&Info](const ValueDecl *D,
+                OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+                OpenMPMapClauseKind MapType,
+                ArrayRef<OpenMPMapModifierKind> MapModifiers,
+                bool ReturnDevicePointer, bool IsImplicit,
+                bool ForDeviceAddr = false) {
+          const ValueDecl *VD =
+              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+          Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
+                                IsImplicit, ForDeviceAddr);
+        };
 
     assert(CurDir.is<const OMPExecutableDirective *>() &&
            "Expect a executable directive");
@@ -8032,7 +8036,7 @@
           // partial struct.
           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                   /*ReturnDevicePointer=*/false, C->isImplicit());
-          DeferredInfo[nullptr].emplace_back(IE, VD);
+          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
         } else {
           llvm::Value *Ptr =
               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
@@ -8044,6 +8048,70 @@
       }
     }
 
+    // Look at the use_device_addr clause information and mark the existing map
+    // entries as such. If there is no map information for an entry in the
+    // use_device_addr list, we create one with map type 'alloc' and zero size
+    // section. It is the user fault if that was not mapped before. If there is
+    // no map information and the pointer is a struct member, then we defer the
+    // emission of that entry until the whole struct has been processed.
+    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
+    for (const auto *C :
+         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
+      for (const auto L : C->component_lists()) {
+        assert(!L.second.empty() && "Not expecting empty list of components!");
+        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
+        if (!Processed.insert(VD).second)
+          continue;
+        VD = cast<ValueDecl>(VD->getCanonicalDecl());
+        const Expr *IE = L.second.back().getAssociatedExpression();
+        // If the first component is a member expression, we have to look into
+        // 'this', which maps to null in the map of map information. Otherwise
+        // look directly for the information.
+        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
+
+        // We potentially have map information for this declaration already.
+        // Look for the first set of components that refer to it.
+        if (It != Info.end()) {
+          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
+            return MI.Components.back().getAssociatedDeclaration() == VD;
+          });
+          // If we found a map entry, signal that the pointer has to be returned
+          // and move on to the next declaration.
+          if (CI != It->second.end()) {
+            CI->ReturnDevicePointer = true;
+            continue;
+          }
+        }
+
+        // We didn't find any match in our map information - generate a zero
+        // size array section - if the pointer is a struct member we defer this
+        // action until the whole struct has been processed.
+        if (isa<MemberExpr>(IE)) {
+          // Insert the pointer into Info to be processed by
+          // generateInfoForComponentList. Because it is a member pointer
+          // without a pointee, no entry will be generated for it, therefore
+          // we need to generate one after the whole struct has been processed.
+          // Nonetheless, generateInfoForComponentList must be called to take
+          // the pointer into account for the calculation of the range of the
+          // partial struct.
+          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
+                  /*ReturnDevicePointer=*/false, C->isImplicit(),
+                  /*ForDeviceAddr=*/true);
+          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
+        } else {
+          llvm::Value *Ptr;
+          if (IE->isGLValue())
+            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
+          else
+            Ptr = CGF.EmitScalarExpr(IE);
+          BasePointers.emplace_back(Ptr, VD);
+          Pointers.push_back(Ptr);
+          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
+          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
+        }
+      }
+    }
+
     for (const auto &M : Info) {
       // We need to know when we generate information for the first component
       // associated with a capture, because the mapping flags depend on it.
@@ -8062,10 +8130,10 @@
 
         // Remember the current base pointer index.
         unsigned CurrentBasePointersIdx = CurBasePointers.size();
-        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
-                                     CurBasePointers, CurPointers, CurSizes,
-                                     CurTypes, PartialStruct,
-                                     IsFirstComponentList, L.IsImplicit);
+        generateInfoForComponentList(
+            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
+            CurPointers, CurSizes, CurTypes, PartialStruct,
+            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
 
         // If this entry relates with a device pointer, set the relevant
         // declaration and add the 'return pointer' flag.
@@ -8085,21 +8153,35 @@
       }
 
       // Append any pending zero-length pointers which are struct members and
-      // used with use_device_ptr.
+      // used with use_device_ptr or use_device_addr.
       auto CI = DeferredInfo.find(M.first);
       if (CI != DeferredInfo.end()) {
         for (const DeferredDevicePtrEntryTy &L : CI->second) {
-          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
-          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
-              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
+          llvm::Value *BasePtr;
+          llvm::Value *Ptr;
+          if (L.ForDeviceAddr) {
+            if (L.IE->isGLValue())
+              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
+            else
+              Ptr = this->CGF.EmitScalarExpr(L.IE);
+            BasePtr = Ptr;
+            // Entry is RETURN_PARAM. Also, set the placeholder value
+            // MEMBER_OF=FFFF so that the entry is later updated with the
+            // correct value of MEMBER_OF.
+            CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
+          } else {
+            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
+            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
+                                             L.IE->getExprLoc());
+            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
+            // value MEMBER_OF=FFFF so that the entry is later updated with the
+            // correct value of MEMBER_OF.
+            CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
+                               OMP_MAP_MEMBER_OF);
+          }
           CurBasePointers.emplace_back(BasePtr, L.VD);
           CurPointers.push_back(Ptr);
           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
-          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
-          // value MEMBER_OF=FFFF so that the entry is later updated with the
-          // correct value of MEMBER_OF.
-          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
-                             OMP_MAP_MEMBER_OF);
         }
       }
 
@@ -8168,10 +8250,10 @@
       for (const MapInfo &L : M.second) {
         assert(!L.Components.empty() &&
                "Not expecting declaration with no component lists.");
-        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
-                                     CurBasePointers, CurPointers, CurSizes,
-                                     CurTypes, PartialStruct,
-                                     IsFirstComponentList, L.IsImplicit);
+        generateInfoForComponentList(
+            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
+            CurPointers, CurSizes, CurTypes, PartialStruct,
+            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
         IsFirstComponentList = false;
       }
 
@@ -8437,10 +8519,10 @@
       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
           OverlappedComponents = Pair.getSecond();
       bool IsFirstComponentList = true;
-      generateInfoForComponentList(MapType, MapModifiers, Components,
-                                   BasePointers, Pointers, Sizes, Types,
-                                   PartialStruct, IsFirstComponentList,
-                                   IsImplicit, OverlappedComponents);
+      generateInfoForComponentList(
+          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
+          Types, PartialStruct, IsFirstComponentList, IsImplicit,
+          /*ForDeviceAddr=*/false, OverlappedComponents);
     }
     // Go through other elements without overlapped elements.
     bool IsFirstComponentList = OverlappedData.empty();
Index: clang/lib/AST/OpenMPClause.cpp
===================================================================
--- clang/lib/AST/OpenMPClause.cpp
+++ clang/lib/AST/OpenMPClause.cpp
@@ -1207,8 +1207,8 @@
   Sizes.NumComponents = getComponentsTotalNumber(ComponentLists);
 
   // We need to allocate:
-  // 3 x NumVars x Expr* - we have an original list expression for each clause
-  // list entry and an equal number of private copies and inits.
+  // NumVars x Expr* - we have an original list expression for each clause
+  // list entry.
   // NumUniqueDeclarations x ValueDecl* - unique base declarations associated
   // with each component list.
   // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to