sfantao updated this revision to Diff 32322.
sfantao added a comment.

Address the reviewers' concerns.

Also fix issues I found in the meantime with target regions that have no
arguments and with the VLA size computation.
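
For reference, a minimal sketch of the two cases the extra fixes cover, mirroring the patterns exercised in target_codegen.cpp (the snippet below is illustrative only, not part of the patch):

  void f(int n) {
    float bn[n];

    // Target region with no captured arguments.
    #pragma omp target
    {
    }

    // Target region capturing a VLA; its size expression has to be rebuilt
    // from the implicit size parameter of the outlined function.
    #pragma omp target
    {
      bn[3] += 1.0;
    }
  }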


http://reviews.llvm.org/D11361

Files:
  include/clang/AST/Decl.h
  include/clang/AST/Stmt.h
  include/clang/Basic/CapturedStmt.h
  include/clang/Sema/ScopeInfo.h
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmt.cpp
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/target_codegen.cpp

Index: test/OpenMP/target_codegen.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/target_codegen.cpp
@@ -0,0 +1,583 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+// REQUIRES: powerpc-registered-target
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[S1:%.+]] = type { double }
+
+// We have 8 target regions, but only 7 of them will actually generate
+// offloading code, and only 6 of those will have mapped arguments.
+
+// CHECK-DAG: [[MAPT2:@.+]] = private constant [1 x i32] [i32 3]
+// CHECK-DAG: [[MAPT3:@.+]] = private constant [2 x i32] [i32 3, i32 3]
+// CHECK-DAG: [[MAPT4:@.+]] = private constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3]
+// CHECK-DAG: [[MAPT5:@.+]] = private constant [3 x i32] [i32 3, i32 3, i32 3]
+// CHECK-DAG: [[MAPT6:@.+]] = private constant [4 x i32] [i32 3, i32 3, i32 3, i32 3]
+// CHECK-DAG: [[MAPT7:@.+]] = private constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+template<typename tx, typename ty>
+struct TT{
+  tx X;
+  ty Y;
+};
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+  int a = 0;
+  short aa = 0;
+  float b[10];
+  float bn[n];
+  double c[5][10];
+  double cn[5][n];
+  TT<long, char> d;
+
+  // CHECK:       br label %[[TRY:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i32* null)
+  // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT0:@.+]]()
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target
+  {
+  }
+
+  // CHECK:       call void [[HVT1:@.+]](i32* {{[^,]+}})
+  #pragma omp target if(0)
+  {
+    a += 1;
+  }
+
+  // CHECK:       br label %[[TRY:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0))
+
+  // CHECK-DAG:   store i64 4, i64* [[SADDR0:%.+]]
+  // CHECK-DAG:   [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+  // CHECK-DAG:   store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
+  // CHECK-DAG:   store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
+  // CHECK-DAG:   [[BP0]] = bitcast i32* %{{.+}} to i8*
+  // CHECK-DAG:   [[P0]] = bitcast i32* %{{.+}} to i8*
+
+  // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT2:@.+]](i32* {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target if(1)
+  {
+    a += 1;
+  }
+
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0))
+
+  // CHECK-DAG:   store i64 4, i64* [[SADDR0:%.+]]
+  // CHECK-DAG:   [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+  // CHECK-DAG:   store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
+  // CHECK-DAG:   store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
+  // CHECK-DAG:   [[BP0]] = bitcast i32* %{{.+}} to i8*
+  // CHECK-DAG:   [[P0]] = bitcast i32* %{{.+}} to i8*
+
+  // CHECK-DAG:   store i64 2, i64* [[SADDR1:%.+]]
+  // CHECK-DAG:   [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+  // CHECK-DAG:   store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
+  // CHECK-DAG:   store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
+  // CHECK-DAG:   [[BP1]] = bitcast i16* %{{.+}} to i8*
+  // CHECK-DAG:   [[P1]] = bitcast i16* %{{.+}} to i8*
+
+  // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target if(n>10)
+  {
+    a += 1;
+    aa += 1;
+  }
+
+  // We capture 3 VLA sizes in this target region
+  // CHECK-DAG:   store i64 %{{[^,]+}}, i64* [[VLA0:%[^,]+]]
+  // CHECK-DAG:   store i64 %{{[^,]+}}, i64* [[VLA1:%[^,]+]]
+  // CHECK-DAG:   store i64 %{{[^,]+}}, i64* [[VLA2:%[^,]+]]
+  // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+  // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+  // CHECK:       [[TRY]]
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0))
+
+  // CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+  // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+  // CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+  // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+  // CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]]
+  // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]]
+  // CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]]
+  // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]]
+  // CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX4:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX4]]
+  // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX4]]
+  // CHECK-DAG:   [[SADDR5:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX5:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX5]]
+  // CHECK-DAG:   [[PADDR5:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX5]]
+  // CHECK-DAG:   [[SADDR6:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX6:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX6]]
+  // CHECK-DAG:   [[PADDR6:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX6]]
+  // CHECK-DAG:   [[SADDR7:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX7:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX7]]
+  // CHECK-DAG:   [[PADDR7:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX7]]
+  // CHECK-DAG:   [[SADDR8:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX8:[0-9]+]]
+  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX8]]
+  // CHECK-DAG:   [[PADDR8:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX8]]
+
+  // The names below are not necessarily consistent with the names used for the
+  // addresses above as some are repeated.
+  // CHECK-DAG:   [[BP0:%[^,]+]] = bitcast i64* [[VLA0]] to i8*
+  // CHECK-DAG:   [[P0:%[^,]+]] = bitcast i64* [[VLA0]] to i8*
+  // CHECK-DAG:   store i8* [[BP0]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P0]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 8, i64* {{%[^,]+}}
+
+  // CHECK-DAG:   [[BP1:%[^,]+]] = bitcast i64* [[VLA1]] to i8*
+  // CHECK-DAG:   [[P1:%[^,]+]] = bitcast i64* [[VLA1]] to i8*
+  // CHECK-DAG:   store i8* [[BP1]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P1]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 8, i64* {{%[^,]+}}
+
+  // CHECK-DAG:   [[BP2:%[^,]+]] = bitcast i64* [[VLA2]] to i8*
+  // CHECK-DAG:   [[P2:%[^,]+]] = bitcast i64* [[VLA2]] to i8*
+  // CHECK-DAG:   store i8* [[BP2]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P2]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 8, i64* {{%[^,]+}}
+
+  // CHECK-DAG:   [[BP3:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+  // CHECK-DAG:   [[P3:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+  // CHECK-DAG:   store i8* [[BP3]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P3]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 4, i64* {{%[^,]+}}
+
+  // CHECK-DAG:   [[BP4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8*
+  // CHECK-DAG:   [[P4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8*
+  // CHECK-DAG:   store i8* [[BP4]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P4]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 40, i64* {{%[^,]+}}
+
+  // CHECK-DAG:   [[BP5:%[^,]+]] = bitcast float* %{{.+}} to i8*
+  // CHECK-DAG:   [[P5:%[^,]+]] = bitcast float* %{{.+}} to i8*
+  // CHECK-DAG:   store i8* [[BP5]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P5]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 4, i64* {{%[^,]+}}
+
+  // CHECK-DAG:   [[BP6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8*
+  // CHECK-DAG:   [[P6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8*
+  // CHECK-DAG:   store i8* [[BP6]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P6]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 400, i64* {{%[^,]+}}
+
+  // CHECK-DAG:   [[BP7:%[^,]+]] = bitcast double* %{{.+}} to i8*
+  // CHECK-DAG:   [[P7:%[^,]+]] = bitcast double* %{{.+}} to i8*
+  // CHECK-DAG:   store i8* [[BP7]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P7]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 8, i64* {{%[^,]+}}
+
+  // CHECK-DAG:   [[BP8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8*
+  // CHECK-DAG:   [[P8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8*
+  // CHECK-DAG:   store i8* [[BP8]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i8* [[P8]], i8** {{%[^,]+}}
+  // CHECK-DAG:   store i64 16, i64* {{%[^,]+}}
+
+  // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+  // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+  // CHECK:       [[FAIL]]
+  // CHECK:       call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+  // CHECK-NEXT:  br label %[[END]]
+  // CHECK:       [[END]]
+  #pragma omp target if(n>20)
+  {
+    a += 1;
+    b[2] += 1.0;
+    bn[3] += 1.0;
+    c[1][2] += 1.0;
+    cn[1][3] += 1.0;
+    d.X += 1;
+    d.Y += 1;
+  }
+
+  return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK:       define internal void [[HVT0]]
+
+// CHECK:       define internal void [[HVT1]]
+// CHECK-DAG:   [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG:   [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]]
+// CHECK-DAG:   load i32, i32* [[USE_A]]
+
+// CHECK:       define internal void [[HVT2]]
+// CHECK-DAG:   [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG:   [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]]
+// CHECK-DAG:   load i32, i32* [[USE_A]]
+
+// CHECK:       define internal void [[HVT3]]
+// CHECK-DAG:   [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG:   [[LOCALX_AA:%.+]] = alloca i16*
+// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG:   store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]]
+// CHECK-DAG:   [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]]
+// CHECK-DAG:   [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA:%.+]]
+// CHECK-DAG:   load i32, i32* [[USE_A]]
+// CHECK-DAG:   load i16, i16* [[USE_AA]]
+
+// CHECK:       define internal void [[HVT4]]
+// CHECK-DAG:   [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG:   [[LOCALX_B:%.+]] = alloca [10 x float]*
+// CHECK-DAG:   [[LOCALX_BN:%.+]] = alloca float*
+// CHECK-DAG:   [[LOCALX_C:%.+]] = alloca [5 x [10 x double]]*
+// CHECK-DAG:   [[LOCALX_CN:%.+]] = alloca double*
+// CHECK-DAG:   [[LOCALX_D:%.+]] = alloca [[TT]]*
+// CHECK-DAG:   [[LOCALX_VLA1:%.+]] = alloca i64*
+// CHECK-DAG:   [[LOCALX_VLA2:%.+]] = alloca i64*
+// CHECK-DAG:   [[LOCALX_VLA3:%.+]] = alloca i64*
+// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG:   store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCALX_B]]
+// CHECK-DAG:   store float* [[ARG_BN:%.+]], float** [[LOCALX_BN]]
+// CHECK-DAG:   store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCALX_C]]
+// CHECK-DAG:   store double* [[ARG_CN:%.+]], double** [[LOCALX_CN]]
+// CHECK-DAG:   store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCALX_D]]
+// CHECK-DAG:   store i64* [[ARG_VLA1:%.+]], i64** [[LOCALX_VLA1]]
+// CHECK-DAG:   store i64* [[ARG_VLA2:%.+]], i64** [[LOCALX_VLA2]]
+// CHECK-DAG:   store i64* [[ARG_VLA3:%.+]], i64** [[LOCALX_VLA3]]
+// CHECK-DAG:   [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]]
+// CHECK-DAG:   [[USE_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCALX_B:%.+]]
+// CHECK-DAG:   [[USE_BN:%.+]] = load float*, float** [[LOCALX_BN:%.+]]
+// CHECK-DAG:   [[USE_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCALX_C:%.+]]
+// CHECK-DAG:   [[USE_CN:%.+]] = load double*, double** [[LOCALX_CN:%.+]]
+// CHECK-DAG:   [[USE_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCALX_D:%.+]]
+// CHECK-DAG:   [[USE_VLA1:%.+]] = load i64*, i64** [[LOCALX_VLA1:%.+]]
+// CHECK-DAG:   [[USE_VLA2:%.+]] = load i64*, i64** [[LOCALX_VLA2:%.+]]
+// CHECK-DAG:   [[USE_VLA3:%.+]] = load i64*, i64** [[LOCALX_VLA3:%.+]]
+// CHECK-DAG:   load i32, i32* [[USE_A]]
+// CHECK-DAG:   getelementptr inbounds [10 x float], [10 x float]* [[USE_B]], i{{.*}} 0, i{{.*}} 2
+// CHECK-DAG:   getelementptr inbounds float, float* [[USE_BN]], i{{.*}} 3
+// CHECK-DAG:   getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[USE_C]], i{{.*}} 0, i{{.*}} 1
+// CHECK-DAG:   [[VLAMUL:%.+]] = mul {{.*}}i64 1, %{{.*}}
+// CHECK-DAG:   getelementptr inbounds double, double* [[USE_CN]], i{{.*}} [[VLAMUL]]
+// CHECK-DAG:   load i64, i64* [[USE_VLA1]]
+// CHECK-DAG:   load i64, i64* [[USE_VLA2]]
+// CHECK-DAG:   load i64, i64* [[USE_VLA3]]
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+  short aa = 0;
+  tx b[10];
+
+  #pragma omp target if(n>40)
+  {
+    a += 1;
+    aa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+  int a = 0;
+  short aa = 0;
+  char aaa = 0;
+  int b[10];
+
+  #pragma omp target if(n>50)
+  {
+    a += 1;
+    aa += 1;
+    aaa += 1;
+    b[2] += 1;
+  }
+
+  return a;
+}
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = n+1;
+    short int c[2][n];
+
+    #pragma omp target if(n>60)
+    {
+      this->a = (double)b + 1.5;
+      c[1][1] = ++a;
+    }
+
+    return c[1][1] + (int)b;
+  }
+};
+
+// CHECK: define {{.*}}@{{.*}}bar{{.*}}
+int bar(int n){
+  int a = 0;
+
+  // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
+  a += foo(n);
+
+  S1 S;
+  // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
+  a += S.r1(n);
+
+  // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
+  a += fstatic(n);
+
+  // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+//
+// CHECK: define {{.*}}[[FS1]]
+//
+// We capture 2 VLA sizes in this target region
+// CHECK-DAG:   store i64 %{{[^,]+}}, i64* [[VLA0:%[^,]+]]
+// CHECK-DAG:   store i64 %{{[^,]+}}, i64* [[VLA1:%[^,]+]]
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
+// CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK:       [[TRY]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0))
+
+// CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+// CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+// CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]]
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]]
+// CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]]
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]]
+
+// The names below are not necessarily consistent with the names used for the
+// addresses above as some are repeated.
+// CHECK-DAG:   [[BP0:%[^,]+]] = bitcast i64* [[VLA0]] to i8*
+// CHECK-DAG:   [[P0:%[^,]+]] = bitcast i64* [[VLA0]] to i8*
+// CHECK-DAG:   store i8* [[BP0]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i8* [[P0]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i64 8, i64* {{%[^,]+}}
+
+// CHECK-DAG:   [[BP1:%[^,]+]] = bitcast i64* [[VLA1]] to i8*
+// CHECK-DAG:   [[P1:%[^,]+]] = bitcast i64* [[VLA1]] to i8*
+// CHECK-DAG:   store i8* [[BP1]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i8* [[P1]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i64 8, i64* {{%[^,]+}}
+
+// CHECK-DAG:   [[BP2:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG:   [[P2:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG:   store i8* [[BP2]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i8* [[P2]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i64 4, i64* {{%[^,]+}}
+
+// CHECK-DAG:   [[BP3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8*
+// CHECK-DAG:   [[P3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8*
+// CHECK-DAG:   store i8* [[BP3]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i8* [[P3]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i64 8, i64* {{%[^,]+}}
+
+// CHECK-DAG:   [[BP4:%[^,]+]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG:   [[P4:%[^,]+]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG:   store i8* [[BP4]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i8* [[P4]], i8** {{%[^,]+}}
+// CHECK-DAG:   store i64 2, i64* {{%[^,]+}}
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+
+//
+// CHECK: define {{.*}}[[FSTATIC]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
+// CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK:       [[TRY]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0))
+
+// CHECK-DAG:   store i64 4, i64* [[SADDR0:%.+]]
+// CHECK-DAG:   [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+// CHECK-DAG:   store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
+// CHECK-DAG:   store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
+// CHECK-DAG:   [[BP0]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG:   [[P0]] = bitcast i32* %{{.+}} to i8*
+
+// CHECK-DAG:   store i64 2, i64* [[SADDR1:%.+]]
+// CHECK-DAG:   [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+// CHECK-DAG:   store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
+// CHECK-DAG:   store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
+// CHECK-DAG:   [[BP1]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG:   [[P1]] = bitcast i16* %{{.+}} to i8*
+
+// CHECK-DAG:   store i64 1, i64* [[SADDR2:%.+]]
+// CHECK-DAG:   [[SADDR2]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]]
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]]
+// CHECK-DAG:   store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]]
+// CHECK-DAG:   store i8* [[P2:%[^,]+]], i8** [[PADDR2]]
+
+// CHECK-DAG:   store i64 40, i64* [[SADDR3:%.+]]
+// CHECK-DAG:   [[SADDR3]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]]
+// CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]]
+// CHECK-DAG:   store i8* [[BP3:%[^,]+]], i8** [[BPADDR3]]
+// CHECK-DAG:   store i8* [[P3:%[^,]+]], i8** [[PADDR3]]
+// CHECK-DAG:   [[BP3]] = bitcast [10 x i32]* %{{.+}} to i8*
+// CHECK-DAG:   [[P3]] = bitcast [10 x i32]* %{{.+}} to i8*
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+
+//
+// CHECK: define {{.*}}[[FTEMPLATE]]
+//
+// CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
+// CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK:       [[TRY]]
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0))
+
+// CHECK-DAG:   store i64 4, i64* [[SADDR0:%.+]]
+// CHECK-DAG:   [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+// CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+// CHECK-DAG:   store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
+// CHECK-DAG:   store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
+// CHECK-DAG:   [[BP0]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG:   [[P0]] = bitcast i32* %{{.+}} to i8*
+
+// CHECK-DAG:   store i64 2, i64* [[SADDR1:%.+]]
+// CHECK-DAG:   [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+// CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+// CHECK-DAG:   store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
+// CHECK-DAG:   store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
+// CHECK-DAG:   [[BP1]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG:   [[P1]] = bitcast i16* %{{.+}} to i8*
+
+// CHECK-DAG:   store i64 40, i64* [[SADDR2:%.+]]
+// CHECK-DAG:   [[SADDR2]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]]
+// CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]]
+// CHECK-DAG:   store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]]
+// CHECK-DAG:   store i8* [[P2:%[^,]+]], i8** [[PADDR2]]
+// CHECK-DAG:   [[BP2]] = bitcast [10 x i32]* %{{.+}} to i8*
+// CHECK-DAG:   [[P2]] = bitcast [10 x i32]* %{{.+}} to i8*
+
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT:  br label %[[END]]
+// CHECK:       [[END]]
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions of the callees of bar().
+
+// CHECK:       define internal void [[HVT7]]
+// CHECK-DAG:   [[LOCALX_THIS:%.+]] = alloca [[S1]]*
+// CHECK-DAG:   [[LOCALX_B:%.+]] = alloca i32*
+// CHECK-DAG:   [[LOCALX_C:%.+]] = alloca i16*
+// CHECK-DAG:   [[LOCALX_VLA1:%.+]] = alloca i64*
+// CHECK-DAG:   [[LOCALX_VLA2:%.+]] = alloca i64*
+// CHECK-DAG:   store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCALX_THIS]]
+// CHECK-DAG:   store i32* [[ARG_B:%.+]], i32** [[LOCALX_B]]
+// CHECK-DAG:   store i16* [[ARG_C:%.+]], i16** [[LOCALX_C]]
+// CHECK-DAG:   store i64* [[ARG_VLA1:%.+]], i64** [[LOCALX_VLA1]]
+// CHECK-DAG:   store i64* [[ARG_VLA2:%.+]], i64** [[LOCALX_VLA2]]
+// CHECK-DAG:   [[USE_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCALX_THIS]]
+// CHECK-DAG:   [[USE_B:%.+]] = load i32*, i32** [[LOCALX_B]]
+// CHECK-DAG:   [[USE_C:%.+]] = load i16*, i16** [[LOCALX_C]]
+// CHECK-DAG:   [[USE_VLA1:%.+]] = load i64*, i64** [[LOCALX_VLA1]]
+// CHECK-DAG:   [[USE_VLA2:%.+]] = load i64*, i64** [[LOCALX_VLA2]]
+// CHECK-DAG:   getelementptr inbounds [[S1]], [[S1]]* [[USE_THIS]], i{{.*}} 0, i{{.*}} 0
+// CHECK-DAG:   load i32, i32* [[USE_B]]
+// CHECK-DAG:   [[VLAMUL:%.+]] = mul {{.*}}i64 1, %{{.*}}
+// CHECK-DAG:   getelementptr inbounds i16, i16* [[USE_C]], i{{.*}} [[VLAMUL]]
+// CHECK-DAG:   load i64, i64* [[USE_VLA1]]
+// CHECK-DAG:   load i64, i64* [[USE_VLA2]]
+
+// CHECK:       define internal void [[HVT6]]
+// CHECK-DAG:   [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG:   [[LOCALX_AA:%.+]] = alloca i16*
+// CHECK-DAG:   [[LOCALX_AAA:%.+]] = alloca i8*
+// CHECK-DAG:   [[LOCALX_B:%.+]] = alloca [10 x i32]*
+// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG:   store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]]
+// CHECK-DAG:   store i8* [[ARG_AAA:%.+]], i8** [[LOCALX_AAA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCALX_B]]
+// CHECK-DAG:   [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A]]
+// CHECK-DAG:   [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA]]
+// CHECK-DAG:   [[USE_AAA:%.+]] = load i8*, i8** [[LOCALX_AAA]]
+// CHECK-DAG:   [[USE_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCALX_B]]
+// CHECK-DAG:   load i32, i32* [[USE_A]]
+// CHECK-DAG:   load i16, i16* [[USE_AA]]
+// CHECK-DAG:   load i8, i8* [[USE_AAA]]
+// CHECK-DAG:   getelementptr inbounds [10 x i32], [10 x i32]* [[USE_B]], i{{.*}} 0, i{{.*}} 2
+
+// CHECK:       define internal void [[HVT5]]
+// CHECK-DAG:   [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG:   [[LOCALX_AA:%.+]] = alloca i16*
+// CHECK-DAG:   [[LOCALX_B:%.+]] = alloca [10 x i32]*
+// CHECK-DAG:   store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG:   store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]]
+// CHECK-DAG:   store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCALX_B]]
+// CHECK-DAG:   [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A]]
+// CHECK-DAG:   [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA]]
+// CHECK-DAG:   [[USE_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCALX_B]]
+// CHECK-DAG:   load i32, i32* [[USE_A]]
+// CHECK-DAG:   load i16, i16* [[USE_AA]]
+// CHECK-DAG:   getelementptr inbounds [10 x i32], [10 x i32]* [[USE_B]], i{{.*}} 0, i{{.*}} 2
+#endif
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -1308,15 +1308,22 @@
                              Params);
     break;
   }
-  case OMPD_target_data:
-  case OMPD_target: {
+  case OMPD_target_data: {
     Sema::CapturedParamNameType Params[] = {
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
                              Params);
     break;
   }
+  case OMPD_target: {
+    Sema::CapturedParamNameType Params[] = {
+        std::make_pair(StringRef(), QualType()) // __context with shared vars
+    };
+    ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope,
+                             CR_OpenMP_Target, Params);
+    break;
+  }
   case OMPD_teams: {
     QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
     QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty);
@@ -1382,7 +1389,85 @@
         }
     }
   }
-  return ActOnCapturedRegionEnd(S.get());
+
+  CapturedStmt *Res = cast<CapturedStmt>(ActOnCapturedRegionEnd(S.get()).get());
+
+  // If this is an offloading captured region, we need to change the captured
+  // declaration so that it takes each of the captured declarations as a
+  // parameter.
+  if (Res->getCapturedRegionKind() == CR_OpenMP_Target) {
+    CapturedDecl *OldCD = Res->getCapturedDecl();
+    DeclContext *DC = OldCD->getDeclContext();
+
+    unsigned NumParams = Res->capture_size();
+    const RecordDecl *RD = Res->getCapturedRecordDecl();
+    CapturedDecl *NewCD = CapturedDecl::Create(Context, DC, NumParams);
+    DeclContext *NewDC = CapturedDecl::castToDeclContext(NewCD);
+    SourceLocation Loc = Res->getLocStart();
+
+    SmallVector<const FieldDecl *, 3> VLASizeFields;
+    unsigned i = 0;
+    auto ri = RD->field_begin();
+    for (CapturedStmt::const_capture_iterator ci = Res->capture_begin(),
+                                              ce = Res->capture_end();
+         ci != ce; ++ci, ++ri, ++i) {
+      StringRef Name;
+      QualType Ty;
+      if (ci->capturesVariableArrayType()) {
+        VLASizeFields.push_back(*ri);
+        Ty = Context.getPointerType(ri->getType());
+        Name = "__vla_size";
+      } else if (ci->capturesThis()) {
+        Ty = ri->getType();
+        Name = "__this";
+      } else {
+        const VarDecl *VD = ci->getCapturedVar();
+
+        Ty = VD->getType();
+
+        // If this declaration has a variable-length array type, we need to
+        // adapt the size expression to use the implicit parameter that
+        // captures it. We also need to make sure we adapt all the sizes if
+        // the type has several dimensions.
+        if (!VLASizeFields.empty()) {
+          assert(Ty.getTypePtr() ==
+                     VLASizeFields.front()->getCapturedVLAType() &&
+                 "Invalid VLA information!");
+
+          Ty = VLASizeFields.back()->getCapturedVLAType()->getElementType();
+          for (unsigned t = 1; !VLASizeFields.empty();
+               VLASizeFields.pop_back(), ++t) {
+            auto *OriginalVAT = VLASizeFields.back()->getCapturedVLAType();
+            auto SizeType = VLASizeFields.back()->getType();
+            auto *SizeDecl = NewCD->getParam(i - t);
+            auto *NewSizeRefExpr = buildDeclRefExpr(
+                *this, SizeDecl, SizeDecl->getType(), Loc, true);
+            auto *NewSizeValExpr =
+                new (Context) UnaryOperator(NewSizeRefExpr, UO_Deref, SizeType,
+                                            VK_RValue, OK_Ordinary, Loc);
+            Ty = Context.getVariableArrayType(
+                Ty, NewSizeValExpr, OriginalVAT->getSizeModifier(),
+                OriginalVAT->getIndexTypeCVRQualifiers(),
+                OriginalVAT->getBracketsRange());
+          }
+        }
+        Ty = Context.getPointerType(Ty);
+        Name = VD->getName();
+      }
+
+      IdentifierInfo *ParamName = &Context.Idents.get(Name);
+      ImplicitParamDecl *Param =
+          ImplicitParamDecl::Create(Context, NewDC, Loc, ParamName, Ty);
+      NewCD->setParam(i, Param);
+    }
+
+    NewCD->setBody(Res->getCapturedStmt());
+    Res->setCapturedDecl(NewCD);
+    DC->addDecl(NewCD);
+    DC->removeDecl(OldCD);
+  }
+
+  return Res;
 }
 
 static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -197,12 +197,12 @@
     virtual llvm::Value *getContextValue() const { return ThisValue; }
 
     /// \brief Lookup the captured field decl for a variable.
-    virtual const FieldDecl *lookup(const VarDecl *VD) const {
+    virtual const Decl *lookup(const VarDecl *VD) const {
       return CaptureFields.lookup(VD);
     }
 
-    bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; }
-    virtual FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
+    bool isCXXThisExprCaptured() const { return getThisDecl() != nullptr; }
+    virtual Decl *getThisDecl() const { return CXXThisFieldDecl; }
 
     static bool classof(const CGCapturedStmtInfo *) {
       return true;
@@ -1689,6 +1689,11 @@
   std::pair<llvm::Value*,QualType> getVLASize(const VariableArrayType *vla);
   std::pair<llvm::Value*,QualType> getVLASize(QualType vla);
 
+  /// getVLASizeMap - Returns the LLVM value that corresponds to the size of
+  /// the VLA type \a vla. Assumes that the type has already been
+  /// emitted with EmitVariablyModifiedType.
+  llvm::Value *getVLASizeMap(const VariableArrayType *vla);
+
   /// LoadCXXThis - Load the value of 'this'. This function is only valid while
   /// generating code for an C++ member function.
   llvm::Value *LoadCXXThis() {
@@ -2211,6 +2216,8 @@
   void EmitOMPFlushDirective(const OMPFlushDirective &S);
   void EmitOMPOrderedDirective(const OMPOrderedDirective &S);
   void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
+  typedef SmallVector<llvm::Value *, 4> OMPTargetDirectiveVLASizes;
+  void PrepareOMPTargetDirectiveBodyEmission(const OMPTargetDirective &S);
   void EmitOMPTargetDirective(const OMPTargetDirective &S);
   void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S);
   void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -1506,6 +1506,12 @@
   return std::pair<llvm::Value*,QualType>(numElements, elementType);
 }
 
+llvm::Value *CodeGenFunction::getVLASizeMap(const VariableArrayType *type) {
+  llvm::Value *vlaSize = VLASizeMap[type->getSizeExpr()];
+  assert(vlaSize && "No vla size matching the requested expression!");
+  return vlaSize;
+}
+
 void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
   assert(type->isVariablyModifiedType() &&
          "Must pass variably modified type to EmitVLASizes!");
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -2121,8 +2121,79 @@
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
 }
 
-void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
-  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
+void CodeGenFunction::PrepareOMPTargetDirectiveBodyEmission(
+    const OMPTargetDirective &S) {
+  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
+  const RecordDecl *RD = CS.getCapturedRecordDecl();
+  const CapturedDecl *CD = CS.getCapturedDecl();
+
+  SmallVector<const FieldDecl *, 3> VLASizeFields;
+  auto ri = RD->field_begin();
+  auto pi = CD->param_begin();
+  for (CapturedStmt::const_capture_iterator ci = CS.capture_begin(),
+                                            ce = CS.capture_end();
+       ci != ce; ++ci, ++ri, ++pi) {
+
+    if (ci->capturesVariableArrayType()) {
+      VLASizeFields.push_back(*ri);
+      continue;
+    }
+
+    if (ci->capturesThis()) {
+      auto *ThisRef = LocalDeclMap[*pi];
+      auto Addr = MakeNaturalAlignAddrLValue(ThisRef, ri->getType());
+      CXXThisValue = EmitLoadOfLValue(Addr, CS.getLocStart()).getScalarVal();
+      continue;
+    }
+
+    // Find the expressions that give the VLA sizes and update the VLASizeMap.
+    if (!VLASizeFields.empty()) {
+      assert(ci->capturesVariable() &&
+             "Expecting declaration that has VLA type!");
+
+      const PointerType *PT = cast<PointerType>((*pi)->getType().getTypePtr());
+      QualType CurTy = PT->getPointeeType();
+
+      for (auto *FD : VLASizeFields) {
+        const VariableArrayType *VAT =
+            cast<VariableArrayType>(CurTy.getTypePtr());
+        auto *V = VLASizeMap[VAT->getSizeExpr()];
+        assert(V && "VLA Size value must exist!");
+        VLASizeMap[FD->getCapturedVLAType()->getSizeExpr()] = V;
+        CurTy = VAT->getElementType();
+      }
+
+      VLASizeFields.clear();
+    }
+  }
+}
+
+void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+
+  // Emit target region as a standalone region.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.PrepareOMPTargetDirectiveBodyEmission(S);
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+  };
+
+  // Obtain the target region outlined function.
+  llvm::Value *Fn =
+      CGM.getOpenMPRuntime().emitTargetOutlinedFunction(*this, S, CodeGen);
+
+  // Check if we have any if clause associated with the directive.
+  const Expr *IfCond = nullptr;
+  if (auto C = S.getSingleClause(OMPC_if)) {
+    IfCond = cast<OMPIfClause>(C)->getCondition();
+  }
+
+  // Check if we have any device clause associated with the directive.
+  const Expr *Device = nullptr;
+  if (auto C = S.getSingleClause(OMPC_device)) {
+    Device = cast<OMPDeviceClause>(C)->getDevice();
+  }
+
+  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device);
 }
 
 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -2125,28 +2125,33 @@
   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args,
                 CD->getLocation(),
                 CD->getBody()->getLocStart());
-  // Set the context parameter in CapturedStmtInfo.
-  llvm::Value *DeclPtr = LocalDeclMap[CD->getContextParam()];
-  assert(DeclPtr && "missing context parameter for CapturedStmt");
-  CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr));
-
-  // Initialize variable-length arrays.
-  LValue Base = MakeNaturalAlignAddrLValue(CapturedStmtInfo->getContextValue(),
-                                           Ctx.getTagDeclType(RD));
-  for (auto *FD : RD->fields()) {
-    if (FD->hasCapturedVLAType()) {
-      auto *ExprArg = EmitLoadOfLValue(EmitLValueForField(Base, FD),
-                                       S.getLocStart()).getScalarVal();
-      auto VAT = FD->getCapturedVLAType();
-      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+
+  // Initialize variable-length arrays and 'this' using the context argument,
+  // if any. Otherwise, the function's implicit parameters are used.
+  if (CD->hasContextParam()) {
+    llvm::Value *DeclPtr = LocalDeclMap[CD->getContextParam()];
+    assert(DeclPtr && "missing context parameter for CapturedStmt");
+    CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr));
+
+    // Initialize variable-length arrays.
+    LValue Base = MakeNaturalAlignAddrLValue(
+        CapturedStmtInfo->getContextValue(), Ctx.getTagDeclType(RD));
+    for (auto *FD : RD->fields()) {
+      if (FD->hasCapturedVLAType()) {
+        auto *ExprArg =
+            EmitLoadOfLValue(EmitLValueForField(Base, FD), S.getLocStart())
+                .getScalarVal();
+        auto VAT = FD->getCapturedVLAType();
+        VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+      }
     }
-  }
 
-  // If 'this' is captured, load it into CXXThisValue.
-  if (CapturedStmtInfo->isCXXThisExprCaptured()) {
-    FieldDecl *FD = CapturedStmtInfo->getThisFieldDecl();
-    LValue ThisLValue = EmitLValueForField(Base, FD);
-    CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal();
+    // If 'this' is captured, load it into CXXThisValue.
+    if (CapturedStmtInfo->isCXXThisExprCaptured()) {
+      FieldDecl *FD = cast<FieldDecl>(CapturedStmtInfo->getThisDecl());
+      LValue ThisLValue = EmitLValueForField(Base, FD);
+      CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal();
+    }
   }
 
   PGO.assignRegionCounters(CD, F);
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -154,6 +154,14 @@
     // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
     // kmp_int32 cncl_kind);
     OMPRTL__kmpc_cancel,
+
+    //
+    // Offloading related calls
+    //
+    // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
+    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+    // *arg_types);
+    OMPRTL__tgt_target,
   };
 
   /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -177,6 +185,22 @@
     /// \brief Implicit barrier in 'single' directive.
     OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
   };
+
+  /// \brief Values for bit flags used to specify the mapping type for
+  /// offloading.
+  enum OpenMPOffloadMappingFlags {
+    /// \brief Allocate memory on the device and move data from host to device.
+    OMP_MAP_TO = 0x01,
+    /// \brief Allocate memory on the device and move data from device to host.
+    OMP_MAP_FROM = 0x02,
+  };
+
+  enum OpenMPOffloadingReservedDeviceIDs {
+    /// \brief Device ID used when no device was specified; the runtime should
+    /// obtain it from the environment variables described in the spec.
+    OMP_DEVICEID_UNDEF = -1,
+  };
+
   CodeGenModule &CGM;
   /// \brief Default const ident_t object used for initialization of all other
   /// ident_t objects.
@@ -707,6 +731,28 @@
   ///
   virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                               OpenMPDirectiveKind CancelRegion);
+
+  /// \brief Emit the outlined function for the 'target' directive.
+  /// \param D Directive to emit.
+  /// \param CodeGen Code generation sequence for the \a D directive.
+  virtual llvm::Value *
+  emitTargetOutlinedFunction(CodeGenFunction &CGF,
+                             const OMPExecutableDirective &D,
+                             const RegionCodeGenTy &CodeGen);
+
+  /// \brief Emit the target offloading code associated with \a D. The emitted
+  /// code attempts to offload the execution to the device and, in the event of
+  /// a failure, executes the host version outlined in \a OutlinedFn.
+  /// \param D Directive to emit.
+  /// \param OutlinedFn Host version of the code to be offloaded.
+  /// \param IfCond Expression evaluated in if clause associated with the target
+  /// directive, or null if no if clause is used.
+  /// \param Device Expression evaluated in device clause associated with the
+  /// target directive, or null if no device clause is used.
+  virtual void emitTargetCall(CodeGenFunction &CGF,
+                              const OMPExecutableDirective &D,
+                              llvm::Value *OutlinedFn, const Expr *IfCond,
+                              const Expr *Device);
 };
 
 } // namespace CodeGen
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -41,6 +41,8 @@
     /// \brief Region for constructs that do not require function outlining,
     /// like 'for', 'sections', 'atomic' etc. directives.
     InlinedRegion,
+    /// \brief Region with outlined function for standalone 'target' directive.
+    TargetRegion,
   };
 
   CGOpenMPRegionInfo(const CapturedStmt &CS,
@@ -164,16 +166,16 @@
     llvm_unreachable("No context value for inlined OpenMP region");
   }
   /// \brief Lookup the captured field decl for a variable.
-  const FieldDecl *lookup(const VarDecl *VD) const override {
+  const Decl *lookup(const VarDecl *VD) const override {
     if (OuterRegionInfo)
       return OuterRegionInfo->lookup(VD);
     // If there is no outer outlined region,no need to lookup in a list of
     // captured variables, we can use the original one.
     return nullptr;
   }
-  FieldDecl *getThisFieldDecl() const override {
+  Decl *getThisDecl() const override {
     if (OuterRegionInfo)
-      return OuterRegionInfo->getThisFieldDecl();
+      return OuterRegionInfo->getThisDecl();
     return nullptr;
   }
   /// \brief Get a variable or parameter for storing global thread id
@@ -204,6 +206,57 @@
   CGOpenMPRegionInfo *OuterRegionInfo;
 };
 
+/// \brief API for captured statement code generation in OpenMP target
+/// constructs. For these captures, implicit parameters are used instead of the
+/// captured fields.
+class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
+public:
+  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
+                           const RegionCodeGenTy &CodeGen)
+      : CGOpenMPRegionInfo(TargetRegion, CodeGen, OMPD_target),
+        CXXThisImplicitParamDecl(nullptr) {
+
+    CapturedDecl::param_iterator Param = CS.getCapturedDecl()->param_begin();
+    for (CapturedStmt::const_capture_iterator I = CS.capture_begin(),
+                                              E = CS.capture_end();
+         I != E; ++I, ++Param) {
+      if (I->capturesThis())
+        CXXThisImplicitParamDecl = *Param;
+      else if (I->capturesVariable())
+        CaptureImplicitParams[I->getCapturedVar()] = *Param;
+    }
+  }
+
+  /// \brief This is unused for target regions because each starts executing
+  /// with a single thread.
+  const VarDecl *getThreadIDVariable() const override { return nullptr; }
+
+  /// \brief Get the name of the capture helper.
+  StringRef getHelperName() const override { return ".omp_offloading."; }
+
+  static bool classof(const CGCapturedStmtInfo *Info) {
+    return CGOpenMPRegionInfo::classof(Info) &&
+           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
+  }
+
+  /// \brief Lookup the captured implicit parameter declaration for a variable.
+  virtual const Decl *lookup(const VarDecl *VD) const override {
+    return CaptureImplicitParams.lookup(VD);
+  }
+
+  virtual Decl *getThisDecl() const override {
+    return CXXThisImplicitParamDecl;
+  }
+
+private:
+  /// \brief Map each captured VarDecl to its implicit parameter declaration.
+  llvm::SmallDenseMap<const VarDecl *, ImplicitParamDecl *>
+      CaptureImplicitParams;
+
+  /// \brief Implicit parameter declaration that captures 'this'.
+  ImplicitParamDecl *CXXThisImplicitParamDecl;
+};
+
 /// \brief RAII for emitting code of OpenMP constructs.
 class InlinedOpenMPRegionRAII {
   CodeGenFunction &CGF;
@@ -838,6 +891,22 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
     break;
   }
+  case OMPRTL__tgt_target: {
+    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
+    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+    // *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int32Ty->getPointerTo()};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -2236,7 +2305,7 @@
       if (Init) {
         if (auto *Elem = Pair.second.PrivateElemInit) {
           auto *OriginalVD = Pair.second.Original;
-          auto *SharedField = CapturesInfo.lookup(OriginalVD);
+          auto *SharedField = cast<FieldDecl>(CapturesInfo.lookup(OriginalVD));
           auto SharedRefLValue =
               CGF.EmitLValueForField(SharedsBase, SharedField);
           QualType Type = OriginalVD->getType();
@@ -2836,3 +2905,268 @@
   }
 }
 
+llvm::Value *
+CGOpenMPRuntime::emitTargetOutlinedFunction(CodeGenFunction &CGF,
+                                            const OMPExecutableDirective &D,
+                                            const RegionCodeGenTy &CodeGen) {
+
+  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+
+  // Generate the outlined target offloading function.
+  CodeGenFunction TargetCGF(CGM, true);
+  CGOpenMPTargetRegionInfo CGInfo(*CS, CodeGen);
+  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(TargetCGF, &CGInfo);
+  return TargetCGF.GenerateCapturedStmtFunction(*CS);
+}
+
+void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
+                                     const OMPExecutableDirective &D,
+                                     llvm::Value *OutlinedFn,
+                                     const Expr *IfCond, const Expr *Device) {
+
+  // Fill up the arrays with all the captured variables.
+  SmallVector<llvm::Value *, 16> BasePointers;
+  SmallVector<llvm::Value *, 16> Pointers;
+  SmallVector<llvm::Value *, 16> Sizes;
+  SmallVector<unsigned, 16> MapTypes;
+
+  bool hasVLACaptures = false;
+  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+  auto ri = CS.getCapturedRecordDecl()->field_begin();
+  auto ii = CS.capture_init_begin();
+  for (CapturedStmt::const_capture_iterator ci = CS.capture_begin(),
+                                            ce = CS.capture_end();
+       ci != ce; ++ci, ++ri, ++ii) {
+    StringRef Name;
+    QualType Ty;
+    llvm::Value *BasePointer;
+    llvm::Value *Pointer;
+    llvm::Value *Size;
+    unsigned MapType;
+
+    if (ci->capturesVariableArrayType()) {
+      llvm::Value *V = CGF.getVLASizeMap(ri->getCapturedVLAType());
+      LValue LV = CGF.MakeNaturalAlignAddrLValue(
+          CGF.CreateMemTemp(ri->getType(), "__vla_size"), ri->getType());
+      CGF.EmitStoreThroughLValue(RValue::get(V), LV);
+      BasePointer = Pointer = LV.getAddress();
+      uint64_t SizeVal =
+          CGM.getContext().getTypeSizeInChars(ri->getType()).getQuantity();
+      Size = llvm::ConstantInt::get(CGM.SizeTy, SizeVal);
+
+      hasVLACaptures = true;
+      // VLA sizes don't need to be copied back from the device.
+      MapType = CGOpenMPRuntime::OMP_MAP_TO;
+    } else if (ci->capturesThis()) {
+      BasePointer = Pointer = CGF.LoadCXXThis();
+      const PointerType *PtrTy = cast<PointerType>(ri->getType().getTypePtr());
+      uint64_t SizeVal = CGM.getContext()
+                             .getTypeSizeInChars(PtrTy->getPointeeType())
+                             .getQuantity();
+      Size = llvm::ConstantInt::get(CGM.SizeTy, SizeVal);
+
+      // Default map type.
+      MapType = CGOpenMPRuntime::OMP_MAP_TO | CGOpenMPRuntime::OMP_MAP_FROM;
+    } else {
+      BasePointer = Pointer =
+          CGF.EmitLValue(cast<DeclRefExpr>(*ii)).getAddress();
+
+      const ReferenceType *PtrTy =
+          cast<ReferenceType>(ri->getType().getTypePtr());
+      QualType ElementType = PtrTy->getPointeeType();
+
+      if (auto *VAT = dyn_cast<VariableArrayType>(ElementType.getTypePtr())) {
+        auto VATInfo = CGF.getVLASize(VAT);
+        Size = llvm::ConstantInt::get(
+            CGM.SizeTy,
+            CGM.getContext().getTypeSizeInChars(VATInfo.second).getQuantity());
+        Size = CGF.Builder.CreateNUWMul(Size, VATInfo.first);
+      } else {
+        uint64_t ElementTypeSize =
+            CGM.getContext().getTypeSizeInChars(ElementType).getQuantity();
+        Size = llvm::ConstantInt::get(CGM.SizeTy, ElementTypeSize);
+      }
+
+      // Default map type.
+      MapType = CGOpenMPRuntime::OMP_MAP_TO | CGOpenMPRuntime::OMP_MAP_FROM;
+    }
+
+    BasePointers.push_back(BasePointer);
+    Pointers.push_back(Pointer);
+    Sizes.push_back(Size);
+    MapTypes.push_back(MapType);
+  }
+
+  if (IfCond) {
+    // Check if the if clause conditional always evaluates to true or false.
+    // If it evaluates to false, we only need to emit the host version of the
+    // target region. If it evaluates to true, we can proceed with the codegen
+    // as if no if clause was provided.
+    bool CondConstant;
+    if (CGF.ConstantFoldsToSimpleInteger(IfCond, CondConstant)) {
+      if (CondConstant) {
+        IfCond = nullptr;
+      } else {
+        CGF.Builder.CreateCall(OutlinedFn, BasePointers);
+        return;
+      }
+    }
+  }
+
+  // Generate the code to launch the target region. The pattern is the
+  // following:
+  //
+  //   ...
+  //   br IfCond (if any), omp_offload, omp_offload_fail
+  //
+  // omp_offload.try:
+  //   ; create arrays for offloading
+  //   error = __tgt_target(...)
+  //   br error, omp_offload_fail, omp_offload_end
+  //
+  // omp_offload.fail:
+  //   host_version(...)
+  //
+  // omp_offload.end:
+  //   ...
+  //
+
+  auto OffloadTryBlock = CGF.createBasicBlock("omp_offload.try");
+  auto OffloadFailBlock = CGF.createBasicBlock("omp_offload.fail");
+  auto ContBlock = CGF.createBasicBlock("omp_offload.end");
+
+  if (IfCond)
+    CGF.EmitBranchOnBoolExpr(IfCond, OffloadTryBlock, OffloadFailBlock,
+                             /*TrueCount=*/0);
+
+  CGF.EmitBlock(OffloadTryBlock);
+
+  unsigned PointerNumVal = BasePointers.size();
+  llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
+  llvm::Value *BasePointersArray;
+  llvm::Value *PointersArray;
+  llvm::Value *SizesArray;
+  llvm::Value *MapTypesArray;
+
+  if (PointerNumVal) {
+    llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
+    QualType PointerArrayType = CGF.getContext().getConstantArrayType(
+        CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal,
+        /*IndexTypeQuals=*/0);
+
+    BasePointersArray =
+        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs");
+    PointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs");
+
+    // If we don't have any VLA types, we can use a constant array for the map
+    // sizes, otherwise we need to fill up the arrays as we do for the pointers.
+    if (hasVLACaptures) {
+      QualType SizeArrayType = CGF.getContext().getConstantArrayType(
+          CGF.getContext().getIntTypeForBitwidth(64, /*Signed=*/true),
+          PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0);
+      SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
+    } else {
+      // We expect all the sizes to be constant, so we collect them to create
+      // a constant array.
+      SmallVector<uint64_t, 16> ConstSizes;
+      for (auto *V : Sizes)
+        ConstSizes.push_back(cast<llvm::ConstantInt>(V)->getZExtValue());
+
+      llvm::Constant *SizesArrayInit =
+          llvm::ConstantDataArray::get(CGF.Builder.getContext(), ConstSizes);
+      auto *SizesArrayGbl = new llvm::GlobalVariable(
+          CGM.getModule(), SizesArrayInit->getType(),
+          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
+          SizesArrayInit, ".offload_sizes");
+      SizesArrayGbl->setUnnamedAddr(true);
+      SizesArray = SizesArrayGbl;
+    }
+
+    // The map types are always constant, so we don't need to generate code to
+    // fill the array; instead, we emit a constant array.
+    llvm::Constant *MapTypesArrayInit =
+        llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
+    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
+        CGM.getModule(), MapTypesArrayInit->getType(),
+        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
+        MapTypesArrayInit, ".offload_maptypes");
+    MapTypesArrayGbl->setUnnamedAddr(true);
+    MapTypesArray = MapTypesArrayGbl;
+
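+    // Fill the base pointer and section pointer arrays. The sizes array only
+    // has to be filled here if it could not be emitted as a constant above.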
+    for (unsigned i = 0; i < PointerNumVal; ++i) {
+      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
+          llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
+          0, i);
+      CGF.Builder.CreateStore(
+          CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BP);
+
+      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
+          llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
+          i);
+      CGF.Builder.CreateStore(
+          CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), P);
+
+      if (hasVLACaptures) {
+        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
+            llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 0, i);
+        CGF.Builder.CreateStore(
+            CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
+            S);
+      }
+    }
+
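+    // Decay the arrays to pointers to their first element, as expected by the
+    // runtime call.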
+    BasePointersArray = CGF.Builder.CreateConstGEP2_32(
+        llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
+        0, 0);
+    PointersArray = CGF.Builder.CreateConstGEP2_32(
+        llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
+        0);
+    SizesArray = CGF.Builder.CreateConstGEP2_32(
+        llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 0, 0);
+    MapTypesArray = CGF.Builder.CreateConstGEP2_32(
+        llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, 0, 0);
+
+  } else {
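+    // There is nothing to map, so pass null pointers to the runtime.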
+    BasePointersArray = llvm::Constant::getNullValue(CGM.VoidPtrPtrTy);
+    PointersArray = llvm::Constant::getNullValue(CGM.VoidPtrPtrTy);
+    SizesArray = llvm::Constant::getNullValue(CGM.SizeTy->getPointerTo());
+    MapTypesArray = llvm::Constant::getNullValue(CGM.Int32Ty->getPointerTo());
+  }
+
+  // In addition to the arrays filled above, the target offloading call takes
+  // the device ID and a host pointer as arguments. The host pointer is used by
+  // the runtime library to identify the current target region, so it only has
+  // to be unique; it does not need to point to anything meaningful. It could
+  // be the pointer to the outlined function that implements the target region,
+  // but we don't use it for that, so the compiler is not forced to keep the
+  // outlined function around and is free to inline the host version if that
+  // proves worthwhile during optimization.
+
+  llvm::Value *HostPtr = new llvm::GlobalVariable(
+      CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+      llvm::GlobalValue::PrivateLinkage,
+      llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
+
+  // Emit device ID if any.
+  llvm::Value *DeviceID;
+  if (Device)
+    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+                                         CGM.Int32Ty, /*isSigned=*/true);
+  else
+    DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+
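+  // Emit the offloading runtime call. A non-zero return value means that
+  // offloading failed and the host version of the target region has to be
+  // executed instead.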
+  llvm::Value *OffloadingArgs[] = {DeviceID,          HostPtr,       PointerNum,
+                                   BasePointersArray, PointersArray, SizesArray,
+                                   MapTypesArray};
+  auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
+                                    OffloadingArgs);
+  auto Error = CGF.Builder.CreateICmpNE(Return, CGF.Builder.getInt32(0));
+  CGF.Builder.CreateCondBr(Error, OffloadFailBlock, ContBlock);
+
+  CGF.EmitBlock(OffloadFailBlock);
+  CGF.Builder.CreateCall(OutlinedFn, BasePointers);
+  CGF.EmitBranch(ContBlock);
+
+  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
+}
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -1884,6 +1884,29 @@
   return CGF.EmitLValueForField(LV, FD);
 }
 
+static LValue EmitCapturedImplicitParamLValue(CodeGenFunction &CGF,
+                                              const ImplicitParamDecl *PD) {
+  // If the captured declaration is an implicit parameter, it should already
+  // exist in the local declaration map.
+  LValue LV = LValue::MakeAddr(CGF.GetAddrOfLocalVar(PD), PD->getType(),
+                               CharUnits(), CGF.getContext());
+  RValue RV = CGF.EmitLoadOfLValue(LV, PD->getLocStart());
+  return LValue::MakeAddr(RV.getScalarVal(), PD->getType(), CharUnits(),
+                          CGF.getContext());
+}
+
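+/// \brief Emit an l-value for a declaration captured by a captured statement.
+/// The capture may live in a field of the captured record or be passed to the
+/// outlined function as an implicit parameter.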
+static LValue EmitCapturedValue(CodeGenFunction &CGF, const Decl *D,
+                                llvm::Value *ThisValue) {
+  switch (D->getKind()) {
+  default:
+    llvm_unreachable("Unexpected declaration kind for capture!");
+  case Decl::ImplicitParam:
+    return EmitCapturedImplicitParamLValue(CGF, cast<ImplicitParamDecl>(D));
+  case Decl::Field:
+    return EmitCapturedFieldLValue(CGF, cast<FieldDecl>(D), ThisValue);
+  }
+}
+
 /// Named Registers are named metadata pointing to the register name
 /// which will be read from/written to as an argument to the intrinsic
 /// @llvm.read/write_register.
@@ -1943,8 +1966,8 @@
         if (auto *V = LocalDeclMap.lookup(VD))
           return MakeAddrLValue(V, T, Alignment);
         else
-          return EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
-                                         CapturedStmtInfo->getContextValue());
+          return EmitCapturedValue(*this, CapturedStmtInfo->lookup(VD),
+                                   CapturedStmtInfo->getContextValue());
       }
       assert(isa<BlockDecl>(CurCodeDecl));
       return MakeAddrLValue(GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()),
Index: include/clang/Sema/ScopeInfo.h
===================================================================
--- include/clang/Sema/ScopeInfo.h
+++ include/clang/Sema/ScopeInfo.h
@@ -608,6 +608,8 @@
       return "default captured statement";
     case CR_OpenMP:
       return "OpenMP region";
+    case CR_OpenMP_Target:
+      return "OpenMP target region";
     }
     llvm_unreachable("Invalid captured region kind!");
   }
Index: include/clang/Basic/CapturedStmt.h
===================================================================
--- include/clang/Basic/CapturedStmt.h
+++ include/clang/Basic/CapturedStmt.h
@@ -16,7 +16,8 @@
 /// \brief The different kinds of captured statement.
 enum CapturedRegionKind {
   CR_Default,
-  CR_OpenMP
+  CR_OpenMP,
+  CR_OpenMP_Target
 };
 
 } // end namespace clang
Index: include/clang/AST/Stmt.h
===================================================================
--- include/clang/AST/Stmt.h
+++ include/clang/AST/Stmt.h
@@ -2059,7 +2059,7 @@
 
   /// \brief The pointer part is the implicit outlined function and the
   /// int part is the captured region kind, 'CR_Default' etc.
-  llvm::PointerIntPair<CapturedDecl *, 1, CapturedRegionKind> CapDeclAndKind;
+  llvm::PointerIntPair<CapturedDecl *, 2, CapturedRegionKind> CapDeclAndKind;
 
   /// \brief The record for captured variables, a RecordDecl or CXXRecordDecl.
   RecordDecl *TheRecordDecl;
Index: include/clang/AST/Decl.h
===================================================================
--- include/clang/AST/Decl.h
+++ include/clang/AST/Decl.h
@@ -3645,8 +3645,9 @@
   llvm::PointerIntPair<Stmt *, 1, bool> BodyAndNothrow;
 
   explicit CapturedDecl(DeclContext *DC, unsigned NumParams)
-    : Decl(Captured, DC, SourceLocation()), DeclContext(Captured),
-      NumParams(NumParams), ContextParam(0), BodyAndNothrow(nullptr, false) { }
+      : Decl(Captured, DC, SourceLocation()), DeclContext(Captured),
+        NumParams(NumParams), ContextParam(-1u),
+        BodyAndNothrow(nullptr, false) {}
 
   ImplicitParamDecl *const *getParams() const {
     return getTrailingObjects<ImplicitParamDecl *>();
@@ -3679,9 +3680,11 @@
     getParams()[i] = P;
   }
 
+  bool hasContextParam() const { return ContextParam < NumParams; }
+
   /// \brief Retrieve the parameter containing captured variables.
   ImplicitParamDecl *getContextParam() const {
-    assert(ContextParam < NumParams);
+    assert(hasContextParam());
     return getParam(ContextParam);
   }
   void setContextParam(unsigned i, ImplicitParamDecl *P) {