Meinersbur created this revision.
Meinersbur added reviewers: jdoerfert, AMDChirag, anchu-rajendran, 
kiranchandramohan, SouraVX, ftynse, kiranktp, fghanim, ABataev, craig.topper, 
dblaikie, rsmith, aaron.ballman.
Herald added subscribers: dexonsmith, martong, guansong, hiraditya, yaxunl.
Herald added a reviewer: shafik.
Meinersbur requested review of this revision.
Herald added subscribers: llvm-commits, sstefan1.
Herald added projects: clang, LLVM.

Alternative version of D94973 <https://reviews.llvm.org/D94973>. Instead of a 
new AST node of kind OMPCanonicalLoop that acts as parent of a ForStmt or 
CXXForRangeStmt, introduce a new superclass for both classes for storing the 
properties for an OpenMP canonical loop.

This was suggested by @ABataev following the same approach taken in D83261 
<https://reviews.llvm.org/D83261> for directives. In this patch, the equivalent 
of OMPChildren is LoopChildren and the class corresponding to 
OMPExecutableDirective is MaybeCanonicalLoopStmt.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D95496

Files:
  clang/include/clang/AST/Stmt.h
  clang/include/clang/AST/StmtCXX.h
  clang/include/clang/Basic/StmtNodes.td
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/Stmt.cpp
  clang/lib/AST/StmtCXX.cpp
  clang/lib/CodeGen/CGStmt.cpp
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Parse/ParseOpenMP.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/lib/Sema/SemaOpenMP.cpp
  clang/lib/Sema/SemaStmt.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTReaderStmt.cpp
  clang/lib/Serialization/ASTWriterStmt.cpp
  clang/test/OpenMP/irbuilder_for_iterator.cpp
  clang/test/OpenMP/irbuilder_for_rangefor.cpp
  clang/test/OpenMP/irbuilder_for_unsigned.c
  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -971,7 +971,8 @@
 
   // Emit the body content. We do it after connecting the loop to the CFG to
   // avoid that the callback encounters degenerate BBs.
-  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
+  if (BodyGenCB)
+    BodyGenCB(CL->getBodyIP(), CL->getIndVar());
 
 #ifndef NDEBUG
   CL->assertOK();
@@ -1164,6 +1165,13 @@
   return CLI;
 }
 
+CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
+    const LocationDescription &Loc, CanonicalLoopInfo *CLI,
+    InsertPointTy AllocaIP, bool NeedsBarrier) {
+  // Currently only supports static schedules.
+  return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
+}
+
 /// Make \p Source branch to \p Target.
 ///
 /// Handles two situations:
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -300,6 +300,12 @@
                                                bool NeedsBarrier,
                                                Value *Chunk = nullptr);
 
+  /// Modifies the canonical loop to be a workshare loop.
+  CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc,
+                                         CanonicalLoopInfo *CLI,
+                                         InsertPointTy AllocaIP,
+                                         bool NeedsBarrier);
+
   /// Tile a loop nest.
   ///
   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
Index: clang/test/OpenMP/irbuilder_for_unsigned.c
===================================================================
--- /dev/null
+++ clang/test/OpenMP/irbuilder_for_unsigned.c
@@ -0,0 +1,147 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
+// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+extern "C" void workshareloop_unsigned(float *a, float *b, float *c, float *d) {
+#pragma omp for
+  for (unsigned i = 33; i < 32000000; i += 7) {
+    a[i] = b[i] * c[i] * d[i];
+  }
+}
+
+#endif // HEADER
+// CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned
+// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) [[ATTR0:#.*]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
+// CHECK-NEXT:    [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
+// CHECK-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
+// CHECK-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
+// CHECK-NEXT:    store float* [[D]], float** [[D_ADDR]], align 8
+// CHECK-NEXT:    store i32 33, i32* [[I]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK-NEXT:    store i32* [[I]], i32** [[TMP0]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP1]], align 4
+// CHECK-NEXT:    call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]])
+// CHECK-NEXT:    [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER:%.*]]
+// CHECK:       omp_loop.preheader:
+// CHECK-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT]], 1
+// CHECK-NEXT:    store i32 [[TMP3]], i32* [[P_UPPERBOUND]], align 4
+// CHECK-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]]
+// CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER:%.*]]
+// CHECK:       omp_loop.header:
+// CHECK-NEXT:    [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND:%.*]]
+// CHECK:       omp_loop.cond:
+// CHECK-NEXT:    [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP7]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp_loop.body:
+// CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP4]]
+// CHECK-NEXT:    call void @__captured_stmt.1(i32* [[I]], i32 [[TMP8]], %struct.anon.0* [[AGG_CAPTURED1]])
+// CHECK-NEXT:    [[TMP9:%.*]] = load float*, float** [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP10]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4
+// CHECK-NEXT:    [[TMP12:%.*]] = load float*, float** [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP13]] to i64
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[TMP14:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+// CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP11]], [[TMP14]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load float*, float** [[D_ADDR]], align 8
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP16]] to i64
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[ARRAYIDX5]], align 4
+// CHECK-NEXT:    [[MUL6:%.*]] = fmul float [[MUL]], [[TMP17]]
+// CHECK-NEXT:    [[TMP18:%.*]] = load float*, float** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP19:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM7:%.*]] = zext i32 [[TMP19]] to i64
+// CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM7]]
+// CHECK-NEXT:    store float [[MUL6]], float* [[ARRAYIDX8]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_INC]]
+// CHECK:       omp_loop.inc:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
+// CHECK:       omp_loop.exit:
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM9]])
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
+// CHECK:       omp_loop.after:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR1:#.*]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DISTANCE_ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
+// CHECK-NEXT:    store i32* [[DISTANCE]], i32** [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP3]], 32000000
+// CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK:       cond.true:
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+// CHECK-NEXT:    [[SUB:%.*]] = sub i32 32000000, [[TMP6]]
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 7
+// CHECK-NEXT:    br label [[COND_END:%.*]]
+// CHECK:       cond.false:
+// CHECK-NEXT:    br label [[COND_END]]
+// CHECK:       cond.end:
+// CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[COND]], i32* [[TMP7]], align 4
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR1]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:    [[LOGICAL_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8
+// CHECK-NEXT:    store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[LOGICAL]], i32* [[LOGICAL_ADDR]], align 4
+// CHECK-NEXT:    store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[LOGICAL_ADDR]], align 4
+// CHECK-NEXT:    [[MUL:%.*]] = mul i32 7, [[TMP3]]
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[ADD]], i32* [[TMP4]], align 4
+// CHECK-NEXT:    ret void
+//
Index: clang/test/OpenMP/irbuilder_for_rangefor.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/irbuilder_for_rangefor.cpp
@@ -0,0 +1,164 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
+// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+struct MyIterator {
+  MyIterator(unsigned pos);
+  MyIterator(const MyIterator &other);
+  const MyIterator &operator=(const MyIterator &that);
+  MyIterator &operator++();
+  int operator-(const MyIterator &that) const;
+  MyIterator &operator+=(unsigned a);
+  MyIterator operator+(unsigned a) const;
+  bool operator==(const MyIterator &that) const;
+  bool operator!=(const MyIterator &that) const;
+  unsigned operator*() const;
+};
+
+struct MyRange {
+  MyRange(int n);
+
+  MyIterator begin();
+  MyIterator end();
+};
+
+extern "C" void workshareloop_rangefor(float *a, float *b, float *c) {
+#pragma omp for
+  for (unsigned i : MyRange(42)) {
+    a[i] = b[i] * c[i];
+  }
+}
+
+#endif // HEADER
+// CHECK-LABEL: define {{[^@]+}}@workshareloop_rangefor
+// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) [[ATTR0:#.*]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[__RANGE2:%.*]] = alloca %struct.MyRange*, align 8
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_MYRANGE:%.*]], align 1
+// CHECK-NEXT:    [[__BEGIN2:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1
+// CHECK-NEXT:    [[__END2:%.*]] = alloca [[STRUCT_MYITERATOR]], align 1
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1
+// CHECK-NEXT:    [[DOTCOUNT_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[P_STRIDE:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
+// CHECK-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
+// CHECK-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
+// CHECK-NEXT:    call void @_ZN7MyRangeC1Ei(%struct.MyRange* nonnull dereferenceable(1) [[REF_TMP]], i32 42)
+// CHECK-NEXT:    store %struct.MyRange* [[REF_TMP]], %struct.MyRange** [[__RANGE2]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.MyRange*, %struct.MyRange** [[__RANGE2]], align 8
+// CHECK-NEXT:    call void @_ZN7MyRange5beginEv(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[__BEGIN2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP0]])
+// CHECK-NEXT:    [[TMP1:%.*]] = load %struct.MyRange*, %struct.MyRange** [[__RANGE2]], align 8
+// CHECK-NEXT:    call void @_ZN7MyRange3endEv(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[__END2]], %struct.MyRange* nonnull dereferenceable(1) [[TMP1]])
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[__BEGIN2]])
+// CHECK-NEXT:    store i32 [[CALL]], i32* [[I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK-NEXT:    store %struct.MyIterator* [[__BEGIN2]], %struct.MyIterator** [[TMP2]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK-NEXT:    store %struct.MyIterator* [[__END2]], %struct.MyIterator** [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0
+// CHECK-NEXT:    call void @_ZN10MyIteratorC1ERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP4]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[__BEGIN2]])
+// CHECK-NEXT:    call void @__captured_stmt(i64* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]])
+// CHECK-NEXT:    [[DOTCOUNT:%.*]] = load i64, i64* [[DOTCOUNT_ADDR]], align 8
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER:%.*]]
+// CHECK:       omp_loop.preheader:
+// CHECK-NEXT:    store i64 0, i64* [[P_LOWERBOUND]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[DOTCOUNT]], 1
+// CHECK-NEXT:    store i64 [[TMP5]], i64* [[P_UPPERBOUND]], align 8
+// CHECK-NEXT:    store i64 1, i64* [[P_STRIDE]], align 8
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
+// CHECK-NEXT:    call void @__kmpc_for_static_init_8u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1)
+// CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], [[TMP6]]
+// CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP8]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER:%.*]]
+// CHECK:       omp_loop.header:
+// CHECK-NEXT:    [[OMP_LOOP_IV:%.*]] = phi i64 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND:%.*]]
+// CHECK:       omp_loop.cond:
+// CHECK-NEXT:    [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP9]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp_loop.body:
+// CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP6]]
+// CHECK-NEXT:    call void @__captured_stmt.1(i32* [[I]], i64 [[TMP10]], %struct.anon.0* [[AGG_CAPTURED1]])
+// CHECK-NEXT:    [[TMP11:%.*]] = load float*, float** [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP12]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4
+// CHECK-NEXT:    [[TMP14:%.*]] = load float*, float** [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP15]] to i64
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+// CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP13]], [[TMP16]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP18]] to i64
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    store float [[MUL]], float* [[ARRAYIDX5]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_INC]]
+// CHECK:       omp_loop.inc:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i64 [[OMP_LOOP_IV]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
+// CHECK:       omp_loop.exit:
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
+// CHECK:       omp_loop.after:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR2:#.*]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DISTANCE_ADDR:%.*]] = alloca i64*, align 8
+// CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
+// CHECK-NEXT:    store i64* [[DISTANCE]], i64** [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[TMP3]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @_ZNK10MyIteratormiERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP2]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[TMP4]])
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i64 [[CONV]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = load i64*, i64** [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[DIV]], i64* [[TMP5]], align 8
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK-SAME: (i32* nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[LOOPVAR_ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:    [[LOGICAL_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1
+// CHECK-NEXT:    store i32* [[LOOPVAR]], i32** [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[LOGICAL]], i64* [[LOGICAL_ADDR]], align 8
+// CHECK-NEXT:    store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[LOGICAL_ADDR]], align 8
+// CHECK-NEXT:    [[MUL:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[MUL]] to i32
+// CHECK-NEXT:    call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]])
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[REF_TMP]])
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[CALL]], i32* [[TMP3]], align 4
+// CHECK-NEXT:    ret void
+//
Index: clang/test/OpenMP/irbuilder_for_iterator.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/irbuilder_for_iterator.cpp
@@ -0,0 +1,147 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
+// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+struct MyIterator {
+  MyIterator(unsigned pos);
+  MyIterator(const MyIterator &other);
+  const MyIterator &operator=(const MyIterator &that);
+  MyIterator &operator++();
+  int operator-(const MyIterator &that) const;
+  MyIterator &operator+=(unsigned a);
+  MyIterator operator+(unsigned a) const;
+  bool operator==(const MyIterator &that) const;
+  bool operator!=(const MyIterator &that) const;
+  unsigned operator*() const;
+};
+
+extern "C" void workshareloop_iterator(float *a, float *b, float *c) {
+#pragma omp for
+  for (MyIterator it = MyIterator(7); it != MyIterator(41); ++it) {
+    unsigned i = *it;
+    a[i] = b[i] * c[i];
+  }
+}
+
+#endif // HEADER
+// CHECK-LABEL: define {{[^@]+}}@workshareloop_iterator
+// CHECK-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]]) [[ATTR0:#.*]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:    [[IT:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1
+// CHECK-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK-NEXT:    [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1
+// CHECK-NEXT:    [[DOTCOUNT_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[P_STRIDE:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
+// CHECK-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
+// CHECK-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
+// CHECK-NEXT:    call void @_ZN10MyIteratorC1Ej(%struct.MyIterator* nonnull dereferenceable(1) [[IT]], i32 7)
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK-NEXT:    store %struct.MyIterator* [[IT]], %struct.MyIterator** [[TMP0]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED1]], i32 0, i32 0
+// CHECK-NEXT:    call void @_ZN10MyIteratorC1ERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[IT]])
+// CHECK-NEXT:    call void @__captured_stmt(i64* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]])
+// CHECK-NEXT:    [[DOTCOUNT:%.*]] = load i64, i64* [[DOTCOUNT_ADDR]], align 8
+// CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER:%.*]]
+// CHECK:       omp_loop.preheader:
+// CHECK-NEXT:    store i64 0, i64* [[P_LOWERBOUND]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[DOTCOUNT]], 1
+// CHECK-NEXT:    store i64 [[TMP2]], i64* [[P_UPPERBOUND]], align 8
+// CHECK-NEXT:    store i64 1, i64* [[P_STRIDE]], align 8
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
+// CHECK-NEXT:    call void @__kmpc_for_static_init_8u(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[P_LASTITER]], i64* [[P_LOWERBOUND]], i64* [[P_UPPERBOUND]], i64* [[P_STRIDE]], i64 1, i64 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[P_LOWERBOUND]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[P_UPPERBOUND]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP4]], [[TMP3]]
+// CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER:%.*]]
+// CHECK:       omp_loop.header:
+// CHECK-NEXT:    [[OMP_LOOP_IV:%.*]] = phi i64 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
+// CHECK-NEXT:    br label [[OMP_LOOP_COND:%.*]]
+// CHECK:       omp_loop.cond:
+// CHECK-NEXT:    [[OMP_LOOP_CMP:%.*]] = icmp ult i64 [[OMP_LOOP_IV]], [[TMP6]]
+// CHECK-NEXT:    br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp_loop.body:
+// CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[OMP_LOOP_IV]], [[TMP3]]
+// CHECK-NEXT:    call void @__captured_stmt.1(%struct.MyIterator* [[IT]], i64 [[TMP7]], %struct.anon.0* [[AGG_CAPTURED1]])
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @_ZNK10MyIteratordeEv(%struct.MyIterator* nonnull dereferenceable(1) [[IT]])
+// CHECK-NEXT:    store i32 [[CALL]], i32* [[I]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[TMP9]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load float, float* [[ARRAYIDX]], align 4
+// CHECK-NEXT:    [[TMP11:%.*]] = load float*, float** [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM2:%.*]] = zext i32 [[TMP12]] to i64
+// CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[IDXPROM2]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+// CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP10]], [[TMP13]]
+// CHECK-NEXT:    [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM4:%.*]] = zext i32 [[TMP15]] to i64
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM4]]
+// CHECK-NEXT:    store float [[MUL]], float* [[ARRAYIDX5]], align 4
+// CHECK-NEXT:    br label [[OMP_LOOP_INC]]
+// CHECK:       omp_loop.inc:
+// CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i64 [[OMP_LOOP_IV]], 1
+// CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
+// CHECK:       omp_loop.exit:
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-NEXT:    br label [[OMP_LOOP_AFTER:%.*]]
+// CHECK:       omp_loop.after:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK-SAME: (i64* nonnull align 8 dereferenceable(8) [[DISTANCE:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) [[ATTR2:#.*]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DISTANCE_ADDR:%.*]] = alloca i64*, align 8
+// CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1
+// CHECK-NEXT:    store i64* [[DISTANCE]], i64** [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    call void @_ZN10MyIteratorC1Ej(%struct.MyIterator* nonnull dereferenceable(1) [[REF_TMP]], i32 41)
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[TMP1]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @_ZNK10MyIteratormiERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[REF_TMP]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[TMP2]])
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i64 [[CONV]], 1
+// CHECK-NEXT:    [[TMP3:%.*]] = load i64*, i64** [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[DIV]], i64* [[TMP3]], align 8
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK-SAME: (%struct.MyIterator* nonnull align 1 dereferenceable(1) [[LOOPVAR:%.*]], i64 [[LOGICAL:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) [[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[LOOPVAR_ADDR:%.*]] = alloca %struct.MyIterator*, align 8
+// CHECK-NEXT:    [[LOGICAL_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_MYITERATOR:%.*]], align 1
+// CHECK-NEXT:    store %struct.MyIterator* [[LOOPVAR]], %struct.MyIterator** [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[LOGICAL]], i64* [[LOGICAL_ADDR]], align 8
+// CHECK-NEXT:    store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[LOGICAL_ADDR]], align 8
+// CHECK-NEXT:    [[MUL:%.*]] = mul i64 1, [[TMP2]]
+// CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[MUL]] to i32
+// CHECK-NEXT:    call void @_ZNK10MyIteratorplEj(%struct.MyIterator* sret(%struct.MyIterator) align 1 [[REF_TMP]], %struct.MyIterator* nonnull dereferenceable(1) [[TMP1]], i32 [[CONV]])
+// CHECK-NEXT:    [[TMP3:%.*]] = load %struct.MyIterator*, %struct.MyIterator** [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call nonnull align 1 dereferenceable(1) %struct.MyIterator* @_ZN10MyIteratoraSERKS_(%struct.MyIterator* nonnull dereferenceable(1) [[TMP3]], %struct.MyIterator* nonnull align 1 dereferenceable(1) [[REF_TMP]])
+// CHECK-NEXT:    ret void
+//
Index: clang/lib/Serialization/ASTWriterStmt.cpp
===================================================================
--- clang/lib/Serialization/ASTWriterStmt.cpp
+++ clang/lib/Serialization/ASTWriterStmt.cpp
@@ -223,6 +223,11 @@
   Record.AddSourceLocation(S->getForLoc());
   Record.AddSourceLocation(S->getLParenLoc());
   Record.AddSourceLocation(S->getRParenLoc());
+
+  Record.AddStmt(S->getDistanceFunc());
+  Record.AddStmt(S->getLoopVarFunc());
+  Record.AddStmt(S->getLoopVarRef());
+
   Code = serialization::STMT_FOR;
 }
 
@@ -1544,6 +1549,11 @@
   Record.AddStmt(S->getInc());
   Record.AddStmt(S->getLoopVarStmt());
   Record.AddStmt(S->getBody());
+
+  Record.AddStmt(S->getDistanceFunc());
+  Record.AddStmt(S->getLoopVarFunc());
+  Record.AddStmt(S->getLoopVarRef());
+
   Code = serialization::STMT_CXX_FOR_RANGE;
 }
 
Index: clang/lib/Serialization/ASTReaderStmt.cpp
===================================================================
--- clang/lib/Serialization/ASTReaderStmt.cpp
+++ clang/lib/Serialization/ASTReaderStmt.cpp
@@ -298,6 +298,10 @@
   S->setForLoc(readSourceLocation());
   S->setLParenLoc(readSourceLocation());
   S->setRParenLoc(readSourceLocation());
+
+  S->setDistanceFunc(Record.readSubStmt());
+  S->setLoopVarFunc(Record.readSubStmt());
+  S->setLoopVarRef(Record.readSubExpr());
 }
 
 void ASTStmtReader::VisitGotoStmt(GotoStmt *S) {
@@ -1662,6 +1666,10 @@
   S->setInc(Record.readSubExpr());
   S->setLoopVarStmt(Record.readSubStmt());
   S->setBody(Record.readSubStmt());
+
+  S->setDistanceFunc(Record.readSubStmt());
+  S->setLoopVarFunc(Record.readSubStmt());
+  S->setLoopVarRef(Record.readSubExpr());
 }
 
 void ASTStmtReader::VisitMSDependentExistsStmt(MSDependentExistsStmt *S) {
@@ -2724,7 +2732,7 @@
       break;
 
     case STMT_FOR:
-      S = new (Context) ForStmt(Empty);
+      S = ForStmt::createEmpty(Context);
       break;
 
     case STMT_GOTO:
@@ -3120,7 +3128,7 @@
       break;
 
     case STMT_CXX_FOR_RANGE:
-      S = new (Context) CXXForRangeStmt(Empty);
+      S = CXXForRangeStmt::createEmpty(Context);
       break;
 
     case STMT_MS_DEPENDENT_EXISTS:
Index: clang/lib/Sema/TreeTransform.h
===================================================================
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -8350,6 +8350,9 @@
       else
         CS = D->getInnermostCapturedStmt()->getCapturedStmt();
       Body = getDerived().TransformStmt(CS);
+      if (Body.isUsable() && isOpenMPLoopDirective(D->getDirectiveKind()) &&
+          getSema().getLangOpts().OpenMPIRBuilder)
+        Body = getSema().ActOnOpenMPCanonicalLoop(Body.get());
     }
     AssociatedStmt =
         getDerived().getSema().ActOnOpenMPRegionEnd(Body, TClauses);
Index: clang/lib/Sema/SemaStmt.cpp
===================================================================
--- clang/lib/Sema/SemaStmt.cpp
+++ clang/lib/Sema/SemaStmt.cpp
@@ -1862,9 +1862,9 @@
   if (isa<NullStmt>(Body))
     getCurCompoundScope().setHasEmptyLoopBodies();
 
-  return new (Context)
-      ForStmt(Context, First, Second.get().second, Second.get().first, Third,
-              Body, ForLoc, LParenLoc, RParenLoc);
+  return ForStmt::create(Context, First, Second.get().second,
+                         Second.get().first, Third, Body, ForLoc, LParenLoc,
+                         RParenLoc);
 }
 
 /// In an Objective C collection iteration statement:
@@ -2730,11 +2730,11 @@
   if (getLangOpts().OpenMP >= 50 && BeginDeclStmt.isUsable())
     ActOnOpenMPLoopInitialization(ForLoc, BeginDeclStmt.get());
 
-  return new (Context) CXXForRangeStmt(
-      InitStmt, RangeDS, cast_or_null<DeclStmt>(BeginDeclStmt.get()),
+  return CXXForRangeStmt::create(
+      Context, InitStmt, RangeDS, cast_or_null<DeclStmt>(BeginDeclStmt.get()),
       cast_or_null<DeclStmt>(EndDeclStmt.get()), NotEqExpr.get(),
-      IncrExpr.get(), LoopVarDS, /*Body=*/nullptr, ForLoc, CoawaitLoc,
-      ColonLoc, RParenLoc);
+      IncrExpr.get(), LoopVarDS, /*Body=*/nullptr, ForLoc, CoawaitLoc, ColonLoc,
+      RParenLoc);
 }
 
 /// FinishObjCForCollectionStmt - Attach the body to a objective-C foreach
Index: clang/lib/Sema/SemaOpenMP.cpp
===================================================================
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -5130,6 +5130,362 @@
   }
 }
 
+namespace {
+/// Rebuilds already-parsed statements and expressions within the CurContext of
+/// the Sema object passed to the constructor. Used so that the variables they
+/// reference get captured when rebuilt inside a CapturedStmt region.
+class CaptureVars : public TreeTransform<CaptureVars> {
+  using BaseTransform = TreeTransform<CaptureVars>;
+
+public:
+  explicit CaptureVars(Sema &Actions) : BaseTransform(Actions) {}
+
+  bool AlwaysRebuild() { return true; }
+};
+} // namespace
+
+/// Create a closure that computes the number of iterations of a loop.
+///
+/// \param Actions   The Sema object.
+/// \param LogicalTy Type for the logical iteration number.
+/// \param Rel       Comparison operator of the loop condition.
+/// \param StartExpr Value of the loop counter at the first iteration.
+/// \param StopExpr  Value the counter is compared to in the loop condition.
+/// \param Step      Amount of increment after each iteration.
+///
+/// \return Closure (CapturedStmt) of the distance calculation.
+static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy,
+                                       BinaryOperator::Opcode Rel,
+                                       Expr *StartExpr, Expr *StopExpr,
+                                       Expr *Step) {
+  ASTContext &Ctx = Actions.getASTContext();
+  TypeSourceInfo *LogicalTSI = Ctx.getTrivialTypeSourceInfo(LogicalTy);
+
+  // Captured regions currently don't support return values, we use an
+  // out-parameter instead. All inputs are implicit captures.
+  // TODO: Instead of capturing each DeclRefExpr occurring in
+  // StartExpr/StopExpr/Step, these could also be passed as a value capture.
+  QualType ResultTy = Ctx.getLValueReferenceType(LogicalTy);
+  Sema::CapturedParamNameType Params[] = {{"Distance", ResultTy},
+                                          {StringRef(), QualType()}};
+  Actions.ActOnCapturedRegionStart({}, nullptr, CR_Default, Params);
+
+  Expr *Body;
+  {
+    Sema::CompoundScopeRAII CompoundScope(Actions);
+    CapturedDecl *CS = cast<CapturedDecl>(Actions.CurContext);
+
+    // Get the LValue expression for the result.
+    ImplicitParamDecl *DistParam = CS->getParam(0);
+    DeclRefExpr *DistRef = Actions.BuildDeclRefExpr(
+        DistParam, LogicalTy, VK_LValue, {}, nullptr, nullptr, {}, nullptr);
+
+    // Capture all referenced variable references.
+    CaptureVars Recap(Actions);
+    Expr *NewStart = AssertSuccess(Recap.TransformExpr(StartExpr));
+    Expr *NewStop = AssertSuccess(Recap.TransformExpr(StopExpr));
+    Expr *NewStep = AssertSuccess(Recap.TransformExpr(Step));
+
+    IntegerLiteral *Zero = IntegerLiteral::Create(
+        Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 0), LogicalTy, {});
+    Expr *Dist;
+    if (Rel == BO_NE) {
+      // A != loop may step up or down, decided at runtime; test the direction.
+      // NOTE(review): Zero is of LogicalTy; confirm this compares as signed.
+      Expr *IsNegStep =
+          AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_LT, NewStep, Zero));
+
+      // Positive increment.
+      Expr *ForwardRange = AssertSuccess(
+          Actions.BuildBinOp(nullptr, {}, BO_Sub, NewStop, NewStart));
+      ForwardRange = AssertSuccess(
+          Actions.BuildCStyleCastExpr({}, LogicalTSI, {}, ForwardRange));
+      Expr *ForwardDist = AssertSuccess(
+          Actions.BuildBinOp(nullptr, {}, BO_Div, ForwardRange, NewStep));
+
+      // Negative increment.
+      Expr *BackwardRange = AssertSuccess(
+          Actions.BuildBinOp(nullptr, {}, BO_Sub, NewStart, NewStop));
+      BackwardRange = AssertSuccess(
+          Actions.BuildCStyleCastExpr({}, LogicalTSI, {}, BackwardRange));
+      Expr *NegIncAmount =
+          AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Minus, NewStep));
+      Expr *BackwardDist = AssertSuccess(
+          Actions.BuildBinOp(nullptr, {}, BO_Div, BackwardRange, NegIncAmount));
+
+      // Use the appropriate case.
+      Dist = AssertSuccess(Actions.ActOnConditionalOp(
+          {}, {}, IsNegStep, BackwardDist, ForwardDist));
+    } else {
+      assert((Rel == BO_LT || Rel == BO_LE || Rel == BO_GE || Rel == BO_GT) &&
+             "Expected one of these relational operators");
+
+      // For any other comparison operator the direction follows from the
+      // operator itself; it is ill-formed OpenMP if Step moves the counter
+      // in the opposite direction. First determine whether at least the first
+      // iteration passes the loop condition.
+      Expr *HasAnyIteration = AssertSuccess(
+          Actions.BuildBinOp(nullptr, {}, Rel, NewStart, NewStop));
+
+      // Compute the range between first and last counter value.
+      Expr *Range;
+      if (Rel == BO_GE || Rel == BO_GT)
+        Range = AssertSuccess(
+            Actions.BuildBinOp(nullptr, {}, BO_Sub, NewStart, NewStop));
+      else
+        Range = AssertSuccess(
+            Actions.BuildBinOp(nullptr, {}, BO_Sub, NewStop, NewStart));
+
+      // Ensure unsigned range space.
+      Range =
+          AssertSuccess(Actions.BuildCStyleCastExpr({}, LogicalTSI, {}, Range));
+
+      if (Rel == BO_LE || Rel == BO_GE) {
+        // Add one to the range if the relational operator is inclusive.
+        Range =
+            AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_PreInc, Range));
+      }
+
+      // Divide by the absolute step amount.
+      if (Rel == BO_GE || Rel == BO_GT)
+        NewStep =
+            AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Minus, NewStep));
+      Dist = AssertSuccess(
+          Actions.BuildBinOp(nullptr, {}, BO_Div, Range, NewStep));
+
+      // If there is not at least one iteration, the range contains garbage. Fix
+      // to zero in this case.
+      Dist = AssertSuccess(
+          Actions.ActOnConditionalOp({}, {}, HasAnyIteration, Dist, Zero));
+    }
+
+    // Assign the result to the out-parameter.
+    Body = AssertSuccess(Actions.BuildBinOp(Actions.getCurScope(), {},
+                                            BO_Assign, DistRef, Dist));
+  }
+  return cast<CapturedStmt>(
+      AssertSuccess(Actions.ActOnCapturedRegionEnd(Body)));
+}
+
+/// Create a closure that computes the loop variable from the logical iteration
+/// number.
+///
+/// \param Actions   The Sema object.
+/// \param LoopVarTy Type for the loop variable used for result value.
+/// \param LogicalTy Type for the logical iteration number.
+/// \param StartExpr Value of the loop counter at the first iteration.
+/// \param Step      Amount of increment after each iteration.
+/// \param Deref     Whether the loop variable is a dereference of the loop
+/// counter variable.
+///
+/// \return Closure (CapturedStmt) of the loop value calculation.
+static CapturedStmt *buildLoopVarFunc(Sema &Actions, QualType LoopVarTy,
+                                      QualType LogicalTy,
+                                      DeclRefExpr *StartExpr, Expr *Step,
+                                      bool Deref) {
+  ASTContext &Ctx = Actions.getASTContext();
+
+  // Pass the result as an out-parameter. Passing as return value would require
+  // the OpenMPIRBuilder to know additional C/C++ semantics, such as how to
+  // invoke a copy constructor.
+  QualType TargetParamTy = Ctx.getLValueReferenceType(LoopVarTy);
+  Sema::CapturedParamNameType Params[] = {{"LoopVar", TargetParamTy},
+                                          {"Logical", LogicalTy},
+                                          {StringRef(), QualType()}};
+  Actions.ActOnCapturedRegionStart({}, nullptr, CR_Default, Params);
+
+  // Capture the initial iterator which represents the LoopVar value at logical
+  // iteration zero. Since the original ForStmt/CXXForRangeStmt updates it in
+  // every iteration, capture it by value before it is modified.
+  VarDecl *StartVar = cast<VarDecl>(StartExpr->getDecl());
+  bool Invalid = Actions.tryCaptureVariable(StartVar, {},
+                                            Sema::TryCapture_ExplicitByVal, {});
+  (void)Invalid;
+  assert(!Invalid && "Expecting capture-by-value to work.");
+
+  Expr *Body;
+  {
+    Sema::CompoundScopeRAII CompoundScope(Actions);
+    auto *CS = cast<CapturedDecl>(Actions.CurContext);
+
+    ImplicitParamDecl *TargetParam = CS->getParam(0);
+    DeclRefExpr *TargetRef = Actions.BuildDeclRefExpr(
+        TargetParam, LoopVarTy, VK_LValue, {}, nullptr, nullptr, {}, nullptr);
+    ImplicitParamDecl *IndvarParam = CS->getParam(1);
+    DeclRefExpr *LogicalRef = Actions.BuildDeclRefExpr(
+        IndvarParam, LogicalTy, VK_LValue, {}, nullptr, nullptr, {}, nullptr);
+
+    // Capture the Start and Step expressions.
+    CaptureVars Recap(Actions);
+    Expr *NewStart = AssertSuccess(Recap.TransformExpr(StartExpr));
+    Expr *NewStep = AssertSuccess(Recap.TransformExpr(Step));
+
+    Expr *Skip = AssertSuccess(
+        Actions.BuildBinOp(nullptr, {}, BO_Mul, NewStep, LogicalRef));
+    // TODO: Explicitly cast to the iterator's difference_type instead of
+    // relying on implicit conversion.
+    Expr *Advanced =
+        AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Add, NewStart, Skip));
+
+    if (Deref) {
+      // For range-based for-loops convert the loop counter value to a concrete
+      // loop variable value by dereferencing the iterator.
+      Advanced =
+          AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Deref, Advanced));
+    }
+
+    // Assign the result to the output parameter.
+    Body = AssertSuccess(Actions.BuildBinOp(Actions.getCurScope(), {},
+                                            BO_Assign, TargetRef, Advanced));
+  }
+  return cast<CapturedStmt>(
+      AssertSuccess(Actions.ActOnCapturedRegionEnd(Body)));
+}
+
+StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) {
+  // Recreates loop AStmt with distance and loop-variable closures attached.
+  ASTContext &Ctx = getASTContext();
+
+  // Extract loop variable, condition and increment of either kind of loop.
+  Expr *Cond, *Inc;
+  VarDecl *CounterDecl, *LVDecl;
+  if (auto *For = dyn_cast<ForStmt>(AStmt)) {
+    Stmt *Init = For->getInit();
+    if (auto *LCVarDeclStmt = dyn_cast_or_null<DeclStmt>(Init)) {
+      // For statement declares loop variable.
+      CounterDecl = cast<VarDecl>(LCVarDeclStmt->getSingleDecl());
+    } else if (auto *LCAssign = dyn_cast_or_null<BinaryOperator>(Init)) {
+      // For statement reuses variable.
+      assert(LCAssign->getOpcode() == BO_Assign &&
+             "init part must be a loop variable assignment");
+      auto *CounterRef = cast<DeclRefExpr>(LCAssign->getLHS());
+      CounterDecl = cast<VarDecl>(CounterRef->getDecl());
+    } else
+      llvm_unreachable("Cannot determine loop variable");
+    LVDecl = CounterDecl;
+
+    Cond = For->getCond();
+    Inc = For->getInc();
+  } else if (auto *RangeFor = dyn_cast<CXXForRangeStmt>(AStmt)) {
+    DeclStmt *BeginStmt = RangeFor->getBeginStmt();
+    CounterDecl = cast<VarDecl>(BeginStmt->getSingleDecl());
+    LVDecl = RangeFor->getLoopVariable();
+
+    Cond = RangeFor->getCond();
+    Inc = RangeFor->getInc();
+  } else
+    llvm_unreachable("unhandled kind of loop");
+
+  QualType CounterTy = CounterDecl->getType();
+  QualType LVTy = LVDecl->getType();
+
+  // Analyze the loop condition.
+  Expr *LHS, *RHS;
+  BinaryOperator::Opcode CondRel;
+  Cond = Cond->IgnoreImplicit();
+  if (auto *CondBinExpr = dyn_cast<BinaryOperator>(Cond)) {
+    LHS = CondBinExpr->getLHS();
+    RHS = CondBinExpr->getRHS();
+    CondRel = CondBinExpr->getOpcode();
+  } else if (auto *CondCXXOp = dyn_cast<CXXOperatorCallExpr>(Cond)) {
+    assert(CondCXXOp->getOperator() == OO_ExclaimEqual &&
+           "Expected != loop condition for iterator-based loops");
+    assert(CondCXXOp->getNumArgs() == 2 && "Comparison should have 2 operands");
+    LHS = CondCXXOp->getArg(0);
+    RHS = CondCXXOp->getArg(1);
+    CondRel = BO_NE;
+  } else
+    llvm_unreachable("unexpected loop condition");
+
+  // Normalize such that the loop counter is on the LHS.
+  if (!isa<DeclRefExpr>(LHS->IgnoreImplicit()) ||
+      cast<DeclRefExpr>(LHS->IgnoreImplicit())->getDecl() != CounterDecl) {
+    std::swap(LHS, RHS);
+    CondRel = BinaryOperator::reverseComparisonOp(CondRel);
+  }
+  auto *CounterRef = cast<DeclRefExpr>(LHS->IgnoreImplicit());
+
+  // Decide the bit width for the logical iteration counter. By default use the
+  // unsigned ptrdiff_t integer size (for iterators and pointers).
+  // TODO: For iterators, use iterator::difference_type,
+  // std::iterator_traits<>::difference_type or decltype(it - end).
+  QualType LogicalTy = Ctx.getUnsignedPointerDiffType();
+  if (CounterTy->isIntegerType()) {
+    unsigned BitWidth = Ctx.getIntWidth(CounterTy);
+    LogicalTy = Ctx.getIntTypeForBitwidth(BitWidth, false);
+  }
+  // Unit step literal of type LogicalTy; built as signed so -1 sign-extends.
+  auto MakeUnitStep = [&](int64_t Dir) -> Expr * {
+    llvm::APInt Value(Ctx.getIntWidth(LogicalTy), Dir, /*isSigned=*/true);
+    return IntegerLiteral::Create(Ctx, Value, LogicalTy, {});
+  };
+
+  // Analyze the loop increment.
+  Expr *Step;
+  if (auto *IncUn = dyn_cast<UnaryOperator>(Inc)) {
+    switch (IncUn->getOpcode()) {
+    case UO_PreInc:
+    case UO_PostInc:
+      Step = MakeUnitStep(1);
+      break;
+    case UO_PreDec:
+    case UO_PostDec:
+      Step = MakeUnitStep(-1);
+      break;
+    default:
+      llvm_unreachable("unhandled unary increment operator");
+    }
+  } else if (auto *IncBin = dyn_cast<BinaryOperator>(Inc)) {
+    if (IncBin->getOpcode() == BO_AddAssign) {
+      Step = IncBin->getRHS();
+    } else if (IncBin->getOpcode() == BO_SubAssign) {
+      Step =
+          AssertSuccess(BuildUnaryOp(nullptr, {}, UO_Minus, IncBin->getRHS()));
+    } else
+      llvm_unreachable("unhandled binary increment operator");
+  } else if (auto *IncCXXOp = dyn_cast<CXXOperatorCallExpr>(Inc)) {
+    switch (IncCXXOp->getOperator()) {
+    case OO_PlusPlus:
+      Step = MakeUnitStep(1);
+      break;
+    case OO_MinusMinus:
+      Step = MakeUnitStep(-1);
+      break;
+    case OO_PlusEqual:
+      Step = IncCXXOp->getArg(1);
+      break;
+    case OO_MinusEqual:
+      Step = AssertSuccess(
+          BuildUnaryOp(nullptr, {}, UO_Minus, IncCXXOp->getArg(1)));
+      break;
+    default:
+      llvm_unreachable("unhandled overloaded increment operator");
+    }
+  } else
+    llvm_unreachable("unknown increment expression");
+
+  CapturedStmt *DistanceFunc =
+      buildDistanceFunc(*this, LogicalTy, CondRel, LHS, RHS, Step);
+  CapturedStmt *LoopVarFunc = buildLoopVarFunc(
+      *this, LVTy, LogicalTy, CounterRef, Step, isa<CXXForRangeStmt>(AStmt));
+  DeclRefExpr *LVRef = BuildDeclRefExpr(LVDecl, LVDecl->getType(), VK_LValue,
+                                        {}, nullptr, nullptr, {}, nullptr);
+  if (auto *For = dyn_cast<ForStmt>(AStmt)) {
+    return ForStmt::create(
+        Ctx, For->getInit(), For->getCond(), For->getConditionVariable(),
+        For->getInc(), For->getBody(), For->getForLoc(), For->getLParenLoc(),
+        For->getRParenLoc(), DistanceFunc, LoopVarFunc, LVRef);
+  } else if (auto *For = dyn_cast<CXXForRangeStmt>(AStmt)) {
+    return CXXForRangeStmt::create(
+        Ctx, For->getInit(), For->getRangeStmt(), For->getBeginStmt(),
+        For->getEndStmt(), For->getCond(), For->getInc(), For->getLoopVarStmt(),
+        For->getBody(), For->getForLoc(), For->getCoawaitLoc(),
+        For->getColonLoc(), For->getRParenLoc(), DistanceFunc, LoopVarFunc,
+        LVRef);
+  } else
+    llvm_unreachable("unsupported");
+}
+
 StmtResult Sema::ActOnOpenMPExecutableDirective(
     OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName,
     OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -17263,18 +17263,17 @@
 
 
 /// Capture the given variable in the captured region.
-static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
-                                    VarDecl *Var,
-                                    SourceLocation Loc,
-                                    const bool BuildAndDiagnose,
-                                    QualType &CaptureType,
-                                    QualType &DeclRefType,
-                                    const bool RefersToCapturedVariable,
-                                    Sema &S, bool Invalid) {
+static bool captureInCapturedRegion(
+    CapturedRegionScopeInfo *RSI, VarDecl *Var, SourceLocation Loc,
+    const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType,
+    const bool RefersToCapturedVariable, Sema::TryCaptureKind Kind,
+    bool IsTopScope, Sema &S, bool Invalid) {
   // By default, capture variables by reference.
   bool ByRef = true;
-  // Using an LValue reference type is consistent with Lambdas (see below).
-  if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) {
+  if (IsTopScope && Kind != Sema::TryCapture_Implicit) {
+    ByRef = (Kind == Sema::TryCapture_ExplicitByRef);
+  } else if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) {
+    // Using an LValue reference type is consistent with Lambdas (see below).
     if (S.isOpenMPCapturedDecl(Var)) {
       bool HasConst = DeclRefType.isConstQualified();
       DeclRefType = DeclRefType.getUnqualifiedType();
@@ -17620,9 +17619,9 @@
                                DeclRefType, Nested, *this, Invalid);
       Nested = true;
     } else if (CapturedRegionScopeInfo *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
-      Invalid = !captureInCapturedRegion(RSI, Var, ExprLoc, BuildAndDiagnose,
-                                         CaptureType, DeclRefType, Nested,
-                                         *this, Invalid);
+      Invalid = !captureInCapturedRegion(
+          RSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested,
+          Kind, /*IsTopScope*/ I == N - 1, *this, Invalid);
       Nested = true;
     } else {
       LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI);
Index: clang/lib/Parse/ParseOpenMP.cpp
===================================================================
--- clang/lib/Parse/ParseOpenMP.cpp
+++ clang/lib/Parse/ParseOpenMP.cpp
@@ -2529,7 +2529,15 @@
       // the captured region. Code elsewhere assumes that any FunctionScopeInfo
       // should have at least one compound statement scope within it.
       ParsingOpenMPDirectiveRAII NormalScope(*this, /*Value=*/false);
-      AssociatedStmt = (Sema::CompoundScopeRAII(Actions), ParseStatement());
+      {
+        Sema::CompoundScopeRAII Scope(Actions);
+        AssociatedStmt = ParseStatement();
+
+        if (AssociatedStmt.isUsable() && isOpenMPLoopDirective(DKind) &&
+            getLangOpts().OpenMPIRBuilder)
+          AssociatedStmt =
+              Actions.ActOnOpenMPCanonicalLoop(AssociatedStmt.get());
+      }
       AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses);
     } else if (DKind == OMPD_target_update || DKind == OMPD_target_enter_data ||
                DKind == OMPD_target_exit_data) {
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -50,6 +50,7 @@
 class SwitchInst;
 class Twine;
 class Value;
+class CanonicalLoopInfo;
 }
 
 namespace clang {
@@ -276,6 +277,18 @@
   // because of jumps.
   VarBypassDetector Bypasses;
 
+  /// List of recently emitted OpenMP canonical loops.
+  ///
+  /// Since canonical loops are nested inside other statements (in particular
+  /// CapturedStmt generated by OMPExecutableDirective and non-perfectly nested
+  /// loops), we cannot directly call EmitOMPCanonicalLoop and receive its
+  /// llvm::CanonicalLoopInfo. Instead, we call EmitStmt and any
+  /// EmitOMPCanonicalLoop called by it will add its CanonicalLoopInfo to
+  /// this stack when done. Entering a new loop requires clearing this list; it
+  /// either means we start parsing a new loop nest or sequential loops that are
+  /// not nested in each other.
+  SmallVector<llvm::CanonicalLoopInfo *, 4> OMPLoopNestStack;
+
   // CodeGen lambda for loops and support for ordered clause
   typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &,
                                   JumpDest)>
@@ -3498,6 +3511,18 @@
   static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
       CodeGenModule &CGM, StringRef ParentName,
       const OMPTargetTeamsDistributeParallelForDirective &S);
+
+  /// Emit the Stmt \p S and return its topmost canonical loop, if any.
+  /// TODO: The \p Depth parameter is not yet implemented and must be 1. In the
+  /// future it is meant to be the number of loops expected in the loop nests
+  /// (usually specified by the "collapse" clause) that are collapsed to a
+  /// single loop by this function.
+  llvm::CanonicalLoopInfo *EmitOMPCollapsedCanonicalLoopNest(const Stmt *S,
+                                                             int Depth);
+
+  /// Emit a MaybeCanonicalLoopStmt using the OpenMPIRBuilder.
+  void EmitOMPCanonicalLoop(const MaybeCanonicalLoopStmt *S);
+
   /// Emit inner loop of the worksharing/simd construct.
   ///
   /// \param S Directive, for which the inner loop must be emitted.
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1858,6 +1858,115 @@
   BreakContinueStack.pop_back();
 }
 
+using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
+
+/// Generate the outlined function for the captured statement \p S and set up
+/// its capture struct in \p ParentCGF; both are returned as a pair.
+static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
+                                             const CapturedStmt *S) {
+  // Initialize the closure context in the parent function first.
+  LValue Context = ParentCGF.InitCapturedStruct(*S);
+  // The closure body is emitted through a separate CodeGenFunction instance.
+  CodeGenFunction ClosureCGF(ParentCGF.CGM, true);
+  auto Info = std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
+  CodeGenFunction::CGCapturedStmtRAII InfoRAII(ClosureCGF, Info.get());
+  llvm::Function *Outlined = ClosureCGF.GenerateCapturedStmtFunction(*S);
+  return {Outlined, Context.getPointer(ParentCGF)};
+}
+
+/// Call a closure previously emitted by emitCapturedStmtFunc. The closure's
+/// context pointer is passed last, after all of the explicit \p Args.
+static llvm::CallInst *
+emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
+                     llvm::ArrayRef<llvm::Value *> Args) {
+  llvm::Function *Callee = Cap.first;
+  llvm::Value *Context = Cap.second;
+
+  SmallVector<llvm::Value *> CallArgs(Args.begin(), Args.end());
+  CallArgs.push_back(Context);
+  return ParentCGF.Builder.CreateCall(Callee, CallArgs);
+}
+
+llvm::CanonicalLoopInfo *
+CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
+  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
+  EmitStmt(S);
+  // Loops are pushed after their body, so the outermost one is the last entry.
+  assert(OMPLoopNestStack.size() >= static_cast<size_t>(Depth) &&
+         "Found too few loops");
+  return OMPLoopNestStack.back();
+}
+
+void CodeGenFunction::EmitOMPCanonicalLoop(const MaybeCanonicalLoopStmt *S) {
+  assert(S->getDistanceFunc());
+  assert(S->getLoopVarFunc());
+  assert(S->getLoopVarRef());
+
+  const Stmt *SyntacticalLoop = S;
+  LexicalScope ForScope(*this, S->getSourceRange());
+
+  // Emit init statements. The Distance/LoopVar funcs may reference variable
+  // declarations they contain.
+  const Stmt *BodyStmt;
+  if (auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
+    if (const Stmt *InitStmt = For->getInit())
+      EmitStmt(InitStmt);
+    BodyStmt = For->getBody();
+  } else if (auto *RangeFor = dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
+    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
+      EmitStmt(RangeStmt);
+    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
+      EmitStmt(BeginStmt);
+    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
+      EmitStmt(EndStmt);
+    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
+      EmitStmt(LoopVarStmt);
+    BodyStmt = RangeFor->getBody();
+  } else
+    llvm_unreachable("Expected for-stmt or range-based for-stmt");
+
+  // Emit both closures for later use. By-value captures will be captured here.
+  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
+  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
+  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
+  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
+
+  // Call the distance function to get the number of iterations of the loop to
+  // come. It writes the result to its out-parameter, here a temporary.
+  QualType LogicalTy = DistanceFunc->getCapturedDecl()
+                           ->getParam(0)
+                           ->getType()
+                           .getNonReferenceType();
+  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
+  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
+  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
+
+  // Emit the loop structure.
+  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+  llvm::CanonicalLoopInfo *CL =
+      OMPBuilder.createCanonicalLoop(Builder, {}, DistVal);
+
+  // Emit the loop body: Convert the logical iteration number to the loop
+  // variable and emit the body.
+  Builder.restoreIP(CL->getBodyIP());
+  const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
+  LValue LCVal = EmitLValue(LoopVarRef);
+  Address LoopVarAddress = LCVal.getAddress(*this);
+  emitCapturedStmtCall(*this, LoopVarClosure,
+                       {LoopVarAddress.getPointer(), CL->getIndVar()});
+  {
+    RunCleanupsScope BodyScope(*this);
+    EmitStmt(BodyStmt);
+  }
+
+  // Finish up the loop.
+  Builder.restoreIP(CL->getAfterIP());
+  ForScope.ForceCleanup();
+
+  // Remember the CanonicalLoopInfo for parent AST nodes consuming it; it is
+  // appended only now, after the loop body has been emitted.
+  OMPLoopNestStack.push_back(CL);
+}
+
 void CodeGenFunction::EmitOMPInnerLoop(
     const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
     const Expr *IncExpr,
@@ -1875,6 +1984,7 @@
   const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
   const Stmt *SS = ICS->getCapturedStmt();
   const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
+  OMPLoopNestStack.clear();
   if (AS)
     LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                    AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
@@ -2424,6 +2534,7 @@
   llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
   EmitBlock(CondBlock);
   const SourceRange R = S.getSourceRange();
+  OMPLoopNestStack.clear();
   LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                  SourceLocToDebugLoc(R.getEnd()));
 
@@ -2507,6 +2618,7 @@
   }
 
   EmitBranch(CondBlock);
+  OMPLoopNestStack.clear();
   LoopStack.pop();
   // Emit the fall-through block.
   EmitBlock(LoopExit.getBlock());
@@ -3351,8 +3463,24 @@
 
 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
   bool HasLastprivates = false;
-  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
-                                          PrePostActionTy &) {
+  auto &&CodeGen = [this, &S, &HasLastprivates](CodeGenFunction &CGF,
+                                                PrePostActionTy &) {
+    // Use the OpenMPIRBuilder if enabled.
+    if (CGM.getLangOpts().OpenMPIRBuilder) {
+      // Emit the associated statement and get its loop representation.
+      const Stmt *Inner = S.getRawStmt();
+      llvm::CanonicalLoopInfo *CLI =
+          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
+
+      bool NeedsBarrer = !S.getSingleClause<OMPNowaitClause>();
+      llvm::OpenMPIRBuilder &OMPBuilder =
+          CGM.getOpenMPRuntime().getOMPBuilder();
+      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+      OMPBuilder.createStaticWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrer);
+      return;
+    }
+
     HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
   };
   {
@@ -3363,9 +3491,11 @@
                                                 S.hasCancel());
   }
 
-  // Emit an implicit barrier at the end.
-  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
-    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+  if (!CGM.getLangOpts().OpenMPIRBuilder) {
+    // Emit an implicit barrier at the end.
+    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
+      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+  }
   // Check for outer lastprivate conditional update.
   checkForLastprivateConditionalUpdate(*this, S);
 }
Index: clang/lib/CodeGen/CGStmt.cpp
===================================================================
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -928,6 +928,10 @@
 
 void CodeGenFunction::EmitForStmt(const ForStmt &S,
                                   ArrayRef<const Attr *> ForAttrs) {
+  // If applicable, emit an OpenMP canonical loop instead.
+  if (getLangOpts().OpenMPIRBuilder && S.getDistanceFunc())
+    return EmitOMPCanonicalLoop(&S);
+
   JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
 
   LexicalScope ForScope(*this, S.getSourceRange());
@@ -1042,6 +1046,10 @@
 void
 CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
                                      ArrayRef<const Attr *> ForAttrs) {
+  // If applicable, emit an OpenMP canonical loop instead.
+  if (getLangOpts().OpenMPIRBuilder && S.getDistanceFunc())
+    return EmitOMPCanonicalLoop(&S);
+
   JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
 
   LexicalScope ForScope(*this, S.getSourceRange());
Index: clang/lib/AST/StmtCXX.cpp
===================================================================
--- clang/lib/AST/StmtCXX.cpp
+++ clang/lib/AST/StmtCXX.cpp
@@ -44,22 +44,39 @@
   std::copy(handlers.begin(), handlers.end(), Stmts + 1);
 }
 
-CXXForRangeStmt::CXXForRangeStmt(Stmt *Init, DeclStmt *Range,
-                                 DeclStmt *BeginStmt, DeclStmt *EndStmt,
-                                 Expr *Cond, Expr *Inc, DeclStmt *LoopVar,
-                                 Stmt *Body, SourceLocation FL,
-                                 SourceLocation CAL, SourceLocation CL,
-                                 SourceLocation RPL)
-    : Stmt(CXXForRangeStmtClass), ForLoc(FL), CoawaitLoc(CAL), ColonLoc(CL),
-      RParenLoc(RPL) {
-  SubExprs[INIT] = Init;
-  SubExprs[RANGE] = Range;
-  SubExprs[BEGINSTMT] = BeginStmt;
-  SubExprs[ENDSTMT] = EndStmt;
-  SubExprs[COND] = Cond;
-  SubExprs[INC] = Inc;
-  SubExprs[LOOPVAR] = LoopVar;
-  SubExprs[BODY] = Body;
+CXXForRangeStmt *
+CXXForRangeStmt::create(ASTContext &Ctx, Stmt *InitStmt, DeclStmt *Range,
+                        DeclStmt *Begin, DeclStmt *End, Expr *Cond, Expr *Inc,
+                        DeclStmt *LoopVar, Stmt *Body, SourceLocation FL,
+                        SourceLocation CAL, SourceLocation CL,
+                        SourceLocation RPL, CapturedStmt *DistanceFunc,
+                        CapturedStmt *LoopVarFunc, DeclRefExpr *LoopVarRef) {
+  CXXForRangeStmt *Result = createEmpty(Ctx);
+  Result->setInit(InitStmt);
+  Result->setRangeStmt(Range);
+  Result->setBeginStmt(Begin);
+  Result->setEndStmt(End);
+  Result->setCond(Cond);
+  Result->setInc(Inc);
+  Result->setLoopVarStmt(LoopVar);
+  Result->setBody(Body);
+  Result->ForLoc = FL;
+  Result->CoawaitLoc = CAL;
+  Result->ColonLoc = CL;
+  Result->RParenLoc = RPL;
+  Result->setDistanceFunc(DistanceFunc);
+  Result->setLoopVarFunc(LoopVarFunc);
+  Result->setLoopVarRef(LoopVarRef);
+  return Result;
+}
+
+CXXForRangeStmt *CXXForRangeStmt::createEmpty(ASTContext &Ctx) {
+  unsigned NumChildren = MaybeCanonicalLoopStmt::SubStmtCount + SubCount;
+  void *Mem =
+      Ctx.Allocate(sizeof(CXXForRangeStmt) + LoopChildren::size(NumChildren));
+  void *DataMem = (char *)Mem + sizeof(CXXForRangeStmt);
+  auto *Data = new (DataMem) LoopChildren(NumChildren);
+  return new (Mem) CXXForRangeStmt(Data);
 }
 
 Expr *CXXForRangeStmt::getRangeInit() {
Index: clang/lib/AST/Stmt.cpp
===================================================================
--- clang/lib/AST/Stmt.cpp
+++ clang/lib/AST/Stmt.cpp
@@ -995,36 +995,93 @@
   return !getCond()->EvaluateKnownConstInt(Ctx) ? getElse() : getThen();
 }
 
-ForStmt::ForStmt(const ASTContext &C, Stmt *Init, Expr *Cond, VarDecl *condVar,
-                 Expr *Inc, Stmt *Body, SourceLocation FL, SourceLocation LP,
-                 SourceLocation RP)
-  : Stmt(ForStmtClass), LParenLoc(LP), RParenLoc(RP)
-{
-  SubExprs[INIT] = Init;
-  setConditionVariable(C, condVar);
-  SubExprs[COND] = Cond;
-  SubExprs[INC] = Inc;
-  SubExprs[BODY] = Body;
-  ForStmtBits.ForLoc = FL;
+LoopChildren::LoopChildren(unsigned NumChildren) : NumChildren(NumChildren) {
+  for (unsigned i = 0; i < NumChildren; ++i)
+    getTrailingObjects<Stmt *>()[i] = nullptr;
+}
+
+Stmt::child_range LoopChildren::children() {
+  Stmt **Elts = getTrailingObjects<Stmt *>();
+
+  // For compatibility, hide the canonical loop sub-stmts if not present.
+  if (!Elts[NumChildren - 1] && !Elts[NumChildren - 2] &&
+      !Elts[NumChildren - 3])
+    return Stmt::child_range(&Elts[0], &Elts[NumChildren - 3]);
+
+  return Stmt::child_range(&Elts[0], &Elts[NumChildren]);
+}
+
+Stmt::const_child_range LoopChildren::children() const {
+  Stmt *const *Elts = getTrailingObjects<Stmt *>();
+
+  // For compatibility, hide the canonical loop sub-stmts if not present.
+  if (!Elts[NumChildren - 1] && !Elts[NumChildren - 2] &&
+      !Elts[NumChildren - 3])
+    return Stmt::const_child_range(&Elts[0], &Elts[NumChildren - 3]);
+
+  return Stmt::const_child_range(&Elts[0], &Elts[NumChildren]);
+}
+
+void MaybeCanonicalLoopStmt::setDistanceFunc(Stmt *S) {
+  assert(!S || isa<CapturedStmt>(S));
+  getCanonicalChildren()[DISTANCE_FUNC] = S;
+}
+
+void MaybeCanonicalLoopStmt::setLoopVarFunc(Stmt *S) {
+  assert(!S || isa<CapturedStmt>(S));
+  getCanonicalChildren()[LOOPVAR_FUNC] = S;
+}
+
+void MaybeCanonicalLoopStmt::setLoopVarRef(Expr *E) {
+  assert(!E || isa<DeclRefExpr>(E));
+  getCanonicalChildren()[LOOPVAR_REF] = E;
+}
+
+ForStmt *ForStmt::create(ASTContext &Ctx, Stmt *Init, Expr *Cond,
+                         VarDecl *condVar, Expr *Inc, Stmt *Body,
+                         SourceLocation FL, SourceLocation LP,
+                         SourceLocation RP, CapturedStmt *DistanceFunc,
+                         CapturedStmt *LoopVarFunc, DeclRefExpr *LoopVarRef) {
+  ForStmt *Result = createEmpty(Ctx);
+  Result->setInit(Init);
+  Result->setConditionVariable(Ctx, condVar);
+  Result->setCond(Cond);
+  Result->setInc(Inc);
+  Result->setBody(Body);
+  Result->setForLoc(FL);
+  Result->setLParenLoc(LP);
+  Result->setRParenLoc(RP);
+  Result->setDistanceFunc(DistanceFunc);
+  Result->setLoopVarFunc(LoopVarFunc);
+  Result->setLoopVarRef(LoopVarRef);
+  return Result;
+}
+
+ForStmt *ForStmt::createEmpty(ASTContext &Ctx) {
+  unsigned NumChildren = MaybeCanonicalLoopStmt::SubStmtCount + SubCount;
+  void *Mem = Ctx.Allocate(sizeof(ForStmt) + LoopChildren::size(NumChildren));
+  void *DataMem = (char *)Mem + sizeof(ForStmt);
+  auto *Data = new (DataMem) LoopChildren(NumChildren);
+  return new (Mem) ForStmt(Data);
 }
 
 VarDecl *ForStmt::getConditionVariable() const {
-  if (!SubExprs[CONDVAR])
+  if (!getConditionVariableDeclStmt())
     return nullptr;
 
-  auto *DS = cast<DeclStmt>(SubExprs[CONDVAR]);
+  auto *DS = cast<DeclStmt>(getChildren()[CONDVAR]);
   return cast<VarDecl>(DS->getSingleDecl());
 }
 
 void ForStmt::setConditionVariable(const ASTContext &C, VarDecl *V) {
   if (!V) {
-    SubExprs[CONDVAR] = nullptr;
+    getChildren()[CONDVAR] = nullptr;
     return;
   }
 
   SourceRange VarRange = V->getSourceRange();
-  SubExprs[CONDVAR] = new (C) DeclStmt(DeclGroupRef(V), VarRange.getBegin(),
-                                       VarRange.getEnd());
+  getChildren()[CONDVAR] =
+      new (C) DeclStmt(DeclGroupRef(V), VarRange.getBegin(), VarRange.getEnd());
 }
 
 SwitchStmt::SwitchStmt(const ASTContext &Ctx, Stmt *Init, VarDecl *Var,
@@ -1266,13 +1323,6 @@
     break;
   case VCK_ByCopy:
     assert(Var && "capturing by copy must have a variable!");
-    assert(
-        (Var->getType()->isScalarType() || (Var->getType()->isReferenceType() &&
-                                            Var->getType()
-                                                ->castAs<ReferenceType>()
-                                                ->getPointeeType()
-                                                ->isScalarType())) &&
-        "captures by copy are expected to have a scalar type!");
     break;
   case VCK_VLAType:
     assert(!Var &&
Index: clang/lib/AST/ASTImporter.cpp
===================================================================
--- clang/lib/AST/ASTImporter.cpp
+++ clang/lib/AST/ASTImporter.cpp
@@ -6246,10 +6246,9 @@
   if (Err)
     return std::move(Err);
 
-  return new (Importer.getToContext()) ForStmt(
-      Importer.getToContext(),
-      ToInit, ToCond, ToConditionVariable, ToInc, ToBody, ToForLoc, ToLParenLoc,
-      ToRParenLoc);
+  return ForStmt::create(Importer.getToContext(), ToInit, ToCond,
+                         ToConditionVariable, ToInc, ToBody, ToForLoc,
+                         ToLParenLoc, ToRParenLoc);
 }
 
 ExpectedStmt ASTNodeImporter::VisitGotoStmt(GotoStmt *S) {
@@ -6358,9 +6357,10 @@
   if (Err)
     return std::move(Err);
 
-  return new (Importer.getToContext()) CXXForRangeStmt(
-      ToInit, ToRangeStmt, ToBeginStmt, ToEndStmt, ToCond, ToInc, ToLoopVarStmt,
-      ToBody, ToForLoc, ToCoawaitLoc, ToColonLoc, ToRParenLoc);
+  return CXXForRangeStmt::create(Importer.getToContext(), ToInit, ToRangeStmt,
+                                 ToBeginStmt, ToEndStmt, ToCond, ToInc,
+                                 ToLoopVarStmt, ToBody, ToForLoc, ToCoawaitLoc,
+                                 ToColonLoc, ToRParenLoc);
 }
 
 ExpectedStmt
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -10479,6 +10479,11 @@
 
   /// Initialization of captured region for OpenMP region.
   void ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope);
+
+  /// Called for syntactical loops (ForStmt or CXXForRangeStmt) associated with
+  /// an OpenMP loop directive.
+  StmtResult ActOnOpenMPCanonicalLoop(Stmt *AStmt);
+
   /// End of OpenMP region.
   ///
   /// \param S Statement associated with the current OpenMP region.
Index: clang/include/clang/Basic/StmtNodes.td
===================================================================
--- clang/include/clang/Basic/StmtNodes.td
+++ clang/include/clang/Basic/StmtNodes.td
@@ -13,7 +13,6 @@
 def SwitchStmt : StmtNode<Stmt>;
 def WhileStmt : StmtNode<Stmt>;
 def DoStmt : StmtNode<Stmt>;
-def ForStmt : StmtNode<Stmt>;
 def GotoStmt : StmtNode<Stmt>;
 def IndirectGotoStmt : StmtNode<Stmt>;
 def ContinueStmt : StmtNode<Stmt>;
@@ -25,6 +24,10 @@
 def DefaultStmt : StmtNode<SwitchCase>;
 def CapturedStmt : StmtNode<Stmt>;
 
+def MaybeCanonicalLoopStmt : StmtNode<Stmt, 1>;
+def ForStmt : StmtNode<MaybeCanonicalLoopStmt>;
+def CXXForRangeStmt : StmtNode<MaybeCanonicalLoopStmt>;
+
 // Statements that might produce a value (for example, as the last non-null
 // statement in a GNU statement-expression).
 def ValueStmt : StmtNode<Stmt, 1>;
@@ -48,7 +51,6 @@
 // C++ statements
 def CXXCatchStmt : StmtNode<Stmt>;
 def CXXTryStmt : StmtNode<Stmt>;
-def CXXForRangeStmt : StmtNode<Stmt>;
 
 // C++ Coroutines TS statements
 def CoroutineBodyStmt : StmtNode<Stmt>;
Index: clang/include/clang/AST/StmtCXX.h
===================================================================
--- clang/include/clang/AST/StmtCXX.h
+++ clang/include/clang/AST/StmtCXX.h
@@ -131,72 +131,87 @@
 /// This is stored in a partially-desugared form to allow full semantic
 /// analysis of the constituent components. The original syntactic components
 /// can be extracted using getLoopVariable and getRangeInit.
-class CXXForRangeStmt : public Stmt {
+class CXXForRangeStmt : public MaybeCanonicalLoopStmt {
+  enum {
+    INIT,
+    RANGE,
+    BEGINSTMT,
+    ENDSTMT,
+    COND,
+    INC,
+    LOOPVAR,
+    BODY,
+    LastSub = BODY
+  };
+  static constexpr unsigned SubCount = LastSub + 1;
+
   SourceLocation ForLoc;
-  enum { INIT, RANGE, BEGINSTMT, ENDSTMT, COND, INC, LOOPVAR, BODY, END };
-  // SubExprs[RANGE] is an expression or declstmt.
-  // SubExprs[COND] and SubExprs[INC] are expressions.
-  Stmt *SubExprs[END];
   SourceLocation CoawaitLoc;
   SourceLocation ColonLoc;
   SourceLocation RParenLoc;
 
   friend class ASTStmtReader;
-public:
-  CXXForRangeStmt(Stmt *InitStmt, DeclStmt *Range, DeclStmt *Begin,
-                  DeclStmt *End, Expr *Cond, Expr *Inc, DeclStmt *LoopVar,
-                  Stmt *Body, SourceLocation FL, SourceLocation CAL,
-                  SourceLocation CL, SourceLocation RPL);
-  CXXForRangeStmt(EmptyShell Empty) : Stmt(CXXForRangeStmtClass, Empty) { }
 
-  Stmt *getInit() { return SubExprs[INIT]; }
+private:
+  CXXForRangeStmt(LoopChildren *Data)
+      : MaybeCanonicalLoopStmt(CXXForRangeStmtClass, Data) {}
+
+public:
+  static CXXForRangeStmt *
+  create(ASTContext &Ctx, Stmt *InitStmt, DeclStmt *Range, DeclStmt *Begin,
+         DeclStmt *End, Expr *Cond, Expr *Inc, DeclStmt *LoopVar, Stmt *Body,
+         SourceLocation FL, SourceLocation CAL, SourceLocation CL,
+         SourceLocation RPL, CapturedStmt *DistanceFunc = nullptr,
+         CapturedStmt *LoopVarFunc = nullptr,
+         DeclRefExpr *LoopVarRef = nullptr);
+  static CXXForRangeStmt *createEmpty(ASTContext &Ctx);
+
+  Stmt *getInit() { return getChildren()[INIT]; }
   VarDecl *getLoopVariable();
   Expr *getRangeInit();
 
-  const Stmt *getInit() const { return SubExprs[INIT]; }
+  const Stmt *getInit() const { return getChildren()[INIT]; }
   const VarDecl *getLoopVariable() const;
   const Expr *getRangeInit() const;
 
-
-  DeclStmt *getRangeStmt() { return cast<DeclStmt>(SubExprs[RANGE]); }
+  DeclStmt *getRangeStmt() { return cast<DeclStmt>(getChildren()[RANGE]); }
   DeclStmt *getBeginStmt() {
-    return cast_or_null<DeclStmt>(SubExprs[BEGINSTMT]);
+    return cast_or_null<DeclStmt>(getChildren()[BEGINSTMT]);
+  }
+  DeclStmt *getEndStmt() {
+    return cast_or_null<DeclStmt>(getChildren()[ENDSTMT]);
   }
-  DeclStmt *getEndStmt() { return cast_or_null<DeclStmt>(SubExprs[ENDSTMT]); }
-  Expr *getCond() { return cast_or_null<Expr>(SubExprs[COND]); }
-  Expr *getInc() { return cast_or_null<Expr>(SubExprs[INC]); }
-  DeclStmt *getLoopVarStmt() { return cast<DeclStmt>(SubExprs[LOOPVAR]); }
-  Stmt *getBody() { return SubExprs[BODY]; }
+  Expr *getCond() { return cast_or_null<Expr>(getChildren()[COND]); }
+  Expr *getInc() { return cast_or_null<Expr>(getChildren()[INC]); }
+  DeclStmt *getLoopVarStmt() { return cast<DeclStmt>(getChildren()[LOOPVAR]); }
+  Stmt *getBody() { return getChildren()[BODY]; }
 
   const DeclStmt *getRangeStmt() const {
-    return cast<DeclStmt>(SubExprs[RANGE]);
+    return cast<DeclStmt>(getChildren()[RANGE]);
   }
   const DeclStmt *getBeginStmt() const {
-    return cast_or_null<DeclStmt>(SubExprs[BEGINSTMT]);
+    return cast_or_null<DeclStmt>(getChildren()[BEGINSTMT]);
   }
   const DeclStmt *getEndStmt() const {
-    return cast_or_null<DeclStmt>(SubExprs[ENDSTMT]);
+    return cast_or_null<DeclStmt>(getChildren()[ENDSTMT]);
   }
   const Expr *getCond() const {
-    return cast_or_null<Expr>(SubExprs[COND]);
-  }
-  const Expr *getInc() const {
-    return cast_or_null<Expr>(SubExprs[INC]);
+    return cast_or_null<Expr>(getChildren()[COND]);
   }
+  const Expr *getInc() const { return cast_or_null<Expr>(getChildren()[INC]); }
   const DeclStmt *getLoopVarStmt() const {
-    return cast<DeclStmt>(SubExprs[LOOPVAR]);
+    return cast<DeclStmt>(getChildren()[LOOPVAR]);
   }
-  const Stmt *getBody() const { return SubExprs[BODY]; }
+  const Stmt *getBody() const { return getChildren()[BODY]; }
 
-  void setInit(Stmt *S) { SubExprs[INIT] = S; }
-  void setRangeInit(Expr *E) { SubExprs[RANGE] = reinterpret_cast<Stmt*>(E); }
-  void setRangeStmt(Stmt *S) { SubExprs[RANGE] = S; }
-  void setBeginStmt(Stmt *S) { SubExprs[BEGINSTMT] = S; }
-  void setEndStmt(Stmt *S) { SubExprs[ENDSTMT] = S; }
-  void setCond(Expr *E) { SubExprs[COND] = reinterpret_cast<Stmt*>(E); }
-  void setInc(Expr *E) { SubExprs[INC] = reinterpret_cast<Stmt*>(E); }
-  void setLoopVarStmt(Stmt *S) { SubExprs[LOOPVAR] = S; }
-  void setBody(Stmt *S) { SubExprs[BODY] = S; }
+  void setInit(Stmt *S) { getChildren()[INIT] = S; }
+  void setRangeStmt(Stmt *S) { getChildren()[RANGE] = S; }
+  void setBeginStmt(Stmt *S) { getChildren()[BEGINSTMT] = S; }
+  void setEndStmt(Stmt *S) { getChildren()[ENDSTMT] = S; }
+  void setCond(Expr *E) { getChildren()[COND] = reinterpret_cast<Stmt *>(E); }
+  void setInc(Expr *E) { getChildren()[INC] = reinterpret_cast<Stmt *>(E); }
+  void setLoopVarStmt(Stmt *S) { getChildren()[LOOPVAR] = S; }
+  void setBody(Stmt *S) { getChildren()[BODY] = S; }
 
   SourceLocation getForLoc() const { return ForLoc; }
   SourceLocation getCoawaitLoc() const { return CoawaitLoc; }
@@ -205,21 +220,12 @@
 
   SourceLocation getBeginLoc() const LLVM_READONLY { return ForLoc; }
   SourceLocation getEndLoc() const LLVM_READONLY {
-    return SubExprs[BODY]->getEndLoc();
+    return getBody()->getEndLoc();
   }
 
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CXXForRangeStmtClass;
   }
-
-  // Iterators
-  child_range children() {
-    return child_range(&SubExprs[0], &SubExprs[END]);
-  }
-
-  const_child_range children() const {
-    return const_child_range(&SubExprs[0], &SubExprs[END]);
-  }
 };
 
 /// Representation of a Microsoft __if_exists or __if_not_exists
Index: clang/include/clang/AST/Stmt.h
===================================================================
--- clang/include/clang/AST/Stmt.h
+++ clang/include/clang/AST/Stmt.h
@@ -46,6 +46,7 @@
 class ASTContext;
 class Attr;
 class CapturedDecl;
+class CapturedStmt;
 class Decl;
 class Expr;
 class AddrLabelExpr;
@@ -2495,23 +2496,222 @@
   }
 };
 
+/// Implementation for holding the children of ForStmt or CXXForRangeStmt.
+class LoopChildren final : private llvm::TrailingObjects<LoopChildren, Stmt *> {
+  friend class ForStmt;
+  friend class CXXForRangeStmt;
+
+private:
+  unsigned NumChildren;
+
+  size_t numTrailingObjects(OverloadToken<Stmt *>) const { return NumChildren; }
+  static size_t size(unsigned NumChildren) {
+    return totalSizeToAlloc<Stmt *>(NumChildren);
+  }
+
+  LoopChildren() = delete;
+  LoopChildren(unsigned NumChildren);
+
+public:
+  /// Return this AST node's children.
+  /// @{
+  Stmt::child_range children();
+  Stmt::const_child_range children() const;
+
+  MutableArrayRef<Stmt *> getChildren() {
+    return {&getTrailingObjects<Stmt *>()[0], NumChildren};
+  }
+  ArrayRef<Stmt *> getChildren() const {
+    return {&getTrailingObjects<Stmt *>()[0], NumChildren};
+  }
+  /// @}
+}; // class LoopChildren
+
+/// Superclass for AST nodes that can be OpenMP canonical loops.
+///
+/// If the semantic analyzer determines that a ForStmt or CXXForRangeStmt is
+/// used in context of an OpenMP loop-associated directive (OMPLoopDirective)
+/// and fulfills the requirement of an OpenMP canonical loop, the AST node has
+/// three more properties: the distance function, the loop variable function and
+/// the loop variable reference. Otherwise, these members are NULL.
+///
+/// An OpenMP canonical loop is a for-statement or range-based for-statement
+/// with additional requirements that ensure that the number of iterations is
+/// known before entering the loop and allow skipping to an arbitrary iteration.
+/// The MaybeCanonicalLoopStmt AST node wraps a ForStmt or CXXForRangeStmt that
+/// is known to fulfill OpenMP's loop requirements.
+///
+/// There are three different kinds of iteration variables for different
+/// purposes:
+/// * Loop variable: The user-accessible variable with different value for each
+///   iteration.
+/// * Loop counter: The variable used to identify a loop iteration; for
+///   range-based for-statement, this is the hidden iterator '__begin'. For
+///   other loops, it is identical to the loop variable. Must be a random-access
+///   iterator or integer type.
+/// * Logical iteration counter: Normalized loop counter starting at 0 and
+///   incrementing by one at each iteration. Allows abstracting over the type
+///   of the loop counter and is always an unsigned integer type appropriate to
+///   represent the range of the loop counter variable.
+///
+/// This AST node provides two captured statements:
+/// * The distance function which computes the number of iterations.
+/// * The loop variable function that computes the loop variable when given a
+///   logical iteration number.
+///
+/// These captured statements provide the link between C/C++ semantics and the
+/// logical iteration counters used by the OpenMPIRBuilder which is
+/// language-agnostic and therefore does not know e.g. how to advance a
+/// random-access iterator. The OpenMPIRBuilder will use this information to
+/// convert the loop into simd-, workshare-, distribute-, taskloop etc. For
+/// compatibility with the non-OpenMPIRBuilder codegen path, an
+/// MaybeCanonicalLoopStmt can itself also be wrapped into the CapturedStmts of
+/// an OMPLoopDirective and skipped when searching for the associated
+/// syntactical loop.
+///
+/// Example:
+/// <code>
+///   std::vector<std::string> Container{1,2,3};
+///   for (std::string Str : Container)
+///      Body(Str);
+/// </code>
+/// which is syntactic sugar for approximately:
+/// <code>
+///   auto &&__range = Container;
+///   auto __begin = std::begin(__range);
+///   auto __end = std::end(__range);
+///   for (; __begin != __end; ++__begin) {
+///     std::string Str = *__begin;
+///     Body(Str);
+///   }
+/// </code>
+/// In this example, the loop variable is `Str`, the loop counter is `__begin`
+/// of type `std::vector<std::string>::iterator` and the logical iteration
+/// number type is `size_t` (unsigned version of
+/// `std::vector<std::string>::iterator::difference_type` aka `ptrdiff_t`).
+/// Therefore, the distance function will be
+/// <code>
+///   [&](size_t &Result) { Result = __end - __begin; }
+/// </code>
+///  and the loop variable function is
+/// <code>
+///   [&,__begin](std::vector<std::string>::iterator &Result, size_t Logical) {
+///   Result = __begin + Logical; }
+/// </code>
+class MaybeCanonicalLoopStmt
+    : public Stmt,
+      private llvm::TrailingObjects<MaybeCanonicalLoopStmt, Stmt *> {
+
+protected:
+  /// Children of this AST node.
+  enum { DISTANCE_FUNC, LOOPVAR_FUNC, LOOPVAR_REF, LastSubStmt = LOOPVAR_REF };
+  static constexpr unsigned SubStmtCount = LastSubStmt + 1;
+
+  LoopChildren *Data;
+
+  MaybeCanonicalLoopStmt() = delete;
+  MaybeCanonicalLoopStmt(StmtClass SC, LoopChildren *Data)
+      : Stmt(SC), Data(Data) {}
+
+  MutableArrayRef<Stmt *> getChildren() { return Data->getChildren(); }
+  ArrayRef<Stmt *> getChildren() const { return Data->getChildren(); }
+
+  MutableArrayRef<Stmt *> getCanonicalChildren() {
+    MutableArrayRef<Stmt *> AllChildren = Data->getChildren();
+    return Data->getChildren().slice(AllChildren.size() - SubStmtCount,
+                                     SubStmtCount);
+  }
+  ArrayRef<Stmt *> getCanonicalChildren() const {
+    ArrayRef<Stmt *> AllChildren = Data->getChildren();
+    return Data->getChildren().slice(AllChildren.size() - SubStmtCount,
+                                     SubStmtCount);
+  }
+
+public:
+  static bool classof(const Stmt *S) {
+    return StmtClass::firstMaybeCanonicalLoopStmtConstant <=
+               S->getStmtClass() &&
+           S->getStmtClass() <= StmtClass::lastMaybeCanonicalLoopStmtConstant;
+  }
+
+public:
+  /// Return this AST node's children.
+  /// @{
+  child_range children() { return Data->children(); }
+  const_child_range children() const { return Data->children(); }
+  /// @}
+
+  /// The function that computes the number of loop iterations. Can be evaluated
+  /// before entering the loop but after the syntactical loop's init
+  /// statement(s).
+  ///
+  /// Function signature: void(LogicalTy &Result)
+  /// Any values necessary to compute the distance are captures of the closure.
+  /// @{
+  CapturedStmt *getDistanceFunc() {
+    return cast_or_null<CapturedStmt>(getCanonicalChildren()[DISTANCE_FUNC]);
+  }
+  const CapturedStmt *getDistanceFunc() const {
+    return cast_or_null<CapturedStmt>(getCanonicalChildren()[DISTANCE_FUNC]);
+  }
+  void setDistanceFunc(Stmt *S);
+  /// @}
+
+  /// The function that computes the loop variable from a logical iteration
+  /// counter. Can be evaluated as first statement in the loop.
+  ///
+  /// Function signature: void(LoopVarTy &Result, LogicalTy Number)
+  /// Any other values required to compute the loop variable (such as start
+  /// value, step size) are captured by the closure. In particular, the initial
+  /// value of loop counter is captured by value to be unaffected by previous
+  /// iterations.
+  /// @{
+  CapturedStmt *getLoopVarFunc() {
+    return cast_or_null<CapturedStmt>(getCanonicalChildren()[LOOPVAR_FUNC]);
+  }
+  const CapturedStmt *getLoopVarFunc() const {
+    return cast_or_null<CapturedStmt>(getCanonicalChildren()[LOOPVAR_FUNC]);
+  }
+  void setLoopVarFunc(Stmt *S);
+  /// @}
+
+  /// Reference to the loop variable as accessed in the loop body.
+  /// @{
+  DeclRefExpr *getLoopVarRef() {
+    return reinterpret_cast<DeclRefExpr *>(getCanonicalChildren()[LOOPVAR_REF]);
+  }
+  const DeclRefExpr *getLoopVarRef() const {
+    return reinterpret_cast<const DeclRefExpr *>(
+        getCanonicalChildren()[LOOPVAR_REF]);
+  }
+  void setLoopVarRef(Expr *E);
+  /// @}
+};
+
 /// ForStmt - This represents a 'for (init;cond;inc)' stmt.  Note that any of
 /// the init/cond/inc parts of the ForStmt will be null if they were not
 /// specified in the source.
-class ForStmt : public Stmt {
-  enum { INIT, CONDVAR, COND, INC, BODY, END_EXPR };
-  Stmt* SubExprs[END_EXPR]; // SubExprs[INIT] is an expression or declstmt.
+class ForStmt : public MaybeCanonicalLoopStmt {
+  enum { INIT, CONDVAR, COND, INC, BODY, LastSub = BODY };
+  static const unsigned SubCount = LastSub + 1;
   SourceLocation LParenLoc, RParenLoc;
 
-public:
-  ForStmt(const ASTContext &C, Stmt *Init, Expr *Cond, VarDecl *condVar,
-          Expr *Inc, Stmt *Body, SourceLocation FL, SourceLocation LP,
-          SourceLocation RP);
-
+private:
   /// Build an empty for statement.
-  explicit ForStmt(EmptyShell Empty) : Stmt(ForStmtClass, Empty) {}
+  explicit ForStmt(LoopChildren *Data)
+      : MaybeCanonicalLoopStmt(ForStmtClass, Data) {}
+
+public:
+  static ForStmt *create(ASTContext &Ctx, Stmt *Init, Expr *Cond,
+                         VarDecl *condVar, Expr *Inc, Stmt *Body,
+                         SourceLocation FL, SourceLocation LP,
+                         SourceLocation RP,
+                         CapturedStmt *DistanceFunc = nullptr,
+                         CapturedStmt *LoopVarFunc = nullptr,
+                         DeclRefExpr *LoopVarRef = nullptr);
+  static ForStmt *createEmpty(ASTContext &Ctx);
 
-  Stmt *getInit() { return SubExprs[INIT]; }
+  Stmt *getInit() { return getChildren()[INIT]; }
 
   /// Retrieve the variable declared in this "for" statement, if any.
   ///
@@ -2527,22 +2727,26 @@
   /// If this ForStmt has a condition variable, return the faux DeclStmt
   /// associated with the creation of that condition variable.
   const DeclStmt *getConditionVariableDeclStmt() const {
-    return reinterpret_cast<DeclStmt*>(SubExprs[CONDVAR]);
+    return reinterpret_cast<DeclStmt *>(getChildren()[CONDVAR]);
   }
 
-  Expr *getCond() { return reinterpret_cast<Expr*>(SubExprs[COND]); }
-  Expr *getInc()  { return reinterpret_cast<Expr*>(SubExprs[INC]); }
-  Stmt *getBody() { return SubExprs[BODY]; }
+  Expr *getCond() { return reinterpret_cast<Expr *>(getChildren()[COND]); }
+  Expr *getInc() { return reinterpret_cast<Expr *>(getChildren()[INC]); }
+  Stmt *getBody() { return getChildren()[BODY]; }
 
-  const Stmt *getInit() const { return SubExprs[INIT]; }
-  const Expr *getCond() const { return reinterpret_cast<Expr*>(SubExprs[COND]);}
-  const Expr *getInc()  const { return reinterpret_cast<Expr*>(SubExprs[INC]); }
-  const Stmt *getBody() const { return SubExprs[BODY]; }
+  const Stmt *getInit() const { return getChildren()[INIT]; }
+  const Expr *getCond() const {
+    return reinterpret_cast<Expr *>(getChildren()[COND]);
+  }
+  const Expr *getInc() const {
+    return reinterpret_cast<Expr *>(getChildren()[INC]);
+  }
+  const Stmt *getBody() const { return getChildren()[BODY]; }
 
-  void setInit(Stmt *S) { SubExprs[INIT] = S; }
-  void setCond(Expr *E) { SubExprs[COND] = reinterpret_cast<Stmt*>(E); }
-  void setInc(Expr *E) { SubExprs[INC] = reinterpret_cast<Stmt*>(E); }
-  void setBody(Stmt *S) { SubExprs[BODY] = S; }
+  void setInit(Stmt *S) { getChildren()[INIT] = S; }
+  void setCond(Expr *E) { getChildren()[COND] = reinterpret_cast<Stmt *>(E); }
+  void setInc(Expr *E) { getChildren()[INC] = reinterpret_cast<Stmt *>(E); }
+  void setBody(Stmt *S) { getChildren()[BODY] = S; }
 
   SourceLocation getForLoc() const { return ForStmtBits.ForLoc; }
   void setForLoc(SourceLocation L) { ForStmtBits.ForLoc = L; }
@@ -2557,15 +2761,6 @@
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == ForStmtClass;
   }
-
-  // Iterators
-  child_range children() {
-    return child_range(&SubExprs[0], &SubExprs[0]+END_EXPR);
-  }
-
-  const_child_range children() const {
-    return const_child_range(&SubExprs[0], &SubExprs[0] + END_EXPR);
-  }
 };
 
 /// GotoStmt - This represents a direct goto.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to