Author: abataev Date: Fri Aug 21 01:41:23 2015 New Revision: 245674 URL: http://llvm.org/viewvc/llvm-project?rev=245674&view=rev Log: [OPENMP 4.1] Improved codegen for 'uval' qualifier of 'linear' clause. According to standard the 'uval' modifier declares the address of the original list item to have an invariant value for all iterations of the associated loop(s). Patch improves codegen for this qualifier by removing usage of the original reference variable and replacing by referenced l-value.
Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp cfe/trunk/test/OpenMP/simd_codegen.cpp Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=245674&r1=245673&r2=245674&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original) +++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Fri Aug 21 01:41:23 2015 @@ -6423,7 +6423,12 @@ OMPClause *Sema::ActOnOpenMPLinearClause *this, Private, DE->getType().getUnqualifiedType(), DE->getExprLoc()); // Build var to save initial value. VarDecl *Init = buildVarDecl(*this, ELoc, QType, ".linear.start"); - AddInitializerToDecl(Init, DefaultLvalueConversion(DE).get(), + Expr *InitExpr; + if (LinKind == OMPC_LINEAR_uval) + InitExpr = VD->getInit(); + else + InitExpr = DE; + AddInitializerToDecl(Init, DefaultLvalueConversion(InitExpr).get(), /*DirectInit*/ false, /*TypeMayContainAuto*/ false); auto InitRef = buildDeclRefExpr( *this, Init, DE->getType().getUnqualifiedType(), DE->getExprLoc()); @@ -6491,15 +6496,20 @@ static bool FinishOpenMPLinearClause(OMP bool HasErrors = false; auto CurInit = Clause.inits().begin(); auto CurPrivate = Clause.privates().begin(); + auto LinKind = Clause.getModifier(); for (auto &RefExpr : Clause.varlists()) { Expr *InitExpr = *CurInit; // Build privatized reference to the current linear var. auto DE = cast<DeclRefExpr>(RefExpr); - auto CapturedRef = - buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()), - DE->getType().getUnqualifiedType(), DE->getExprLoc(), - /*RefersToCapture=*/true); + Expr *CapturedRef; + if (LinKind == OMPC_LINEAR_uval) + CapturedRef = cast<VarDecl>(DE->getDecl())->getInit(); + else + CapturedRef = + buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()), + DE->getType().getUnqualifiedType(), DE->getExprLoc(), + /*RefersToCapture=*/true); // Build update: Var = InitExpr + IV * Step ExprResult Update = Modified: cfe/trunk/test/OpenMP/simd_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/simd_codegen.cpp?rev=245674&r1=245673&r2=245674&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/simd_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/simd_codegen.cpp Fri Aug 21 01:41:23 2015 @@ -481,6 +481,135 @@ void widened(float *a, float *b, float * // CHECK: ret void } +// CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}}) +void linear(float *a) { + // CHECK: [[VAL_ADDR:%.+]] = alloca i64, + // CHECK: [[K_ADDR:%.+]] = alloca i64*, + long long val = 0; + long long &k = val; + + #pragma omp simd linear(k : 3) +// CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]], +// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]] +// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]], +// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]] +// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] + +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]] +// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9 +// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]] + for (int i = 10; i > 1; i--) { +// CHECK: [[SIMPLE_LOOP_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// FIXME: It is interesting, why the following "mul 1" was not constant folded? +// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1 +// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]] +// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// +// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3 +// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64 +// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]] +// Update of the privatized version of linear variable! +// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]] + a[k]++; + k = k + 3; +// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1 +// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]] + } +// CHECK: [[SIMPLE_LOOP_END]] +// +// Update linear vars after loop, as the loop was operating on a private version. +// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]], +// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]] +// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27 +// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]] +// + + #pragma omp simd linear(val(k) : 3) +// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]] +// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]], +// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]] +// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] + +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]] +// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9 +// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]] + for (int i = 10; i > 1; i--) { +// CHECK: [[SIMPLE_LOOP_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// FIXME: It is interesting, why the following "mul 1" was not constant folded? +// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1 +// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]] +// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// +// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3 +// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64 +// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]] +// Update of the privatized version of linear variable! +// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]] + a[k]++; + k = k + 3; +// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1 +// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]] + } +// CHECK: [[SIMPLE_LOOP_END]] +// +// Update linear vars after loop, as the loop was operating on a private version. +// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]], +// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]] +// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27 +// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]] +// + #pragma omp simd linear(uval(k) : 3) +// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]] +// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]] +// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] + +// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]] +// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9 +// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]] + for (int i = 10; i > 1; i--) { +// CHECK: [[SIMPLE_LOOP_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// FIXME: It is interesting, why the following "mul 1" was not constant folded? +// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1 +// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]] +// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// +// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3 +// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64 +// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]] +// Update of the privatized version of linear variable! +// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]] + a[k]++; + k = k + 3; +// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1 +// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]] +// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]] + } +// CHECK: [[SIMPLE_LOOP_END]] +// +// Update linear vars after loop, as the loop was operating on a private version. +// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]] +// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27 +// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]] +// +} + // TERM_DEBUG-LABEL: bar int bar() {return 0;}; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits