https://llvm.org/bugs/show_bug.cgi?id=27894
Bug ID: 27894
Summary: failure to convert FP addition loop into multiplication with fast-math
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: normal
Priority: P
Component: Loop Optimizer
Assignee: unassignedb...@nondot.org
Reporter: spatel+l...@rotateright.com
CC: llvm-bugs@lists.llvm.org
Classification: Unclassified

Another example in the loop vectorizer code explosion series (bug 27881, bug 27826) can be seen with:

float multiply_the_hard_way(int n) {
  float sum = 0.0f;
  for (int i=0; i<n; i++)
    sum += 7.0f;
  return sum;
}

------------------------------------------------------------------------------

This may look ridiculous in simplified form, but the problem could easily exist in real code or slightly more real code like bug 27881.

This may be considered a bug before it ever gets to the vectorizer as discussed recently here:
http://lists.llvm.org/pipermail/llvm-dev/2016-May/099724.html

I.e., if these were integers, we'd convert this into an imul.

------------------------------------------------------------------------------

$ ./clang -O2 multiply_the_hard_way.c -S -ffast-math -emit-llvm -o -
; ModuleID = 'multiply_the_hard_way.c'
source_filename = "multiply_the_hard_way.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"

; Function Attrs: norecurse nounwind readnone ssp uwtable
define float @multiple_the_hard_way(i32 %n) #0 {
entry:
  %cmp5 = icmp sgt i32 %n, 0
  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
  %min.iters.check = icmp ult i32 %n, 8
  br i1 %min.iters.check, label %for.body.preheader15, label %min.iters.checked

for.body.preheader15: ; preds = %middle.block, %min.iters.checked, %for.body.preheader
  %i.07.ph = phi i32 [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
  %sum.06.ph = phi float [ 0.000000e+00, %min.iters.checked ], [ 0.000000e+00, %for.body.preheader ], [ %8, %middle.block ]
  br label %for.body

min.iters.checked: ; preds = %for.body.preheader
  %n.vec = and i32 %n, -8
  %cmp.zero = icmp eq i32 %n.vec, 0
  br i1 %cmp.zero, label %for.body.preheader15, label %vector.body.preheader

vector.body.preheader: ; preds = %min.iters.checked
  %0 = add i32 %n.vec, -8
  %1 = lshr exact i32 %0, 3
  %2 = add nuw nsw i32 %1, 1
  %xtraiter = and i32 %2, 7
  %3 = icmp ult i32 %0, 56
  br i1 %3, label %middle.block.unr-lcssa, label %vector.body.preheader.new

vector.body.preheader.new: ; preds = %vector.body.preheader
  %unroll_iter = sub nsw i32 %2, %xtraiter
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.body.preheader.new
  %vec.phi = phi <4 x float> [ zeroinitializer, %vector.body.preheader.new ], [ %4, %vector.body ]
  %vec.phi9 = phi <4 x float> [ zeroinitializer, %vector.body.preheader.new ], [ %5, %vector.body ]
  %niter = phi i32 [ %unroll_iter, %vector.body.preheader.new ], [ %niter.nsub.7, %vector.body ]
  %4 = fadd fast <4 x float> %vec.phi, <float 5.600000e+01, float 5.600000e+01, float 5.600000e+01, float 5.600000e+01>
  %5 = fadd fast <4 x float> %vec.phi9, <float 5.600000e+01, float 5.600000e+01, float 5.600000e+01, float 5.600000e+01>
  %niter.nsub.7 = add i32 %niter, -8
  %niter.ncmp.7 = icmp eq i32 %niter.nsub.7, 0
  br i1 %niter.ncmp.7, label %middle.block.unr-lcssa.loopexit, label %vector.body, !llvm.loop !2

middle.block.unr-lcssa.loopexit: ; preds = %vector.body
  %.lcssa20 = phi <4 x float> [ %5, %vector.body ]
  %.lcssa19 = phi <4 x float> [ %4, %vector.body ]
  br label %middle.block.unr-lcssa

middle.block.unr-lcssa: ; preds = %middle.block.unr-lcssa.loopexit, %vector.body.preheader
  %.lcssa16.ph = phi <4 x float> [ undef, %vector.body.preheader ], [ %.lcssa20, %middle.block.unr-lcssa.loopexit ]
  %.lcssa.ph = phi <4 x float> [ undef, %vector.body.preheader ], [ %.lcssa19, %middle.block.unr-lcssa.loopexit ]
  %vec.phi.unr = phi <4 x float> [ zeroinitializer, %vector.body.preheader ], [ %.lcssa19, %middle.block.unr-lcssa.loopexit ]
  %vec.phi9.unr = phi <4 x float> [ zeroinitializer, %vector.body.preheader ], [ %.lcssa20, %middle.block.unr-lcssa.loopexit ]
  %lcmp.mod = icmp eq i32 %xtraiter, 0
  br i1 %lcmp.mod, label %middle.block, label %vector.body.epil.preheader

vector.body.epil.preheader: ; preds = %middle.block.unr-lcssa
  br label %vector.body.epil

vector.body.epil: ; preds = %vector.body.epil, %vector.body.epil.preheader
  %vec.phi.epil = phi <4 x float> [ %6, %vector.body.epil ], [ %vec.phi.unr, %vector.body.epil.preheader ]
  %vec.phi9.epil = phi <4 x float> [ %7, %vector.body.epil ], [ %vec.phi9.unr, %vector.body.epil.preheader ]
  %epil.iter = phi i32 [ %epil.iter.sub, %vector.body.epil ], [ %xtraiter, %vector.body.epil.preheader ]
  %6 = fadd fast <4 x float> %vec.phi.epil, <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
  %7 = fadd fast <4 x float> %vec.phi9.epil, <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
  %epil.iter.sub = add i32 %epil.iter, -1
  %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0
  br i1 %epil.iter.cmp, label %middle.block.epilog-lcssa, label %vector.body.epil, !llvm.loop !5

middle.block.epilog-lcssa: ; preds = %vector.body.epil
  %.lcssa22 = phi <4 x float> [ %7, %vector.body.epil ]
  %.lcssa21 = phi <4 x float> [ %6, %vector.body.epil ]
  br label %middle.block

middle.block: ; preds = %middle.block.unr-lcssa, %middle.block.epilog-lcssa
  %.lcssa16 = phi <4 x float> [ %.lcssa16.ph, %middle.block.unr-lcssa ], [ %.lcssa22, %middle.block.epilog-lcssa ]
  %.lcssa = phi <4 x float> [ %.lcssa.ph, %middle.block.unr-lcssa ], [ %.lcssa21, %middle.block.epilog-lcssa ]
  %bin.rdx = fadd fast <4 x float> %.lcssa16, %.lcssa
  %rdx.shuf = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx12 = fadd fast <4 x float> %bin.rdx, %rdx.shuf
  %rdx.shuf13 = shufflevector <4 x float> %bin.rdx12, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx14 = fadd fast <4 x float> %bin.rdx12, %rdx.shuf13
  %8 = extractelement <4 x float> %bin.rdx14, i32 0
  %cmp.n = icmp eq i32 %n.vec, %n
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader15

for.cond.cleanup.loopexit: ; preds = %for.body
  %add.lcssa = phi float [ %add, %for.body ]
  br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %middle.block, %entry
  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %8, %middle.block ], [ %add.lcssa, %for.cond.cleanup.loopexit ]
  ret float %sum.0.lcssa

for.body: ; preds = %for.body.preheader15, %for.body
  %i.07 = phi i32 [ %inc, %for.body ], [ %i.07.ph, %for.body.preheader15 ]
  %sum.06 = phi float [ %add, %for.body ], [ %sum.06.ph, %for.body.preheader15 ]
  %add = fadd fast float %sum.06, 7.000000e+00
  %inc = add nuw nsw i32 %i.07, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !7
}

attributes #0 = { norecurse nounwind readnone ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 3.9.0 (trunk 270847)"}
!2 = distinct !{!2, !3, !4}
!3 = !{!"llvm.loop.vectorize.width", i32 1}
!4 = !{!"llvm.loop.interleave.count", i32 1}
!5 = distinct !{!5, !6}
!6 = !{!"llvm.loop.unroll.disable"}
!7 = distinct !{!7, !8, !3, !4}
!8 = !{!"llvm.loop.unroll.runtime.disable"}

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________ llvm-bugs mailing list llvm-bugs@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs