https://github.com/xiongzile updated https://github.com/llvm/llvm-project/pull/194671
>From f5373d5cb694cba3a917571dae3d3626c0654ec3 Mon Sep 17 00:00:00 2001 From: Zile Xiong <[email protected]> Date: Wed, 29 Apr 2026 00:25:23 +0800 Subject: [PATCH 1/6] [VPlan] Collect FOR PHIs before sinking/hoisting recurrence users Avoid iterating over HeaderVPBB->phis() while potentially mutating the underlying VPBasicBlock. Collect all VPFirstOrderRecurrencePHIRecipe instances first, then process them in a separate loop. This prevents iterator invalidation when sinking or hoisting recurrence users, and makes the transformation more robust. Fixes: https://github.com/llvm/llvm-project/issues/194618 --- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 391c358b22fa3..3a268fa667416 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -864,11 +864,13 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, /// fails. static bool tryToSinkOrHoistRecurrenceUsers(VPBasicBlock *HeaderVPBB, VPDominatorTree &VPDT) { - for (VPRecipeBase &R : HeaderVPBB->phis()) { - auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R); - if (!FOR) - continue; + SmallVector<VPFirstOrderRecurrencePHIRecipe *> FORs; + + for (VPRecipeBase &R : HeaderVPBB->phis()) + if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) + FORs.push_back(FOR); + for (VPFirstOrderRecurrencePHIRecipe *FOR : FORs) { // Follow through FOR phi chains to find the actual Previous recipe. // Fixed-order recurrences do not contain cycles, so this loop is // guaranteed to terminate. 
>From 8e1d993e4f971130b1d7d361cf7dcba34896cd12 Mon Sep 17 00:00:00 2001 From: Zile Xiong <[email protected]> Date: Wed, 29 Apr 2026 01:45:45 +0800 Subject: [PATCH 2/6] [VPlan] add test for crash --- .../LoopVectorize/VPlan/for-phi-ordering.ll | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll b/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll new file mode 100644 index 0000000000000..b2342e00a062e --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -vplan-print-after=printOptimizedVPlan -disable-output 2>&1 | FileCheck %s +@b = global i32 0, align 4 +@c = global i64 0, align 8 +@g = global i32 0, align 4 +@h = global i32 0, align 4 +@f = global i32 0, align 4 +@d = global i64 0, align 8 + +define i32 @main() { +; CHECK-LABEL: VPlan for loop in 'main' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF +; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count +; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<entry>: +; CHECK-NEXT: IR %.pr = load i32, ptr @b, align 4 +; CHECK-NEXT: IR %h = load i32, ptr @h, align 4 +; CHECK-NEXT: IR %sext = shl i32 %h, 24 +; CHECK-NEXT: IR %tobool9 = icmp ne i32 %sext, 0 +; CHECK-NEXT: IR %conv8 = ashr exact i32 %sext, 24 +; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 + (-1 * (0 smin %.pr)) + %.pr) +; CHECK-NEXT: Successor(s): scalar.ph, vector.ph +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[VP4:%[0-9]+]]> = DERIVED-IV ir<%.pr> + vp<[[VP2]]> * ir<-1> +; 
CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: <x1> vector loop: { +; CHECK-NEXT: vp<[[VP5:%[0-9]+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<%.pr>, ir<-1>, vp<[[VP0]]> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%rec> = phi ir<0>, ir<%dec> +; CHECK-NEXT: WIDEN ir<%dec> = add nsw ir<%iv>, ir<-1> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: WIDEN ir<%tobool7> = icmp ne ir<%iv>, ir<0> +; CHECK-NEXT: WIDEN ir<%or.cond> = select ir<%tobool7>, ir<%tobool9>, ir<false> +; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = select ir<%or.cond>, ir<1>, ir<%conv8> +; CHECK-NEXT: WIDEN ir<%rem> = srem ir<%iv>, vp<[[VP7]]> +; CHECK-NEXT: BLEND ir<%cond> = ir<%rem> ir<0>/ir<%or.cond> +; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = first-order splice ir<%rec>, ir<%dec> +; CHECK-NEXT: WIDEN-CAST ir<%conv> = zext vp<[[VP8]]> to i64 +; CHECK-NEXT: EMIT vp<%vector.recur.extract.for.phi> = extract-penultimate-element ir<%dec> +; CHECK-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = extract-last-part ir<%cond> +; CHECK-NEXT: EMIT vp<[[VP10:%[0-9]+]]> = extract-last-lane vp<[[VP9]]> +; CHECK-NEXT: EMIT vp<[[VP11:%[0-9]+]]> = extract-last-part ir<%conv> +; CHECK-NEXT: EMIT vp<[[VP12:%[0-9]+]]> = extract-last-lane vp<[[VP11]]> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> +; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> +; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %cond.lcssa = phi i32 [ %cond, %cond.end ] (extra operand: vp<[[VP10]]> from middle.block) +; CHECK-NEXT: IR %dec.lcssa = phi i32 [ %dec, %cond.end ] (extra operand: vp<[[VP4]]> from middle.block) +; CHECK-NEXT: IR %rec.lcssa = phi i32 [ %rec, %cond.end 
] (extra operand: vp<%vector.recur.extract.for.phi> from middle.block) +; CHECK-NEXT: IR %conv.lcssa = phi i64 [ %conv, %cond.end ] (extra operand: vp<[[VP12]]> from middle.block) +; CHECK-NEXT: IR %conv11 = sext i32 %cond.lcssa to i64 +; CHECK-NEXT: IR store i64 %conv.lcssa, ptr @c, align 8 +; CHECK-NEXT: IR store i32 %rec.lcssa, ptr @g, align 4 +; CHECK-NEXT: IR store i32 %cond.lcssa, ptr @f, align 4 +; CHECK-NEXT: IR store i64 %conv11, ptr @d, align 8 +; CHECK-NEXT: IR store i32 %dec.lcssa, ptr @b, align 4 +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP4]]>, middle.block ], [ ir<%.pr>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%scalar.recur.init> = phi [ vp<[[VP4]]>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb<loop>: +; CHECK-NEXT: IR %iv = phi i32 [ %.pr, %entry ], [ %dec, %cond.end ] (extra operand: vp<%bc.resume.val> from scalar.ph) +; CHECK-NEXT: IR %rec = phi i32 [ 0, %entry ], [ %dec, %cond.end ] (extra operand: vp<%scalar.recur.init> from scalar.ph) +; CHECK-NEXT: IR %conv = zext i32 %rec to i64 +; CHECK-NEXT: IR %tobool7 = icmp ne i32 %iv, 0 +; CHECK-NEXT: IR %or.cond = select i1 %tobool7, i1 %tobool9, i1 false +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + %.pr = load i32, ptr @b, align 4 + %h = load i32, ptr @h, align 4 + %sext = shl i32 %h, 24 + %tobool9 = icmp ne i32 %sext, 0 + %conv8 = ashr exact i32 %sext, 24 + br label %loop + +loop: + %iv = phi i32 [ %.pr, %entry ], [ %dec, %cond.end ] + %rec = phi i32 [ 0, %entry ], [ %dec, %cond.end ] + %conv = zext i32 %rec to i64 + %tobool7 = icmp ne i32 %iv, 0 + %or.cond = select i1 %tobool7, i1 %tobool9, i1 false + br i1 %or.cond, label %cond.end, label %cond.false + +cond.false: + %rem = srem i32 %iv, %conv8 + br label %cond.end + +cond.end: + %cond = phi i32 [ %rem, %cond.false ], [ 0, %loop ] + %dec = add nsw i32 %iv, -1 + %cmp = 
icmp sgt i32 %iv, 0 + br i1 %cmp, label %loop, label %exit + +exit: + %conv11 = sext i32 %cond to i64 + store i64 %conv, ptr @c, align 8 + store i32 %rec, ptr @g, align 4 + store i32 %cond, ptr @f, align 4 + store i64 %conv11, ptr @d, align 8 + store i32 %dec, ptr @b, align 4 + ret i32 0 +} >From bfe1735d1a08ee8aab728f65006ea79f38c6b91d Mon Sep 17 00:00:00 2001 From: Zile Xiong <[email protected]> Date: Wed, 29 Apr 2026 22:58:31 +0800 Subject: [PATCH 3/6] [VPlan] add test(v2) --- .../LoopVectorize/VPlan/for-phi-ordering.ll | 75 ++++--------------- 1 file changed, 14 insertions(+), 61 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll b/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll index b2342e00a062e..5a78298e093fd 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll @@ -1,79 +1,42 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -vplan-print-after=printOptimizedVPlan -disable-output 2>&1 | FileCheck %s -@b = global i32 0, align 4 -@c = global i64 0, align 8 -@g = global i32 0, align 4 -@h = global i32 0, align 4 -@f = global i32 0, align 4 -@d = global i64 0, align 8 -define i32 @main() { -; CHECK-LABEL: VPlan for loop in 'main' +define i32 @test(i32 %.pr, i1 %tobool9, i32 %conv8) { +; CHECK-LABEL: VPlan for loop in 'test' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF -; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count -; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count +; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = vector-trip-count +; CHECK-NEXT: vp<[[VP2:%[0-9]+]]> = original trip-count ; CHECK-EMPTY: ; 
CHECK-NEXT: ir-bb<entry>: -; CHECK-NEXT: IR %.pr = load i32, ptr @b, align 4 -; CHECK-NEXT: IR %h = load i32, ptr @h, align 4 -; CHECK-NEXT: IR %sext = shl i32 %h, 24 -; CHECK-NEXT: IR %tobool9 = icmp ne i32 %sext, 0 -; CHECK-NEXT: IR %conv8 = ashr exact i32 %sext, 24 -; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 + (-1 * (0 smin %.pr)) + %.pr) +; CHECK-NEXT: EMIT vp<[[VP2]]> = EXPAND SCEV (1 + (-1 * (0 smin %.pr)) + %.pr) ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: vp<[[VP4:%[0-9]+]]> = DERIVED-IV ir<%.pr> + vp<[[VP2]]> * ir<-1> +; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = DERIVED-IV ir<%.pr> + vp<[[VP1]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: <x1> vector loop: { -; CHECK-NEXT: vp<[[VP5:%[0-9]+]]> = CANONICAL-IV +; CHECK-NEXT: vp<[[VP4:%[0-9]+]]> = CANONICAL-IV ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<%.pr>, ir<-1>, vp<[[VP0]]> -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%rec> = phi ir<0>, ir<%dec> -; CHECK-NEXT: WIDEN ir<%dec> = add nsw ir<%iv>, ir<-1> -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP0]]> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP1]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: WIDEN ir<%tobool7> = icmp ne ir<%iv>, ir<0> -; CHECK-NEXT: WIDEN ir<%or.cond> = select ir<%tobool7>, ir<%tobool9>, ir<false> -; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = select ir<%or.cond>, ir<1>, ir<%conv8> -; CHECK-NEXT: WIDEN ir<%rem> = srem ir<%iv>, vp<[[VP7]]> -; CHECK-NEXT: BLEND ir<%cond> = ir<%rem> ir<0>/ir<%or.cond> -; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = first-order splice ir<%rec>, ir<%dec> -; CHECK-NEXT: WIDEN-CAST ir<%conv> = zext vp<[[VP8]]> to i64 -; 
CHECK-NEXT: EMIT vp<%vector.recur.extract.for.phi> = extract-penultimate-element ir<%dec> -; CHECK-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = extract-last-part ir<%cond> -; CHECK-NEXT: EMIT vp<[[VP10:%[0-9]+]]> = extract-last-lane vp<[[VP9]]> -; CHECK-NEXT: EMIT vp<[[VP11:%[0-9]+]]> = extract-last-part ir<%conv> -; CHECK-NEXT: EMIT vp<[[VP12:%[0-9]+]]> = extract-last-lane vp<[[VP11]]> -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP2]]>, vp<[[VP1]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %cond.lcssa = phi i32 [ %cond, %cond.end ] (extra operand: vp<[[VP10]]> from middle.block) -; CHECK-NEXT: IR %dec.lcssa = phi i32 [ %dec, %cond.end ] (extra operand: vp<[[VP4]]> from middle.block) -; CHECK-NEXT: IR %rec.lcssa = phi i32 [ %rec, %cond.end ] (extra operand: vp<%vector.recur.extract.for.phi> from middle.block) -; CHECK-NEXT: IR %conv.lcssa = phi i64 [ %conv, %cond.end ] (extra operand: vp<[[VP12]]> from middle.block) -; CHECK-NEXT: IR %conv11 = sext i32 %cond.lcssa to i64 -; CHECK-NEXT: IR store i64 %conv.lcssa, ptr @c, align 8 -; CHECK-NEXT: IR store i32 %rec.lcssa, ptr @g, align 4 -; CHECK-NEXT: IR store i32 %cond.lcssa, ptr @f, align 4 -; CHECK-NEXT: IR store i64 %conv11, ptr @d, align 8 -; CHECK-NEXT: IR store i32 %dec.lcssa, ptr @b, align 4 ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP4]]>, middle.block ], [ ir<%.pr>, ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%scalar.recur.init> = phi [ vp<[[VP4]]>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP3]]>, middle.block ], [ ir<%.pr>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%scalar.recur.init> = phi [ vp<[[VP3]]>, middle.block ], [ ir<0>, ir-bb<entry> ] ; CHECK-NEXT: Successor(s): ir-bb<loop> ; CHECK-EMPTY: 
; CHECK-NEXT: ir-bb<loop>: @@ -86,11 +49,6 @@ define i32 @main() { ; CHECK-NEXT: } ; entry: - %.pr = load i32, ptr @b, align 4 - %h = load i32, ptr @h, align 4 - %sext = shl i32 %h, 24 - %tobool9 = icmp ne i32 %sext, 0 - %conv8 = ashr exact i32 %sext, 24 br label %loop loop: @@ -112,11 +70,6 @@ cond.end: br i1 %cmp, label %loop, label %exit exit: - %conv11 = sext i32 %cond to i64 - store i64 %conv, ptr @c, align 8 - store i32 %rec, ptr @g, align 4 - store i32 %cond, ptr @f, align 4 - store i64 %conv11, ptr @d, align 8 - store i32 %dec, ptr @b, align 4 ret i32 0 } + >From 208ed4455b2a3ee1099538db5218f60ec39ca680 Mon Sep 17 00:00:00 2001 From: Zile Xiong <[email protected]> Date: Fri, 1 May 2026 13:37:09 +0800 Subject: [PATCH 4/6] [VPlan] improve test --- .../LoopVectorize/VPlan/for-phi-ordering.ll | 81 +++++++++++-------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll b/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll index 5a78298e093fd..0d7f2819a6316 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll @@ -1,75 +1,86 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -vplan-print-after=printOptimizedVPlan -disable-output 2>&1 | FileCheck %s - -define i32 @test(i32 %.pr, i1 %tobool9, i32 %conv8) { +define i32 @test(i32 %limit, i1 %guard, i32 %divisor) { ; CHECK-LABEL: VPlan for loop in 'test' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = vector-trip-count -; CHECK-NEXT: vp<[[VP2:%[0-9]+]]> = original trip-count +; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF +; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF +; CHECK-NEXT: Live-in 
vp<[[VP2:%[0-9]+]]> = vector-trip-count +; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<entry>: -; CHECK-NEXT: EMIT vp<[[VP2]]> = EXPAND SCEV (1 + (-1 * (0 smin %.pr)) + %.pr) +; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 + (0 smax %limit))<nuw> ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = DERIVED-IV ir<%.pr> + vp<[[VP1]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: <x1> vector loop: { ; CHECK-NEXT: vp<[[VP4:%[0-9]+]]> = CANONICAL-IV ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP0]]> -; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP1]]> +; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0]]> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP2]]>, vp<[[VP1]]> +; CHECK-NEXT: WIDEN ir<%prev.iv> = add nsw ir<%iv>, ir<-1> +; CHECK-NEXT: EMIT vp<%vector.recur.extract.for.phi> = extract-penultimate-element ir<%prev.iv> +; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = extract-last-part ir<%prev.iv> +; CHECK-NEXT: EMIT vp<%vector.recur.extract> = extract-last-lane vp<[[VP6]]> +; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit>: +; CHECK-NEXT: IR %rec.lcssa = phi i32 [ %rec, %loop.latch ] (extra operand: vp<%vector.recur.extract.for.phi> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP3]]>, middle.block ], [ ir<%.pr>, 
ir-bb<entry> ] -; CHECK-NEXT: EMIT-SCALAR vp<%scalar.recur.init> = phi [ vp<[[VP3]]>, middle.block ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: Successor(s): ir-bb<loop> +; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP2]]>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%scalar.recur.init> = phi [ vp<%vector.recur.extract>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: Successor(s): ir-bb<loop.header> ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb<loop>: -; CHECK-NEXT: IR %iv = phi i32 [ %.pr, %entry ], [ %dec, %cond.end ] (extra operand: vp<%bc.resume.val> from scalar.ph) -; CHECK-NEXT: IR %rec = phi i32 [ 0, %entry ], [ %dec, %cond.end ] (extra operand: vp<%scalar.recur.init> from scalar.ph) +; CHECK-NEXT: ir-bb<loop.header>: +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp<%bc.resume.val> from scalar.ph) +; CHECK-NEXT: IR %rec = phi i32 [ 0, %entry ], [ %prev.iv, %loop.latch ] (extra operand: vp<%scalar.recur.init> from scalar.ph) ; CHECK-NEXT: IR %conv = zext i32 %rec to i64 -; CHECK-NEXT: IR %tobool7 = icmp ne i32 %iv, 0 -; CHECK-NEXT: IR %or.cond = select i1 %tobool7, i1 %tobool9, i1 false +; CHECK-NEXT: IR %iv.nonzero = icmp ne i32 %iv, 0 +; CHECK-NEXT: IR %or.cond = select i1 %iv.nonzero, i1 %guard, i1 false ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: - br label %loop + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %rec = phi i32 [ 0, %entry ], [ %prev.iv, %loop.latch ] -loop: - %iv = phi i32 [ %.pr, %entry ], [ %dec, %cond.end ] - %rec = phi i32 [ 0, %entry ], [ %dec, %cond.end ] + ; move %conv = zext i32 %rec to i64 - %tobool7 = icmp ne i32 %iv, 0 - %or.cond = select i1 %tobool7, i1 %tobool9, i1 false - br i1 %or.cond, label %cond.end, label %cond.false -cond.false: - %rem = srem i32 %iv, %conv8 - br label %cond.end + %iv.nonzero = icmp ne i32 %iv, 0 + %or.cond = select i1 %iv.nonzero, i1 %guard, i1 false + br i1 %or.cond, 
label %loop.latch, label %loop.rem + +loop.rem: + %rem = srem i32 %iv, %divisor + br label %loop.latch -cond.end: - %cond = phi i32 [ %rem, %cond.false ], [ 0, %loop ] - %dec = add nsw i32 %iv, -1 - %cmp = icmp sgt i32 %iv, 0 - br i1 %cmp, label %loop, label %exit +loop.latch: + %cond = phi i32 [ %rem, %loop.rem ], [ 0, %loop.header ] + + ; after + %prev.iv = add nsw i32 %iv, -1 + + %iv.next = add nuw nsw i32 %iv, 1 + %cmp = icmp slt i32 %iv, %limit + br i1 %cmp, label %loop.header, label %exit exit: - ret i32 0 + ret i32 %rec } - >From 2a2249b682b67760e9d8147e2cd62f9a5947bcb1 Mon Sep 17 00:00:00 2001 From: Zile Xiong <[email protected]> Date: Fri, 1 May 2026 13:50:24 +0800 Subject: [PATCH 5/6] [VPlan] test: FOR is not the final phi --- .../test/CIR/CodeGenCXX/relative-vtables.cpp | 242 ++++++++++++++++++ ...ordering.ll => for-phi-ordering-strict.ll} | 35 ++- 2 files changed, 264 insertions(+), 13 deletions(-) create mode 100644 clang/test/CIR/CodeGenCXX/relative-vtables.cpp rename llvm/test/Transforms/LoopVectorize/VPlan/{for-phi-ordering.ll => for-phi-ordering-strict.ll} (63%) diff --git a/clang/test/CIR/CodeGenCXX/relative-vtables.cpp b/clang/test/CIR/CodeGenCXX/relative-vtables.cpp new file mode 100644 index 0000000000000..381df8ad5ced9 --- /dev/null +++ b/clang/test/CIR/CodeGenCXX/relative-vtables.cpp @@ -0,0 +1,242 @@ +// TODO by @Elio + +namespace { + struct A { + virtual void f() { } + }; +} + +void f() { A b; } + +struct B { + B(); + virtual void f(); +}; + +B::B() { } + +struct C : virtual B { + C(); + virtual void f() { } +}; + +C::C() { } + +struct D { + virtual void f(); +}; + +void D::f() { } + +static struct : D { } e; + +// Force 'e' to be constructed and therefore have a vtable defined. +void use_e() { + e.f(); +} + +// The destructor is the key function. 
+template<typename T> +struct E { + virtual ~E(); +}; + +template<typename T> E<T>::~E() { } + +// Anchor is the key function +template<> +struct E<char> { + virtual void anchor(); +}; + +void E<char>::anchor() { } + +template struct E<short>; +extern template struct E<int>; + +void use_E() { + E<int> ei; + (void)ei; + E<long> el; + (void)el; +} + +// No key function +template<typename T> +struct F { + virtual void foo() { } +}; + +// No key function +template<> +struct F<char> { + virtual void foo() { } +}; + +template struct F<short>; +extern template struct F<int>; + +void use_F() { + F<char> fc; + fc.foo(); + F<int> fi; + fi.foo(); + F<long> fl; + (void)fl; +} + +// B has a key function that is not defined in this translation unit so its vtable +// has external linkage. +// CIR-DAG: cir.global "private" external @_ZTV1B : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1B = external {{.*}}{ [3 x ptr] }, align 8 + +// C has no key function, so its vtable should have linkonce_odr linkage +// and hidden visibility. 
+// CIR-DAG: cir.global "private" linkonce_odr comdat @_ZTV1C = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.ptr<null> : !cir.ptr<!u8i>, #cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1C> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1C1fEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 5>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global linkonce_odr comdat @_ZTS1C = #cir.const_array<"1C" : !cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i x 3> {alignment = 1 : i64} +// CIR-DAG: cir.global constant linkonce_odr comdat @_ZTI1C = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1C> : !cir.ptr<!u8i>, #cir.int<0> : !u32i, #cir.int<1> : !u32i, #cir.global_view<@_ZTI1B> : !cir.ptr<!u8i>, #cir.int<-8189> : !s64i}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global linkonce_odr comdat @_ZTT1C = #cir.const_array<[#cir.global_view<@_ZTV1C, [0 : i32, 4 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTV1C, [0 : i32, 4 : i32]> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 2> {alignment = 8 : i64} +// LLVM-DAG: @_ZTV1C = linkonce_odr {{.*}}{ [5 x ptr] } { [5 x ptr] [ptr null, ptr null, ptr null, ptr @_ZTI1C, ptr @_ZN1C1fEv] }, comdat, align 8 +// LLVM-DAG: @_ZTS1C = linkonce_odr {{.*}}[{{[0-9]}} x i8] c"1C\00", comdat, align 1 +// LLVM-DAG: @_ZTI1C = linkonce_odr {{.*}}{ ptr, ptr, i32, i32, ptr, i64 } { ptr getelementptr{{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 {{.*}}), ptr @_ZTS1C, i32 0, i32 1, ptr @_ZTI1B, i64 -8189 }, comdat +// LLVM-DAG: @_ZTT1C = linkonce_odr {{.*}}[2 x ptr] [ptr getelementptr inbounds{{.*}}({{.*}}, ptr @_ZTV1C, {{.*}}, ptr getelementptr inbounds {{.*}}({{.*}}, ptr @_ZTV1C{{.*}})] + +// D has a key function that is defined in this translation unit so its vtable is +// defined in the translation unit. 
+// CIR-DAG: cir.global "private" external @_ZTV1D = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1D> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1D1fEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global external @_ZTS1D = #cir.const_array<"1D" : !cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i x 3> {alignment = 1 : i64} +// CIR-DAG: cir.global constant external @_ZTI1D = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1D> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1D = {{(unnamed_addr constant|global)}} { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI1D, ptr @_ZN1D1fEv] }, align 8 +// LLVM-DAG: @_ZTS1D = {{(constant|global)}} [{{[0-9]}} x i8] c"1D\00", align 1 +// LLVM-DAG: @_ZTI1D = constant { ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 {{[0-9]+}}), ptr @_ZTS1D }, align 8 + +// E<char> is an explicit specialization with a key function defined +// in this translation unit, so its vtable should have external +// linkage. 
+// CIR-DAG: cir.global "private" external @_ZTV1EIcE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1EIcE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1EIcE6anchorEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global external @_ZTS1EIcE = #cir.const_array<"1EIcE" : !cir.array<!s8i x 5>, trailing_zeros> : !cir.array<!s8i x 6> {alignment = 1 : i64} +// CIR-DAG: cir.global constant external @_ZTI1EIcE = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1EIcE> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1EIcE = {{(unnamed_addr constant|global)}} { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI1EIcE, ptr @_ZN1EIcE6anchorEv] }, align 8 +// LLVM-DAG: @_ZTS1EIcE = {{(constant|global)}} [6 x i8] c"1EIcE\00", align 1 +// LLVM-DAG: @_ZTI1EIcE = constant { ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 {{.*}}), ptr @_ZTS1EIcE }, align 8 + +// E<short> is an explicit template instantiation with a key function +// defined in this translation unit, so its vtable should have +// weak_odr linkage. 
+// CIR-DAG: cir.global "private" weak_odr comdat @_ZTV1EIsE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1EIsE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1EIsED1Ev> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1EIsED0Ev> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global constant weak_odr comdat @_ZTI1EIsE = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1EIsE> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global weak_odr comdat @_ZTS1EIsE = #cir.const_array<"1EIsE" : !cir.array<!s8i x 5>, trailing_zeros> : !cir.array<!s8i x 6> {alignment = 1 : i64} +// LLVM-DAG: @_ZTV1EIsE = weak_odr {{.*}}{ [4 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI1EIsE, ptr @_ZN1EIsED1Ev, ptr @_ZN1EIsED0Ev] }, comdat, align 8 +// LLVM-DAG: @_ZTI1EIsE = weak_odr constant { ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 {{.*}}), ptr @_ZTS1EIsE }, comdat +// LLVM-DAG: @_ZTS1EIsE = weak_odr {{.*}}[6 x i8] c"1EIsE\00", comdat + +// F<short> is an explicit template instantiation without a key +// function, so its vtable should have weak_odr linkage +// CIR-DAG: cir.global "private" weak_odr comdat @_ZTV1FIsE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1FIsE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1FIsE3fooEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global weak_odr comdat @_ZTS1FIsE = #cir.const_array<"1FIsE" : !cir.array<!s8i x 5>, trailing_zeros> : !cir.array<!s8i x 6> {alignment = 1 : i64} +// CIR-DAG: cir.global constant weak_odr comdat @_ZTI1FIsE = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1FIsE> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1FIsE = 
weak_odr {{.*}}{ [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI1FIsE, ptr @_ZN1FIsE3fooEv] }, comdat, align 8 +// LLVM-DAG: @_ZTS1FIsE = weak_odr {{.*}}[6 x i8] c"1FIsE\00", comdat, align 1 +// LLVM-DAG: @_ZTI1FIsE = weak_odr constant { ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 {{.*}}), ptr @_ZTS1FIsE }, comdat + +// E<long> is an implicit template instantiation with a key function +// defined in this translation unit, so its vtable should have +// linkonce_odr linkage. +// CIR-DAG: cir.global "private" linkonce_odr comdat @_ZTV1EIlE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1EIlE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1EIlED1Ev> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1EIlED0Ev> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global linkonce_odr comdat @_ZTS1EIlE = #cir.const_array<"1EIlE" : !cir.array<!s8i x 5>, trailing_zeros> : !cir.array<!s8i x 6> {alignment = 1 : i64} +// CIR-DAG: cir.global constant linkonce_odr comdat @_ZTI1EIlE = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1EIlE> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1EIlE = linkonce_odr {{.*}}{ [4 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI1EIlE, ptr @_ZN1EIlED1Ev, ptr @_ZN1EIlED0Ev] }, comdat, align 8 +// LLVM-DAG: @_ZTS1EIlE = linkonce_odr {{.*}}[6 x i8] c"1EIlE\00", comdat, align 1 +// LLVM-DAG: @_ZTI1EIlE = linkonce_odr constant { ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 {{.*}}), ptr @_ZTS1EIlE }, comdat + +// F<long> is an implicit template instantiation with no key function, +// so its vtable should have linkonce_odr linkage. 
+// CIR-DAG: cir.global "private" linkonce_odr comdat @_ZTV1FIlE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1FIlE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1FIlE3fooEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global linkonce_odr comdat @_ZTS1FIlE = #cir.const_array<"1FIlE" : !cir.array<!s8i x 5>, trailing_zeros> : !cir.array<!s8i x 6> {alignment = 1 : i64} +// CIR-DAG: cir.global constant linkonce_odr comdat @_ZTI1FIlE = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1FIlE> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1FIlE = linkonce_odr {{.*}}{ [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI1FIlE, ptr @_ZN1FIlE3fooEv] }, comdat, align 8 +// LLVM-DAG: @_ZTS1FIlE = linkonce_odr {{.*}}[6 x i8] c"1FIlE\00", comdat, align 1 +// LLVM-DAG: @_ZTI1FIlE = linkonce_odr constant { ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 {{.*}}), ptr @_ZTS1FIlE }, comdat + +// F<int> is an explicit template instantiation declaration without a +// key function, so its vtable should have external linkage. +// CIR-DAG: cir.global "private" external @_ZTV1FIiE : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1FIiE = external {{.*}}{ [3 x ptr] }, align 8 + +// E<int> is an explicit template instantiation declaration. It has a +// key function that is not instantiated, so we know that the vtable definition +// will be generated in the TU where the key function is defined, +// so we can mark it as external (without optimizations) and +// available_externally (with optimizations) because all of the inline +// virtual functions have been emitted. 
+// CIR-DAG: cir.global "private" external @_ZTV1EIiE : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1EIiE = external {{.*}}{ [4 x ptr] }, align 8 + +// The anonymous struct for e has no linkage, so the vtable should have +// internal linkage. +// CIR-DAG: cir.global "private" internal dso_local @_ZTV3$_0 = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI3$_0> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1D1fEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global internal dso_local @_ZTS3$_0 = #cir.const_array<"3$_0" : !cir.array<!s8i x 4>, trailing_zeros> : !cir.array<!s8i x 5> {alignment = 1 : i64} +// CIR-DAG: cir.global constant internal @_ZTI3$_0 = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS3$_0> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1D> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @"_ZTV3$_0" = internal {{.*}}{ [3 x ptr] } { [3 x ptr] [ptr null, ptr @"_ZTI3$_0", ptr @_ZN1D1fEv] }, align 8 +// LLVM-DAG: @"_ZTS3$_0" = internal {{.*}}[5 x i8] c"3$_0\00", align 1 +// LLVM-DAG: @"_ZTI3$_0" = internal constant { ptr, ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 {{.*}}), ptr @"_ZTS3$_0", ptr @_ZTI1D }, align 8 + +// The A vtable should have internal linkage since it is inside an anonymous +// namespace. 
+// CIR-DAG: cir.global "private" internal dso_local @_ZTVN12_GLOBAL__N_11AE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTIN12_GLOBAL__N_11AE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN12_GLOBAL__N_11A1fEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global internal dso_local @_ZTSN12_GLOBAL__N_11AE = #cir.const_array<"N12_GLOBAL__N_11AE" : !cir.array<!s8i x 18>, trailing_zeros> : !cir.array<!s8i x 19> {alignment = 1 : i64} +// CIR-DAG: cir.global constant internal @_ZTIN12_GLOBAL__N_11AE = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTSN12_GLOBAL__N_11AE> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTVN12_GLOBAL__N_11AE = internal {{.*}}{ [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTIN12_GLOBAL__N_11AE, ptr @_ZN12_GLOBAL__N_11A1fEv] }, align 8 +// LLVM-DAG: @_ZTSN12_GLOBAL__N_11AE = internal {{.*}}[19 x i8] c"N12_GLOBAL__N_11AE\00", align 1 +// LLVM-DAG: @_ZTIN12_GLOBAL__N_11AE = internal constant { ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 {{.*}}), ptr @_ZTSN12_GLOBAL__N_11AE }, align 8 + +// F<char> is an explicit specialization without a key function, so +// its vtable should have linkonce_odr linkage. 
+// CIR-DAG: cir.global "private" linkonce_odr comdat @_ZTV1FIcE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1FIcE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1FIcE3fooEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global linkonce_odr comdat @_ZTS1FIcE = #cir.const_array<"1FIcE" : !cir.array<!s8i x 5>, trailing_zeros> : !cir.array<!s8i x 6> {alignment = 1 : i64} +// CIR-DAG: cir.global constant linkonce_odr comdat @_ZTI1FIcE = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1FIcE> : !cir.ptr<!u8i>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1FIcE = linkonce_odr {{.*}}{ [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI1FIcE, ptr @_ZN1FIcE3fooEv] }, comdat, align 8 +// LLVM-DAG: @_ZTS1FIcE = linkonce_odr {{.*}}[6 x i8] c"1FIcE\00", comdat, align 1 +// LLVM-DAG: @_ZTI1FIcE = linkonce_odr constant { ptr, ptr } { ptr getelementptr {{.*}}({{.*}}, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 {{.*}}), ptr @_ZTS1FIcE }, comdat + +// CIR-DAG: cir.global "private" linkonce_odr comdat @_ZTV1GIiE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1GIiE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1GIiE2f0Ev> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1GIiE2f1Ev> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1GIiE = linkonce_odr {{.*}}{ [4 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI1GIiE, ptr @_ZN1GIiE2f0Ev, ptr @_ZN1GIiE2f1Ev] }, comdat, align 8 +template <typename T> +class G { +public: + G() {} + virtual void f0(); + virtual void f1(); +}; +template <> +void G<int>::f1() {} +template <typename T> +void G<T>::f0() {} +void G_f0() { new G<int>(); } + +// H<int> has a key function without a body but it's a template instantiation +// so its VTable must be emitted. 
+// CIR-DAG: cir.global "private" linkonce_odr comdat @_ZTV1HIiE = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1HIiE> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1HIiED1Ev> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1HIiED0Ev> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>}> : !{{.*}}{alignment = 8 : i64} +// LLVM-DAG: @_ZTV1HIiE = linkonce_odr {{.*}}{ [4 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI1HIiE, ptr @_ZN1HIiED1Ev, ptr @_ZN1HIiED0Ev] }, comdat, align 8 +template <typename T> +class H { +public: + virtual ~H(); +}; + +void use_H() { + H<int> h; +} + +// I<int> has an explicit instantiation declaration and needs a VTT and +// construction vtables. + +// CIR-DAG: cir.global "private" external @_ZTV1IIiE : !{{.*}}{alignment = 8 : i64} +// CIR-DAG: cir.global "private" external @_ZTT1IIiE : !cir.array<!cir.ptr<!u8i> x 4> {alignment = 8 : i64} +// LLVM-DAG: @_ZTV1IIiE = external {{.*}}{ [5 x ptr] }, align 8 +// LLVM-DAG: @_ZTT1IIiE = external {{.*}}[4 x ptr], align 8 +struct VBase1 { virtual void f(); }; struct VBase2 : virtual VBase1 {}; +template<typename T> +struct I : VBase2 {}; +extern template struct I<int>; +I<int> i; diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll b/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering-strict.ll similarity index 63% rename from llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll rename to llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering-strict.ll index 0d7f2819a6316..dac94a1426cea 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/for-phi-ordering-strict.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 -; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -vplan-print-after=printOptimizedVPlan -disable-output 2>&1 | FileCheck %s +; RUN: opt %s 
-passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -vplan-print-after=printOptimizedVPlan -disable-output 2>&1 | FileCheck %s + define i32 @test(i32 %limit, i1 %guard, i32 %divisor) { ; CHECK-LABEL: VPlan for loop in 'test' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { @@ -13,14 +14,22 @@ define i32 @test(i32 %limit, i1 %guard, i32 %divisor) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: <x1> vector loop: { -; CHECK-NEXT: vp<[[VP4:%[0-9]+]]> = CANONICAL-IV +; CHECK-NEXT: vp<[[VP5:%[0-9]+]]> = CANONICAL-IV ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0]]> -; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum> = phi (add) vp<[[VP4]]>, ir<%sum.next> +; CHECK-NEXT: WIDEN ir<%iv.nonzero> = icmp ne ir<%iv>, ir<0> +; CHECK-NEXT: WIDEN ir<%or.cond> = select ir<%iv.nonzero>, ir<%guard>, ir<false> +; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = not ir<%or.cond> +; CHECK-NEXT: WIDEN-INTRINSIC vp<[[VP7:%[0-9]+]]> = call llvm.masked.srem(ir<%iv>, ir<%divisor>, vp<[[VP6]]>) +; CHECK-NEXT: BLEND ir<%cond> = vp<[[VP7]]> ir<0>/ir<%or.cond> +; CHECK-NEXT: WIDEN ir<%sum.next> = add ir<%sum>, ir<%cond> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]> ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -28,25 +37,27 @@ define i32 @test(i32 %limit, i1 %guard, i32 %divisor) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: WIDEN ir<%prev.iv> = add nsw ir<%iv>, ir<-1> -; CHECK-NEXT: EMIT vp<%vector.recur.extract.for.phi> = extract-penultimate-element ir<%prev.iv> -; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = extract-last-part ir<%prev.iv> -; CHECK-NEXT: EMIT 
vp<%vector.recur.extract> = extract-last-lane vp<[[VP6]]> +; CHECK-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = compute-reduction-result (add) ir<%sum.next> +; CHECK-NEXT: EMIT vp<[[VP10:%[0-9]+]]> = extract-last-part ir<%prev.iv> +; CHECK-NEXT: EMIT vp<%vector.recur.extract> = extract-last-lane vp<[[VP10]]> ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<exit>: -; CHECK-NEXT: IR %rec.lcssa = phi i32 [ %rec, %loop.latch ] (extra operand: vp<%vector.recur.extract.for.phi> from middle.block) +; CHECK-NEXT: IR %sum.next.lcssa = phi i32 [ %sum.next, %loop.latch ] (extra operand: vp<[[VP9]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP2]]>, middle.block ], [ ir<0>, ir-bb<entry> ] ; CHECK-NEXT: EMIT-SCALAR vp<%scalar.recur.init> = phi [ vp<%vector.recur.extract>, middle.block ], [ ir<0>, ir-bb<entry> ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[VP9]]>, middle.block ], [ ir<0>, ir-bb<entry> ] ; CHECK-NEXT: Successor(s): ir-bb<loop.header> ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb<loop.header>: ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp<%bc.resume.val> from scalar.ph) ; CHECK-NEXT: IR %rec = phi i32 [ 0, %entry ], [ %prev.iv, %loop.latch ] (extra operand: vp<%scalar.recur.init> from scalar.ph) +; CHECK-NEXT: IR %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop.latch ] (extra operand: vp<%bc.merge.rdx> from scalar.ph) ; CHECK-NEXT: IR %conv = zext i32 %rec to i64 ; CHECK-NEXT: IR %iv.nonzero = icmp ne i32 %iv, 0 ; CHECK-NEXT: IR %or.cond = select i1 %iv.nonzero, i1 %guard, i1 false @@ -59,28 +70,26 @@ entry: loop.header: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] %rec = phi i32 [ 0, %entry ], [ %prev.iv, %loop.latch ] + %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop.latch 
] ; move %conv = zext i32 %rec to i64 %iv.nonzero = icmp ne i32 %iv, 0 %or.cond = select i1 %iv.nonzero, i1 %guard, i1 false - br i1 %or.cond, label %loop.latch, label %loop.rem - -loop.rem: - %rem = srem i32 %iv, %divisor br label %loop.latch loop.latch: - %cond = phi i32 [ %rem, %loop.rem ], [ 0, %loop.header ] + %cond = phi i32 [ 0, %loop.header ] ; after %prev.iv = add nsw i32 %iv, -1 + %sum.next = add i32 %sum, %cond %iv.next = add nuw nsw i32 %iv, 1 %cmp = icmp slt i32 %iv, %limit br i1 %cmp, label %loop.header, label %exit exit: - ret i32 %rec + ret i32 %sum.next } >From 8f8681c11d7b64d4e7801ac901664cb937164bea Mon Sep 17 00:00:00 2001 From: Zile Xiong <[email protected]> Date: Fri, 22 May 2026 20:57:19 +0800 Subject: [PATCH 6/6] use map_to_vector instead of a loop --- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 3a268fa667416..59b2f8ec261f5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -32,7 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" - +#include "llvm/ADT/SmallVectorExtras.h" #define DEBUG_TYPE "vplan" using namespace llvm; @@ -864,12 +864,10 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, /// fails. 
static bool tryToSinkOrHoistRecurrenceUsers(VPBasicBlock *HeaderVPBB, VPDominatorTree &VPDT) { - SmallVector<VPFirstOrderRecurrencePHIRecipe *> FORs; - - for (VPRecipeBase &R : HeaderVPBB->phis()) - if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) - FORs.push_back(FOR); - + auto FORs = map_to_vector( + make_filter_range(HeaderVPBB->phis(), + IsaPred<VPFirstOrderRecurrencePHIRecipe>), + [](VPRecipeBase &R) { return cast<VPFirstOrderRecurrencePHIRecipe>(&R); }); for (VPFirstOrderRecurrencePHIRecipe *FOR : FORs) { // Follow through FOR phi chains to find the actual Previous recipe. // Fixed-order recurrences do not contain cycles, so this loop is _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
