https://github.com/adams381 updated https://github.com/llvm/llvm-project/pull/206575
>From 10d5f8f6e7e2987c849414f2c037567566e2d52e Mon Sep 17 00:00:00 2001 From: Adam Smith <[email protected]> Date: Mon, 29 Jun 2026 12:40:42 -0700 Subject: [PATCH 1/2] [CIR] Fix unsigned/wide switch case-range lowering A `switch` over an unsigned or wider-than-32-bit integer with a `case lo ... hi` range was miscompiled -- SingleSource pr34154.c switches on an `unsigned long long` and every value wrongly took the default. FlattenCFG tested whether the range was empty with a signed comparison (`lowerBound.sgt(upperBound)`), so an unsigned range whose upper bound has the high bit set (negative when read as signed) was treated as empty and dropped. Behind that sat three more width/signedness slips: the small-range size gate compared a 64-bit difference against a 32-bit `APInt`, the expansion loop advanced a signed `APInt` cursor that looped forever for a range ending at the type's maximum (the cursor wraps past the top of the domain), and `condBrToRangeDestination` built the range-check constants and the subtraction at a hardcoded 32-bit width. Derive the switch operand's integer type once and evaluate the emptiness test in its signedness; gate and expand small ranges with a wrap-safe count-based cursor; and build the range-check constants and subtraction at the operand width. The `sub x, lo; ule (x - lo), (hi - lo)` idiom is a cyclic-distance test correct for both signed and unsigned switches, so the comparison stays unsigned and the existing 32-bit signed-range lowering is unchanged. --- .../lib/CIR/Dialect/Transforms/FlattenCFG.cpp | 56 +++++--- clang/test/CIR/CodeGen/switch-range.c | 123 ++++++++++++++++++ clang/test/CIR/Transforms/switch.cir | 29 +++++ 3 files changed, 188 insertions(+), 20 deletions(-) create mode 100644 clang/test/CIR/CodeGen/switch-range.c diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp index 0b8e6b57c6e7a..5ded45e841a97 100644 --- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp +++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp @@ -263,28 +263,38 @@ class CIRSwitchOpFlattening : public mlir::OpRewritePattern<cir::SwitchOp> { mlir::Block *defaultDestination, const APInt &lowerBound, const APInt &upperBound) const { - assert(lowerBound.sle(upperBound) && "Invalid range"); + auto condType = mlir::cast<cir::IntType>(op.getCondition().getType()); + bool isSigned = condType.isSigned(); + assert( + (isSigned ? lowerBound.sle(upperBound) : lowerBound.ule(upperBound)) && + "Invalid range"); mlir::Block *resBlock = rewriter.createBlock(defaultDestination); - cir::IntType sIntType = cir::IntType::get(op.getContext(), 32, true); - cir::IntType uIntType = cir::IntType::get(op.getContext(), 32, false); - cir::ConstantOp rangeLength = cir::ConstantOp::create( - rewriter, op.getLoc(), - cir::IntAttr::get(sIntType, upperBound - lowerBound)); + // Build the range check at the switch operand's width. The classic + // `sub x, lo; ule (x - lo), (hi - lo)` idiom is a cyclic-distance test that + // is correct for both signed and unsigned switches, so the comparison is + // always unsigned. + cir::IntType uIntType = + cir::IntType::get(op.getContext(), condType.getWidth(), false); cir::ConstantOp lowerBoundValue = cir::ConstantOp::create( - rewriter, op.getLoc(), cir::IntAttr::get(sIntType, lowerBound)); + rewriter, op.getLoc(), cir::IntAttr::get(condType, lowerBound)); mlir::Value diffValue = cir::SubOp::create( rewriter, op.getLoc(), op.getCondition(), lowerBoundValue); - // Use unsigned comparison to check if the condition is in the range. - cir::CastOp uDiffValue = cir::CastOp::create( - rewriter, op.getLoc(), uIntType, CastKind::integral, diffValue); - cir::CastOp uRangeLength = cir::CastOp::create( - rewriter, op.getLoc(), uIntType, CastKind::integral, rangeLength); + // Use unsigned comparison to check if the condition is in the range. A + // signed difference is reinterpreted as unsigned; an unsigned one already + // has the right type. + if (isSigned) + diffValue = cir::CastOp::create(rewriter, op.getLoc(), uIntType, + CastKind::integral, diffValue); + + cir::ConstantOp rangeLength = cir::ConstantOp::create( + rewriter, op.getLoc(), + cir::IntAttr::get(uIntType, upperBound - lowerBound)); cir::CmpOp cmpResult = cir::CmpOp::create( - rewriter, op.getLoc(), cir::CmpOpKind::le, uDiffValue, uRangeLength); + rewriter, op.getLoc(), cir::CmpOpKind::le, diffValue, rangeLength); cir::BrCondOp::create(rewriter, op.getLoc(), cmpResult, rangeDestination, defaultDestination); return resBlock; @@ -443,22 +453,28 @@ class CIRSwitchOpFlattening : public mlir::OpRewritePattern<cir::SwitchOp> { rewriter.eraseOp(caseOp); } + bool isSigned = + mlir::cast<cir::IntType>(op.getCondition().getType()).isSigned(); for (auto [rangeVal, operand, destination] : llvm::zip(rangeValues, rangeOperands, rangeDestinations)) { APInt lowerBound = rangeVal.first; APInt upperBound = rangeVal.second; - // The case range is unreachable, skip it. - if (lowerBound.sgt(upperBound)) + // An empty range (lo > hi in the switch's signedness) is unreachable. + if (isSigned ? lowerBound.sgt(upperBound) : lowerBound.ugt(upperBound)) continue; // If range is small, add multiple switch instruction cases. // This magical number is from the original CGStmt code. - constexpr int kSmallRangeThreshold = 64; - if ((upperBound - lowerBound) - .ult(llvm::APInt(32, kSmallRangeThreshold))) { - for (APInt iValue = lowerBound; iValue.sle(upperBound); ++iValue) { - caseValues.push_back(iValue); + constexpr uint64_t kSmallRangeThreshold = 64; + APInt rangeSize = upperBound - lowerBound; + if (rangeSize.ult(kSmallRangeThreshold)) { + // Expand into individual cases with a count-based cursor so a range at + // the top of the domain cannot overflow the APInt and loop forever. + APInt caseValue = lowerBound; + for (uint64_t i = 0, e = rangeSize.getZExtValue(); i <= e; + ++i, ++caseValue) { + caseValues.push_back(caseValue); caseOperands.push_back(operand); caseDestinations.push_back(destination); } diff --git a/clang/test/CIR/CodeGen/switch-range.c b/clang/test/CIR/CodeGen/switch-range.c new file mode 100644 index 0000000000000..32643c104ed03 --- /dev/null +++ b/clang/test/CIR/CodeGen/switch-range.c @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +typedef unsigned long long u64; + +// Large unsigned range; high-bit upper bound must not be treated as empty. +int range_u64(u64 x) { + switch (x) { + case 1000000000000000000ULL ... 9999999999999999999ULL: + return 1; + default: + return 0; + } +} + +// CIR-LABEL: cir.func {{.*}} @range_u64 +// CIR: cir.case(range, [#cir.int<1000000000000000000> : !u64i, #cir.int<9999999999999999999> : !u64i]) + +// LLVM-LABEL: define dso_local i32 @range_u64( +// LLVM: %[[D:.*]] = sub i64 %{{.*}}, 1000000000000000000 +// LLVM: icmp ule i64 %[[D]], 8999999999999999999 + +// Small unsigned range at the top of the domain. +int range_u64_top(u64 x) { + switch (x) { + case 18446744073709551613ULL ... 18446744073709551615ULL: + return 1; + default: + return 0; + } +} + +// CIR-LABEL: cir.func {{.*}} @range_u64_top +// CIR: cir.case(range, [#cir.int<18446744073709551613> : !u64i, #cir.int<18446744073709551615> : !u64i]) + +// LLVM-LABEL: define dso_local i32 @range_u64_top( +// LLVM: switch i64 %{{.*}}, label %{{.*}} [ +// LLVM: i64 -3, label %[[BB:.*]] +// LLVM: i64 -2, label %[[BB]] +// LLVM: i64 -1, label %[[BB]] +// LLVM: ] + +// Signed range spanning negative to positive, wider than the expansion +// threshold. +int range_s32_neg(int x) { + switch (x) { + case -2000000000 ... 2000000000: + return 1; + default: + return 0; + } +} + +// CIR-LABEL: cir.func {{.*}} @range_s32_neg +// CIR: cir.case(range, [#cir.int<-2000000000> : !s32i, #cir.int<2000000000> : !s32i]) + +// LLVM-LABEL: define dso_local i32 @range_s32_neg( +// LLVM: %[[D:.*]] = sub i32 %{{.*}}, -2000000000 +// LLVM: icmp ule i32 %[[D]], -294967296 + +// Small signed range (existing expansion path). +int range_s32_small(int x) { + switch (x) { + case 3 ... 6: + return 1; + default: + return 0; + } +} + +// CIR-LABEL: cir.func {{.*}} @range_s32_small +// CIR: cir.case(range, [#cir.int<3> : !s32i, #cir.int<6> : !s32i]) + +// LLVM-LABEL: define dso_local i32 @range_s32_small( +// LLVM: switch i32 %{{.*}}, label %{{.*}} [ +// LLVM: i32 3, label %[[BB:.*]] +// LLVM: i32 4, label %[[BB]] +// LLVM: i32 5, label %[[BB]] +// LLVM: i32 6, label %[[BB]] +// LLVM: ] + +// Small signed range at the top of the domain: the expansion cursor must not +// run past INT_MAX. +int range_s32_top(int x) { + switch (x) { + case 2147483645 ... 2147483647: + return 1; + default: + return 0; + } +} + +// CIR-LABEL: cir.func {{.*}} @range_s32_top +// CIR: cir.case(range, [#cir.int<2147483645> : !s32i, #cir.int<2147483647> : !s32i]) + +// LLVM-LABEL: define dso_local i32 @range_s32_top( +// LLVM: switch i32 %{{.*}}, label %{{.*}} [ +// LLVM: i32 2147483645, label %[[BB:.*]] +// LLVM: i32 2147483646, label %[[BB]] +// LLVM: i32 2147483647, label %[[BB]] +// LLVM: ] + +// A range of size 64 sits just past the expansion threshold, so it lowers via +// the sub + ule range check rather than individual cases. +int range_thresh64(int x) { + switch (x) { + case 0 ... 64: + return 1; + default: + return 0; + } +} + +// CIR-LABEL: cir.func {{.*}} @range_thresh64 +// CIR: cir.case(range, [#cir.int<0> : !s32i, #cir.int<64> : !s32i]) + +// LLVM-LABEL: define dso_local i32 @range_thresh64( +// LLVM: %[[D:.*]] = sub i32 %{{.*}}, 0 +// LLVM: icmp ule i32 %[[D]], 64 diff --git a/clang/test/CIR/Transforms/switch.cir b/clang/test/CIR/Transforms/switch.cir index 0ac323800a7fb..902c1e8612336 100644 --- a/clang/test/CIR/Transforms/switch.cir +++ b/clang/test/CIR/Transforms/switch.cir @@ -3,6 +3,7 @@ !s8i = !cir.int<s, 8> !s32i = !cir.int<s, 32> !s64i = !cir.int<s, 64> +!u64i = !cir.int<u, 64> module { cir.func @shouldFlatSwitchWithDefault(%arg0: !s8i) { @@ -313,4 +314,32 @@ module { // CHECK: ^bb[[#RET_BB]]: // pred: ^bb[[#EXIT]] // CHECK: cir.return // CHECK: } + + // A large unsigned 64-bit range with a high-bit upper bound must lower at the + // operand width (not a hardcoded i32) and use an unsigned compare with no + // sign cast. + cir.func @bigRangeU64(%arg0: !u64i) { + cir.switch (%arg0 : !u64i) { + cir.case(range, [#cir.int<1000000000000000000> : !u64i, + #cir.int<9999999999999999999> : !u64i]) { + cir.break + } + cir.case(default, []) { + cir.break + } + cir.yield + } + cir.return + } + +// CHECK: cir.func{{.*}} @bigRangeU64(%arg0: !u64i) { +// CHECK: cir.switch.flat %[[COND:.*]] : !u64i, ^bb[[#RANGE_BR:]] [ +// CHECK: ] +// CHECK: ^bb[[#RANGE_BR]]: +// CHECK: %[[LO:.*]] = cir.const #cir.int<1000000000000000000> : !u64i +// CHECK: %[[SUB:.*]] = cir.sub %[[COND]], %[[LO]] : !u64i +// CHECK-NOT: cir.cast +// CHECK: %[[LEN:.*]] = cir.const #cir.int<8999999999999999999> : !u64i +// CHECK: %[[CMP:.*]] = cir.cmp le %[[SUB]], %[[LEN]] : !u64i +// CHECK: cir.brcond %[[CMP]] } >From 949830ab7db21fe835bf7b4b09bde13b9cd3d931 Mon Sep 17 00:00:00 2001 From: Adam Smith <[email protected]> Date: Mon, 29 Jun 2026 15:54:13 -0700 Subject: [PATCH 2/2] [CIR] Address review on switch case-range lowering Rework the small-range case expansion from a uint64_t count cursor to an APInt loop that breaks on `caseValue == upperBound` before incrementing. The cursor was introduced to avoid the upstream `for (APInt i = lo; i.sle(hi); ++i)` loop, which runs forever when the upper bound is the type's maximum: the increment wraps below the bound and the value-based condition never goes false. The break-before-increment form keeps that guarantee without the separate index. Reword the range-check comment to match the emitted op: the signed difference takes a same-width signed->unsigned integral cast (a signedness reinterpretation) so the comparison is unsigned, since cir.cmp derives signedness from the operand type. Annotate the unsigned IntType's signedness argument. Extend switch-range.c to pin the sub/icmp/branch sequence and the switch case lists, and add flatCaseRangeTopExpand to switch.cir covering the top-of-domain expansion that the upstream sle loop could not terminate. --- .../lib/CIR/Dialect/Transforms/FlattenCFG.cpp | 23 +++++---- clang/test/CIR/CodeGen/switch-range.c | 47 ++++++++++--------- clang/test/CIR/Transforms/switch.cir | 24 ++++++++++ 3 files changed, 64 insertions(+), 30 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp index 5ded45e841a97..7c47cdb60a9fd 100644 --- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp +++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp @@ -275,16 +275,19 @@ class CIRSwitchOpFlattening : public mlir::OpRewritePattern<cir::SwitchOp> { // is correct for both signed and unsigned switches, so the comparison is // always unsigned. cir::IntType uIntType = - cir::IntType::get(op.getContext(), condType.getWidth(), false); + cir::IntType::get(op.getContext(), condType.getWidth(), + /*isSigned=*/false); cir::ConstantOp lowerBoundValue = cir::ConstantOp::create( rewriter, op.getLoc(), cir::IntAttr::get(condType, lowerBound)); mlir::Value diffValue = cir::SubOp::create( rewriter, op.getLoc(), op.getCondition(), lowerBoundValue); - // Use unsigned comparison to check if the condition is in the range. A - // signed difference is reinterpreted as unsigned; an unsigned one already - // has the right type. + // Use an unsigned comparison to check whether the condition is in range. + // cir.cmp takes its signedness from the operand type, so a signed + // difference needs a same-width integral cast to the unsigned type (a + // signedness reinterpretation) to make the `le` unsigned; an unsigned + // difference already has the right type. if (isSigned) diffValue = cir::CastOp::create(rewriter, op.getLoc(), uIntType, CastKind::integral, diffValue); @@ -469,14 +472,18 @@ class CIRSwitchOpFlattening : public mlir::OpRewritePattern<cir::SwitchOp> { constexpr uint64_t kSmallRangeThreshold = 64; APInt rangeSize = upperBound - lowerBound; if (rangeSize.ult(kSmallRangeThreshold)) { - // Expand into individual cases with a count-based cursor so a range at - // the top of the domain cannot overflow the APInt and loop forever. + // Expand into individual cases. Break on equality before incrementing + // so the cursor never steps past the top of the domain: when + // upperBound is the type's maximum, ++caseValue would wrap and a + // value-based loop condition would never terminate. APInt caseValue = lowerBound; - for (uint64_t i = 0, e = rangeSize.getZExtValue(); i <= e; - ++i, ++caseValue) { + while (true) { caseValues.push_back(caseValue); caseOperands.push_back(operand); caseDestinations.push_back(destination); + if (caseValue == upperBound) + break; + ++caseValue; } continue; } diff --git a/clang/test/CIR/CodeGen/switch-range.c b/clang/test/CIR/CodeGen/switch-range.c index 32643c104ed03..45a08bd183575 100644 --- a/clang/test/CIR/CodeGen/switch-range.c +++ b/clang/test/CIR/CodeGen/switch-range.c @@ -21,8 +21,9 @@ int range_u64(u64 x) { // CIR: cir.case(range, [#cir.int<1000000000000000000> : !u64i, #cir.int<9999999999999999999> : !u64i]) // LLVM-LABEL: define dso_local i32 @range_u64( -// LLVM: %[[D:.*]] = sub i64 %{{.*}}, 1000000000000000000 -// LLVM: icmp ule i64 %[[D]], 8999999999999999999 +// LLVM: %[[D:.*]] = sub i64 %{{.*}}, 1000000000000000000 +// LLVM-NEXT: %[[C:.*]] = icmp ule i64 %[[D]], 8999999999999999999 +// LLVM-NEXT: br i1 %[[C]], label %{{.*}}, label %{{.*}} // Small unsigned range at the top of the domain. int range_u64_top(u64 x) { @@ -38,11 +39,11 @@ int range_u64_top(u64 x) { // CIR: cir.case(range, [#cir.int<18446744073709551613> : !u64i, #cir.int<18446744073709551615> : !u64i]) // LLVM-LABEL: define dso_local i32 @range_u64_top( -// LLVM: switch i64 %{{.*}}, label %{{.*}} [ -// LLVM: i64 -3, label %[[BB:.*]] -// LLVM: i64 -2, label %[[BB]] -// LLVM: i64 -1, label %[[BB]] -// LLVM: ] +// LLVM: switch i64 %{{.*}}, label %{{.*}} [ +// LLVM-NEXT: i64 -3, label %[[BB:.*]] +// LLVM-NEXT: i64 -2, label %[[BB]] +// LLVM-NEXT: i64 -1, label %[[BB]] +// LLVM-NEXT: ] // Signed range spanning negative to positive, wider than the expansion // threshold. @@ -59,8 +60,9 @@ int range_s32_neg(int x) { // CIR: cir.case(range, [#cir.int<-2000000000> : !s32i, #cir.int<2000000000> : !s32i]) // LLVM-LABEL: define dso_local i32 @range_s32_neg( -// LLVM: %[[D:.*]] = sub i32 %{{.*}}, -2000000000 -// LLVM: icmp ule i32 %[[D]], -294967296 +// LLVM: %[[D:.*]] = sub i32 %{{.*}}, -2000000000 +// LLVM-NEXT: %[[C:.*]] = icmp ule i32 %[[D]], -294967296 +// LLVM-NEXT: br i1 %[[C]], label %{{.*}}, label %{{.*}} // Small signed range (existing expansion path). int range_s32_small(int x) { @@ -76,12 +78,12 @@ int range_s32_small(int x) { // CIR: cir.case(range, [#cir.int<3> : !s32i, #cir.int<6> : !s32i]) // LLVM-LABEL: define dso_local i32 @range_s32_small( -// LLVM: switch i32 %{{.*}}, label %{{.*}} [ -// LLVM: i32 3, label %[[BB:.*]] -// LLVM: i32 4, label %[[BB]] -// LLVM: i32 5, label %[[BB]] -// LLVM: i32 6, label %[[BB]] -// LLVM: ] +// LLVM: switch i32 %{{.*}}, label %{{.*}} [ +// LLVM-NEXT: i32 3, label %[[BB:.*]] +// LLVM-NEXT: i32 4, label %[[BB]] +// LLVM-NEXT: i32 5, label %[[BB]] +// LLVM-NEXT: i32 6, label %[[BB]] +// LLVM-NEXT: ] // Small signed range at the top of the domain: the expansion cursor must not // run past INT_MAX. @@ -98,11 +100,11 @@ int range_s32_top(int x) { // CIR: cir.case(range, [#cir.int<2147483645> : !s32i, #cir.int<2147483647> : !s32i]) // LLVM-LABEL: define dso_local i32 @range_s32_top( -// LLVM: switch i32 %{{.*}}, label %{{.*}} [ -// LLVM: i32 2147483645, label %[[BB:.*]] -// LLVM: i32 2147483646, label %[[BB]] -// LLVM: i32 2147483647, label %[[BB]] -// LLVM: ] +// LLVM: switch i32 %{{.*}}, label %{{.*}} [ +// LLVM-NEXT: i32 2147483645, label %[[BB:.*]] +// LLVM-NEXT: i32 2147483646, label %[[BB]] +// LLVM-NEXT: i32 2147483647, label %[[BB]] +// LLVM-NEXT: ] // A range of size 64 sits just past the expansion threshold, so it lowers via // the sub + ule range check rather than individual cases. @@ -119,5 +121,6 @@ int range_thresh64(int x) { // CIR: cir.case(range, [#cir.int<0> : !s32i, #cir.int<64> : !s32i]) // LLVM-LABEL: define dso_local i32 @range_thresh64( -// LLVM: %[[D:.*]] = sub i32 %{{.*}}, 0 -// LLVM: icmp ule i32 %[[D]], 64 +// LLVM: %[[D:.*]] = sub i32 %{{.*}}, 0 +// LLVM-NEXT: %[[C:.*]] = icmp ule i32 %[[D]], 64 +// LLVM-NEXT: br i1 %[[C]], label %{{.*}}, label %{{.*}} diff --git a/clang/test/CIR/Transforms/switch.cir b/clang/test/CIR/Transforms/switch.cir index 902c1e8612336..d7456948fb1e4 100644 --- a/clang/test/CIR/Transforms/switch.cir +++ b/clang/test/CIR/Transforms/switch.cir @@ -342,4 +342,28 @@ module { // CHECK: %[[LEN:.*]] = cir.const #cir.int<8999999999999999999> : !u64i // CHECK: %[[CMP:.*]] = cir.cmp le %[[SUB]], %[[LEN]] : !u64i // CHECK: cir.brcond %[[CMP]] + + // A small range at the top of the signed domain expands to individual cases. + // The expansion cursor must stop at the upper bound instead of incrementing + // past INT_MAX, which would wrap and make the expansion loop run forever. + cir.func @flatCaseRangeTopExpand(%arg0: !s32i) { + cir.switch (%arg0 : !s32i) { + cir.case(range, [#cir.int<2147483645> : !s32i, + #cir.int<2147483647> : !s32i]) { + cir.break + } + cir.case(default, []) { + cir.break + } + cir.yield + } + cir.return + } + +// CHECK: cir.func{{.*}} @flatCaseRangeTopExpand(%arg0: !s32i) { +// CHECK: cir.switch.flat %{{.*}} : !s32i, ^bb[[#DEFAULT:]] [ +// CHECK-NEXT: 2147483645: ^bb[[#CASE:]], +// CHECK-NEXT: 2147483646: ^bb[[#CASE]], +// CHECK-NEXT: 2147483647: ^bb[[#CASE]] +// CHECK-NEXT: ] } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
