Author: Jasmine Tang Date: 2025-08-13T00:07:58-07:00 New Revision: fe0e8f3ee1c8c4aa33221a19f0cd1dec6eb32059
URL: https://github.com/llvm/llvm-project/commit/fe0e8f3ee1c8c4aa33221a19f0cd1dec6eb32059 DIFF: https://github.com/llvm/llvm-project/commit/fe0e8f3ee1c8c4aa33221a19f0cd1dec6eb32059.diff LOG: Revert "[WebAssembly] Combine i128 to v16i8 for setcc & expand memcmp for 16 …" This reverts commit 348f01f89c272c08e46bbdc1779d658323f17214. Added: Modified: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp llvm/test/CodeGen/WebAssembly/memcmp-expand.ll Removed: llvm/test/CodeGen/WebAssembly/simd-setcc.ll ################################################################################ diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 99cfe20ffb7fc..f9eba4b0ff6e1 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3383,55 +3383,8 @@ static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) { return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); } -/// Try to convert a i128 comparison to a v16i8 comparison before type -/// legalization splits it up into chunks -static SDValue -combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - const WebAssemblySubtarget *Subtarget) { - - SDLoc DL(N); - SDValue X = N->getOperand(0); - SDValue Y = N->getOperand(1); - EVT VT = N->getValueType(0); - EVT OpVT = X.getValueType(); - - SelectionDAG &DAG = DCI.DAG; - if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat)) - return SDValue(); - - ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); - // We're looking for an oversized integer equality comparison with SIMD - if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 || - !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC)) - return SDValue(); - - // Don't perform this combine if constructing the vector will be expensive. - auto IsVectorBitCastCheap = [](SDValue X) { - X = peekThroughBitcasts(X); - return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD; - }; - - if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) - return SDValue(); - - SDValue VecX = DAG.getBitcast(MVT::v16i8, X); - SDValue VecY = DAG.getBitcast(MVT::v16i8, Y); - SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC); - - SDValue Intr = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue - : Intrinsic::wasm_anytrue, - DL, MVT::i32), - Cmp}); - - return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32), CC); -} - static SDValue performSETCCCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const WebAssemblySubtarget *Subtarget) { + TargetLowering::DAGCombinerInfo &DCI) { if (!DCI.isBeforeLegalize()) return SDValue(); @@ -3439,9 +3392,6 @@ static SDValue performSETCCCombine(SDNode *N, if (!VT.isScalarInteger()) return SDValue(); - if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget)) - return V; - SDValue LHS = N->getOperand(0); if (LHS->getOpcode() != ISD::BITCAST) return SDValue(); @@ -3621,7 +3571,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, case ISD::BITCAST: return performBitcastCombine(N, DCI); case ISD::SETCC: - return performSETCCCombine(N, DCI, Subtarget); + return performSETCCCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return performVECTOR_SHUFFLECombine(N, DCI); case ISD::SIGN_EXTEND: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 08fb7586d215e..52e706514226b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -147,8 +147,7 @@ WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.AllowOverlappingLoads = true; - if (ST->hasSIMD128()) - Options.LoadSizes.push_back(16); + // TODO: Teach WebAssembly backend about load v128. Options.LoadSizes.append({8, 4, 2, 1}); Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 9c520ff9a4db7..8030438645f82 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -127,16 +127,24 @@ define i1 @memcmp_expand_8(ptr %a, ptr %b) { ret i1 %res } +; TODO: Should be using a single load i64x2 or equivalent in bitsizes define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: ; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.load $push1=, 0($0):p2align=0 -; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0 -; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0 -; CHECK-NEXT: i8x16.all_true $push3=, $pop2 -; CHECK-NEXT: i32.eqz $push4=, $pop3 -; CHECK-NEXT: return $pop4 +; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push11=, 8 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i64.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i64.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) %res = icmp eq i32 %cmp_16, 0 ret i1 %res diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll deleted file mode 100644 index 9efa63ab9bbbd..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll +++ /dev/null @@ -1,89 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s - -target triple = "wasm32-unknown-unknown" - -declare i32 @memcmp(ptr, ptr, i32) - -define i1 @setcc_load(ptr %a, ptr %b) { -; CHECK-LABEL: setcc_load: -; CHECK: .functype setcc_load (i32, i32) -> (i32) -; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.load $push1=, 0($0):p2align=0 -; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0 -; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0 -; CHECK-NEXT: i8x16.all_true $push3=, $pop2 -; CHECK-NEXT: i32.eqz $push4=, $pop3 -; CHECK-NEXT: return $pop4 - %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) - %res = icmp eq i32 %cmp_16, 0 - ret i1 %res -} - -; INFO: Negative test: noimplicitfloat disables simd -define i1 @setcc_load_should_not_vectorize(ptr %a, ptr %b) noimplicitfloat { -; CHECK-LABEL: setcc_load_should_not_vectorize: -; CHECK: .functype setcc_load_should_not_vectorize (i32, i32) -> (i32) -; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i64.load $push4=, 0($0):p2align=0 -; CHECK-NEXT: i64.load $push3=, 0($1):p2align=0 -; CHECK-NEXT: i64.xor $push5=, $pop4, $pop3 -; CHECK-NEXT: i64.load $push1=, 8($0):p2align=0 -; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 -; CHECK-NEXT: i64.xor $push2=, $pop1, $pop0 -; CHECK-NEXT: i64.or $push6=, $pop5, $pop2 -; CHECK-NEXT: i64.eqz $push7=, $pop6 -; CHECK-NEXT: return $pop7 - %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) - %res = icmp eq i32 %cmp_16, 0 - ret i1 %res -} - -define i1 @setcc_eq_const_i128(ptr %ptr) { -; CHECK-LABEL: setcc_eq_const_i128: -; CHECK: .functype setcc_eq_const_i128 (i32) -> (i32) -; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.load $push0=, 0($0) -; CHECK-NEXT: v128.const $push1=, 6, 0 -; CHECK-NEXT: i8x16.eq $push2=, $pop0, $pop1 -; CHECK-NEXT: i8x16.all_true $push3=, $pop2 -; CHECK-NEXT: i32.eqz $push4=, $pop3 -; CHECK-NEXT: return $pop4 - %l = load i128, ptr %ptr - %res = icmp eq i128 %l, 6 - ret i1 %res -} - -define i1 @setcc_ne_const_i128(ptr %ptr) { -; CHECK-LABEL: setcc_ne_const_i128: -; CHECK: .functype setcc_ne_const_i128 (i32) -> (i32) -; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.load $push0=, 0($0) -; CHECK-NEXT: v128.const $push1=, 16, 0 -; CHECK-NEXT: i8x16.ne $push2=, $pop0, $pop1 -; CHECK-NEXT: v128.any_true $push3=, $pop2 -; CHECK-NEXT: return $pop3 - %l = load i128, ptr %ptr - %res = icmp ne i128 %l, 16 - ret i1 %res -} - -; INFO: Negative test: only eq and ne works -define i1 @setcc_slt_const_i128(ptr %ptr) { -; CHECK-LABEL: setcc_slt_const_i128: -; CHECK: .functype setcc_slt_const_i128 (i32) -> (i32) -; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i64.load $push2=, 0($0) -; CHECK-NEXT: i64.const $push3=, 25 -; CHECK-NEXT: i64.lt_u $push4=, $pop2, $pop3 -; CHECK-NEXT: i64.load $push8=, 8($0) -; CHECK-NEXT: local.tee $push7=, $1=, $pop8 -; CHECK-NEXT: i64.const $push0=, 0 -; CHECK-NEXT: i64.lt_s $push1=, $pop7, $pop0 -; CHECK-NEXT: i64.eqz $push5=, $1 -; CHECK-NEXT: i32.select $push6=, $pop4, $pop1, $pop5 -; CHECK-NEXT: return $pop6 - %l = load i128, ptr %ptr - %res = icmp slt i128 %l, 25 - ret i1 %res -} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits