================ @@ -4386,34 +4386,59 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); + SDNodeFlags ScaleFlags; + // The multiplication of an index by the type size does not wrap the + // pointer index type in a signed sense (mul nsw). + if (NW.hasNoUnsignedSignedWrap()) + ScaleFlags.setNoSignedWrap(true); + + // The multiplication of an index by the type size does not wrap the + // pointer index type in an unsigned sense (mul nuw). + if (NW.hasNoUnsignedWrap()) + ScaleFlags.setNoUnsignedWrap(true); + if (ElementScalable) { EVT VScaleTy = N.getValueType().getScalarType(); SDValue VScale = DAG.getNode( ISD::VSCALE, dl, VScaleTy, DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy)); if (IsVectorGEP) VScale = DAG.getSplatVector(N.getValueType(), dl, VScale); - IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale, + ScaleFlags); } else { // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementMul != 1) { if (ElementMul.isPowerOf2()) { unsigned Amt = ElementMul.logBase2(); - IdxN = DAG.getNode(ISD::SHL, dl, - N.getValueType(), IdxN, - DAG.getConstant(Amt, dl, IdxN.getValueType())); + IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, + DAG.getConstant(Amt, dl, IdxN.getValueType()), + ScaleFlags); } else { SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, dl, - N.getValueType(), IdxN, Scale); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale, + ScaleFlags); } } } - N = DAG.getNode(ISD::ADD, dl, - N.getValueType(), N, IdxN); + SDNodeFlags AddFlags; + + // The successive addition of each offset (without adding the base + // address) does not wrap the pointer index type in a signed sense (add + // nsw). 
+ if (NW.hasNoUnsignedSignedWrap()) + AddFlags.setNoSignedWrap(true); ---------------- arsenm wrote:
That's already tested here:
https://github.com/llvm/llvm-project/blob/56474dac206d8592cccc229cb56e1f12b543ec97/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll#L134

But it's still not enough. `computeKnownBits` still can't prove the sign bit is zero during selection with all flags on both GEPs:

```
define void @gep_all_flags(i32 %idx, i32 %val) {
  %alloca = alloca [32 x i32], align 4, addrspace(5)
  %gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  %gep1 = getelementptr inbounds nuw i8, ptr addrspace(5) %gep0, i32 16
  store volatile i32 %val, ptr addrspace(5) %gep1, align 4
  ret void
}
```

```
Optimized legalized selection DAG: %bb.0 'gep_all_flags:'
SelectionDAG has 14 nodes:
  t0: ch,glue = EntryToken
  t4: i32,ch = CopyFromReg # D:1 t0, Register:i32 %8
  t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %7
  t7: i32 = shl nuw nsw # D:1 t2, Constant:i32<2>
  t8: i32 = add nuw # D:1 FrameIndex:i32<0>, t7
  t10: i32 = add nuw # D:1 t8, Constant:i32<16>
  t13: ch = store<(volatile store (s32) into %ir.gep1, addrspace 5)> # D:1 t0, t4, t10, undef:i32
  t14: ch = RET_GLUE t13
```

https://github.com/llvm/llvm-project/pull/110815
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits