Patryk27 created this revision.
Herald added subscribers: Jim, JDevlieghere, hiraditya, dylanmckay.
Herald added a project: All.
Patryk27 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.
Some passes can introduce shifts after AVRShiftExpandPass has completed;
if this happens, we panic during isel because we assume such shifts must
have been already expanded before.
This commit integrates our shift-expansion pass with the instruction-selection pass
so that isel doesn't get surprised by shifts of non-constant amounts
anymore.
Spotted in the wild in rustc:
- https://github.com/rust-lang/compiler-builtins/issues/523
- https://github.com/rust-lang/rust/issues/112140
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D153197
Files:
clang/docs/tools/clang-formatted-files.txt
llvm/lib/Target/AVR/AVR.h
llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
llvm/lib/Target/AVR/AVRISelLowering.cpp
llvm/lib/Target/AVR/AVRShiftExpand.cpp
llvm/lib/Target/AVR/AVRTargetMachine.cpp
llvm/lib/Target/AVR/CMakeLists.txt
llvm/test/CodeGen/AVR/shift-expand.ll
llvm/test/CodeGen/AVR/shift-loop.ll
llvm/test/CodeGen/AVR/shift32.ll
llvm/utils/gn/secondary/llvm/lib/Target/AVR/BUILD.gn
Index: llvm/utils/gn/secondary/llvm/lib/Target/AVR/BUILD.gn
===================================================================
--- llvm/utils/gn/secondary/llvm/lib/Target/AVR/BUILD.gn
+++ llvm/utils/gn/secondary/llvm/lib/Target/AVR/BUILD.gn
@@ -37,7 +37,6 @@
"AVRInstrInfo.cpp",
"AVRMCInstLower.cpp",
"AVRRegisterInfo.cpp",
- "AVRShiftExpand.cpp",
"AVRSubtarget.cpp",
"AVRTargetMachine.cpp",
"AVRTargetObjectFile.cpp",
Index: llvm/test/CodeGen/AVR/shift32.ll
===================================================================
--- llvm/test/CodeGen/AVR/shift32.ll
+++ llvm/test/CodeGen/AVR/shift32.ll
@@ -1,6 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=avr -mattr=movw -verify-machineinstrs | FileCheck %s
+; Shift by a number unknown at compile time.
+; The 'optsize' attribute is set to avoid duplicating part of the loop.
+; TODO: it is more efficient to jump at the start and do the check where the
+; 'rjmp' is now. The branch relaxation pass puts them in this non-optimal order.
+
+define i32 @shl_i32_n(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: shl_i32_n:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: dec r18
+; CHECK-NEXT: brmi .LBB0_3
+; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: lsl r22
+; CHECK-NEXT: rol r23
+; CHECK-NEXT: rol r24
+; CHECK-NEXT: rol r25
+; CHECK-NEXT: rjmp .LBB0_1
+; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: ret
+ %res = shl i32 %a, %b
+ ret i32 %res
+}
+
+define i32 @lshr_i32_n(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: lshr_i32_n:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: dec r18
+; CHECK-NEXT: brmi .LBB1_3
+; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT: lsr r25
+; CHECK-NEXT: ror r24
+; CHECK-NEXT: ror r23
+; CHECK-NEXT: ror r22
+; CHECK-NEXT: rjmp .LBB1_1
+; CHECK-NEXT: .LBB1_3:
+; CHECK-NEXT: ret
+ %res = lshr i32 %a, %b
+ ret i32 %res
+}
+
+define i32 @ashr_i32_n(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: ashr_i32_n:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: dec r18
+; CHECK-NEXT: brmi .LBB2_3
+; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT: asr r25
+; CHECK-NEXT: ror r24
+; CHECK-NEXT: ror r23
+; CHECK-NEXT: ror r22
+; CHECK-NEXT: rjmp .LBB2_1
+; CHECK-NEXT: .LBB2_3:
+; CHECK-NEXT: ret
+ %res = ashr i32 %a, %b
+ ret i32 %res
+}
+
+; Shift by a constant known at compile time.
+
define i32 @shl_i32_1(i32 %a) {
; CHECK-LABEL: shl_i32_1:
; CHECK: ; %bb.0:
@@ -575,3 +636,5 @@
%res = ashr i32 %a, 31
ret i32 %res
}
+
+attributes #0 = { optsize }
Index: llvm/test/CodeGen/AVR/shift-loop.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AVR/shift-loop.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=avr -verify-machineinstrs -stop-after=dead-mi-elimination | FileCheck %s
+
+; This test shows the machine IR that is generated when lowering a shift
+; operation to a loop.
+
+define i32 @shl_i32_n(i32 %a, i32 %b) #0 {
+ ; CHECK-LABEL: name: shl_i32_n
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $r23r22, $r25r24, $r19r18
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dregs = COPY $r19r18
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dregs = COPY $r25r24
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dregs = COPY $r23r22
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr8 = COPY [[COPY]].sub_lo
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr8 = PHI [[COPY1]].sub_hi, %bb.0, %15, %bb.2
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr8 = PHI [[COPY1]].sub_lo, %bb.0, %14, %bb.2
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gpr8 = PHI [[COPY2]].sub_hi, %bb.0, %13, %bb.2
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr8 = PHI [[COPY2]].sub_lo, %bb.0, %12, %bb.2
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr8 = PHI [[COPY3]], %bb.0, %17, %bb.2
+ ; CHECK-NEXT: [[DECRd:%[0-9]+]]:gpr8 = DECRd [[PHI4]], implicit-def $sreg
+ ; CHECK-NEXT: BRMIk %bb.3, implicit $sreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2 (%ir-block.0):
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDRdRr:%[0-9]+]]:gpr8 = ADDRdRr [[PHI3]], [[PHI3]], implicit-def $sreg
+ ; CHECK-NEXT: [[ADCRdRr:%[0-9]+]]:gpr8 = ADCRdRr [[PHI2]], [[PHI2]], implicit-def $sreg, implicit $sreg
+ ; CHECK-NEXT: [[ADCRdRr1:%[0-9]+]]:gpr8 = ADCRdRr [[PHI1]], [[PHI1]], implicit-def $sreg, implicit $sreg
+ ; CHECK-NEXT: [[ADCRdRr2:%[0-9]+]]:gpr8 = ADCRdRr [[PHI]], [[PHI]], implicit-def $sreg, implicit $sreg
+ ; CHECK-NEXT: RJMPk %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3 (%ir-block.0):
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:dregs = REG_SEQUENCE [[PHI]], %subreg.sub_hi, [[PHI1]], %subreg.sub_lo
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:dregs = REG_SEQUENCE [[PHI2]], %subreg.sub_hi, [[PHI3]], %subreg.sub_lo
+ ; CHECK-NEXT: $r23r22 = COPY [[REG_SEQUENCE1]]
+ ; CHECK-NEXT: $r25r24 = COPY [[REG_SEQUENCE]]
+ ; CHECK-NEXT: RET implicit $r23r22, implicit $r25r24, implicit $r1
+ %res = shl i32 %a, %b
+ ret i32 %res
+}
Index: llvm/test/CodeGen/AVR/shift-expand.ll
===================================================================
--- llvm/test/CodeGen/AVR/shift-expand.ll
+++ /dev/null
@@ -1,89 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -avr-shift-expand -S %s -o - | FileCheck %s
-
-; The avr-shift-expand pass expands large shifts with a non-constant shift
-; amount to a loop. These loops avoid generating a (non-existing) builtin such
-; as __ashlsi3.
-
-target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
-target triple = "avr"
-
-define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
-; CHECK-LABEL: @shl(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
-; CHECK: shift.loop:
-; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
-; CHECK-NEXT: [[TMP6]] = shl i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
-; CHECK: shift.done:
-; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: ret i32 [[TMP8]]
-;
- %result = shl i32 %value, %amount
- ret i32 %result
-}
-
-define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
-; CHECK-LABEL: @lshr(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
-; CHECK: shift.loop:
-; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
-; CHECK-NEXT: [[TMP6]] = lshr i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
-; CHECK: shift.done:
-; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: ret i32 [[TMP8]]
-;
- %result = lshr i32 %value, %amount
- ret i32 %result
-}
-
-define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
-; CHECK-LABEL: @ashr(
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
-; CHECK: shift.loop:
-; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1
-; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
-; CHECK: shift.done:
-; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ]
-; CHECK-NEXT: ret i32 [[TMP10]]
-;
- %3 = ashr i32 %0, %1
- ret i32 %3
-}
-
-; This function is not modified because it is not an i32.
-define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
-; CHECK-LABEL: @shl40(
-; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]]
-; CHECK-NEXT: ret i40 [[RESULT]]
-;
- %result = shl i40 %value, %amount
- ret i40 %result
-}
-
-; This function isn't either, although perhaps it should.
-define i24 @shl24(i24 %value, i24 %amount) addrspace(1) {
-; CHECK-LABEL: @shl24(
-; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]]
-; CHECK-NEXT: ret i24 [[RESULT]]
-;
- %result = shl i24 %value, %amount
- ret i24 %result
-}
Index: llvm/lib/Target/AVR/CMakeLists.txt
===================================================================
--- llvm/lib/Target/AVR/CMakeLists.txt
+++ llvm/lib/Target/AVR/CMakeLists.txt
@@ -23,7 +23,6 @@
AVRISelLowering.cpp
AVRMCInstLower.cpp
AVRRegisterInfo.cpp
- AVRShiftExpand.cpp
AVRSubtarget.cpp
AVRTargetMachine.cpp
AVRTargetObjectFile.cpp
Index: llvm/lib/Target/AVR/AVRTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -68,7 +68,6 @@
return getTM<AVRTargetMachine>();
}
- void addIRPasses() override;
bool addInstSelector() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -79,22 +78,12 @@
return new AVRPassConfig(*this, PM);
}
-void AVRPassConfig::addIRPasses() {
- // Expand instructions like
- // %result = shl i32 %n, %amount
- // to a loop so that library calls are avoided.
- addPass(createAVRShiftExpandPass());
-
- TargetPassConfig::addIRPasses();
-}
-
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
// Register the target.
RegisterTargetMachine<AVRTargetMachine> X(getTheAVRTarget());
auto &PR = *PassRegistry::getPassRegistry();
initializeAVRExpandPseudoPass(PR);
- initializeAVRShiftExpandPass(PR);
initializeAVRDAGToDAGISelPass(PR);
}
Index: llvm/lib/Target/AVR/AVRShiftExpand.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRShiftExpand.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-//===- AVRShift.cpp - Shift Expansion Pass --------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
-/// like avr-gcc. This must be done in IR because otherwise the type legalizer
-/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AVR.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-
-using namespace llvm;
-
-namespace {
-
-class AVRShiftExpand : public FunctionPass {
-public:
- static char ID;
-
- AVRShiftExpand() : FunctionPass(ID) {}
-
- bool runOnFunction(Function &F) override;
-
- StringRef getPassName() const override { return "AVR Shift Expansion"; }
-
-private:
- void expand(BinaryOperator *BI);
-};
-
-} // end of anonymous namespace
-
-char AVRShiftExpand::ID = 0;
-
-INITIALIZE_PASS(AVRShiftExpand, "avr-shift-expand", "AVR Shift Expansion",
- false, false)
-
-Pass *llvm::createAVRShiftExpandPass() { return new AVRShiftExpand(); }
-
-bool AVRShiftExpand::runOnFunction(Function &F) {
- SmallVector<BinaryOperator *, 1> ShiftInsts;
- auto &Ctx = F.getContext();
- for (Instruction &I : instructions(F)) {
- if (!I.isShift())
- // Only expand shift instructions (shl, lshr, ashr).
- continue;
- if (I.getType() != Type::getInt32Ty(Ctx))
- // Only expand plain i32 types.
- continue;
- if (isa<ConstantInt>(I.getOperand(1)))
- // Only expand when the shift amount is not known.
- // Known shift amounts are (currently) better expanded inline.
- continue;
- ShiftInsts.push_back(cast<BinaryOperator>(&I));
- }
-
- // The expanding itself needs to be done separately as expand() will remove
- // these instructions. Removing instructions while iterating over a basic
- // block is not a great idea.
- for (auto *I : ShiftInsts) {
- expand(I);
- }
-
- // Return whether this function expanded any shift instructions.
- return ShiftInsts.size() > 0;
-}
-
-void AVRShiftExpand::expand(BinaryOperator *BI) {
- auto &Ctx = BI->getContext();
- IRBuilder<> Builder(BI);
- Type *Int32Ty = Type::getInt32Ty(Ctx);
- Type *Int8Ty = Type::getInt8Ty(Ctx);
- Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
-
- // Split the current basic block at the point of the existing shift
- // instruction and insert a new basic block for the loop.
- BasicBlock *BB = BI->getParent();
- Function *F = BB->getParent();
- BasicBlock *EndBB = BB->splitBasicBlock(BI, "shift.done");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "shift.loop", F, EndBB);
-
- // Truncate the shift amount to i8, which is trivially lowered to a single
- // AVR register.
- Builder.SetInsertPoint(&BB->back());
- Value *ShiftAmount = Builder.CreateTrunc(BI->getOperand(1), Int8Ty);
-
- // Replace the unconditional branch that splitBasicBlock created with a
- // conditional branch.
- Value *Cmp1 = Builder.CreateICmpEQ(ShiftAmount, Int8Zero);
- Builder.CreateCondBr(Cmp1, EndBB, LoopBB);
- BB->back().eraseFromParent();
-
- // Create the loop body starting with PHI nodes.
- Builder.SetInsertPoint(LoopBB);
- PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
- ShiftAmountPHI->addIncoming(ShiftAmount, BB);
- PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
- ValuePHI->addIncoming(BI->getOperand(0), BB);
-
- // Subtract the shift amount by one, as we're shifting one this loop
- // iteration.
- Value *ShiftAmountSub =
- Builder.CreateSub(ShiftAmountPHI, ConstantInt::get(Int8Ty, 1));
- ShiftAmountPHI->addIncoming(ShiftAmountSub, LoopBB);
-
- // Emit the actual shift instruction. The difference is that this shift
- // instruction has a constant shift amount, which can be emitted inline
- // without a library call.
- Value *ValueShifted;
- switch (BI->getOpcode()) {
- case Instruction::Shl:
- ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1));
- break;
- case Instruction::LShr:
- ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
- break;
- case Instruction::AShr:
- ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
- break;
- default:
- llvm_unreachable("asked to expand an instruction that is not a shift");
- }
- ValuePHI->addIncoming(ValueShifted, LoopBB);
-
- // Branch to either the loop again (if there is more to shift) or to the
- // basic block after the loop (if all bits are shifted).
- Value *Cmp2 = Builder.CreateICmpEQ(ShiftAmountSub, Int8Zero);
- Builder.CreateCondBr(Cmp2, EndBB, LoopBB);
-
- // Collect the resulting value. This is necessary in the IR but won't produce
- // any actual instructions.
- Builder.SetInsertPoint(BI);
- PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
- Result->addIncoming(BI->getOperand(0), BB);
- Result->addIncoming(ValueShifted, LoopBB);
-
- // Replace the original shift instruction.
- BI->replaceAllUsesWith(Result);
- BI->eraseFromParent();
-}
Index: llvm/lib/Target/AVR/AVRISelLowering.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -286,11 +286,6 @@
"Expected power-of-2 shift amount");
if (VT.getSizeInBits() == 32) {
- if (!isa<ConstantSDNode>(N->getOperand(1))) {
- // 32-bit shifts are converted to a loop in IR.
- // This should be unreachable.
- report_fatal_error("Expected a constant shift amount!");
- }
SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);
SDValue SrcLo =
DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
@@ -298,25 +293,34 @@
SDValue SrcHi =
DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
DAG.getConstant(1, dl, MVT::i16));
- uint64_t ShiftAmount =
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- if (ShiftAmount == 16) {
- // Special case these two operations because they appear to be used by the
- // generic codegen parts to lower 32-bit numbers.
- // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit
- // shift of a part of the 32-bit value?
- switch (Op.getOpcode()) {
- case ISD::SHL: {
- SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
- return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);
- }
- case ISD::SRL: {
- SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
- return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);
- }
+ SDValue Cnt;
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ // The amount to shift is known at compile time, so we can create an
+ // optimized sequence of instructions to shift this value.
+ uint64_t ShiftAmount =
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (ShiftAmount == 16) {
+ // Special case these two operations because they appear to be used by
+ // the generic codegen parts to lower 32-bit numbers.
+ // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit
+ // shift of a part of the 32-bit value?
+ switch (Op.getOpcode()) {
+ case ISD::SHL: {
+ SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);
+ }
+ case ISD::SRL: {
+ SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);
+ }
+ }
}
+ Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
+ } else {
+ // The shift is not known at compile time, so we have to emit this as a
+ // loop.
+ Cnt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(1));
}
- SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
unsigned Opc;
switch (Op.getOpcode()) {
default:
@@ -1917,20 +1921,20 @@
// shifted.
// For more information and background, see this blogpost:
// https://aykevl.nl/2021/02/avr-bitshift
-static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,
+static void insertMultibyteShift(MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock *BB, const DebugLoc &DL,
MutableArrayRef<std::pair<Register, int>> Regs,
ISD::NodeType Opc, int64_t ShiftAmt) {
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const DebugLoc &dl = MI.getDebugLoc();
const bool ShiftLeft = Opc == ISD::SHL;
const bool ArithmeticShift = Opc == ISD::SRA;
// Zero a register, for use in later operations.
Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::COPY), ZeroReg)
.addReg(STI.getZeroRegister());
// Do a shift modulo 6 or 7. This is a bit more complicated than most shifts
@@ -1949,18 +1953,18 @@
// Shift one to the right, keeping the least significant bit as the carry
// bit.
- insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
+ insertMultibyteShift(MBBI, BB, DL, ShiftRegs, ISD::SRL, 1);
// Rotate the least significant bit from the carry bit into a new register
// (that starts out zero).
Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);
// Shift one more to the right if this is a modulo-6 shift.
if (ShiftAmt % 8 == 6) {
- insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
+ insertMultibyteShift(MBBI, BB, DL, ShiftRegs, ISD::SRL, 1);
Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
LowByte = NewLowByte;
}
@@ -1988,7 +1992,7 @@
Regs.slice(0, ShiftRegsSize);
// Shift one to the left.
- insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
+ insertMultibyteShift(MBBI, BB, DL, ShiftRegs, ISD::SHL, 1);
// Sign or zero extend the most significant register into a new register.
// The HighByte is the byte that still has one (or two) bits from the
@@ -1998,7 +2002,7 @@
Register ExtByte = 0;
if (ArithmeticShift) {
// Sign-extend bit that was shifted out last.
- BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::SBCRdRr), HighByte)
.addReg(HighByte, RegState::Undef)
.addReg(HighByte, RegState::Undef);
ExtByte = HighByte;
@@ -2008,17 +2012,17 @@
// Use the zero register for zero extending.
ExtByte = ZeroReg;
// Rotate most significant bit into a new register (that starts out zero).
- BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::ADCRdRr), HighByte)
.addReg(ExtByte)
.addReg(ExtByte);
}
// Shift one more to the left for modulo 6 shifts.
if (ShiftAmt % 8 == 6) {
- insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
+ insertMultibyteShift(MBBI, BB, DL, ShiftRegs, ISD::SHL, 1);
// Shift the topmost bit into the HighByte.
Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::ADCRdRr), NewExt)
.addReg(HighByte)
.addReg(HighByte);
HighByte = NewExt;
@@ -2063,10 +2067,10 @@
// Sign extend the most significant register into ShrExtendReg.
ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::ADDRdRr), Tmp)
.addReg(Regs[0].first, 0, Regs[0].second)
.addReg(Regs[0].first, 0, Regs[0].second);
- BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::SBCRdRr), ShrExtendReg)
.addReg(Tmp)
.addReg(Tmp);
} else {
@@ -2112,22 +2116,22 @@
for (size_t I = 0; I < Regs.size(); I++) {
size_t Idx = ShiftLeft ? I : Regs.size() - I - 1;
Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::SWAPRd), SwapReg)
.addReg(Regs[Idx].first, 0, Regs[Idx].second);
if (I != 0) {
Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::EORRdRr), R)
.addReg(Prev)
.addReg(SwapReg);
Prev = R;
}
Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::ANDIRdK), AndReg)
.addReg(SwapReg)
.addImm(ShiftLeft ? 0xf0 : 0x0f);
if (I != 0) {
Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
- BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::EORRdRr), R)
.addReg(Prev)
.addReg(AndReg);
size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1;
@@ -2148,11 +2152,11 @@
Register In = Regs[I].first;
Register InSubreg = Regs[I].second;
if (I == (ssize_t)Regs.size() - 1) { // first iteration
- BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::ADDRdRr), Out)
.addReg(In, 0, InSubreg)
.addReg(In, 0, InSubreg);
} else {
- BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::ADCRdRr), Out)
.addReg(In, 0, InSubreg)
.addReg(In, 0, InSubreg);
}
@@ -2168,9 +2172,10 @@
Register InSubreg = Regs[I].second;
if (I == 0) {
unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;
- BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg);
+ BuildMI(*BB, MBBI, DL, TII.get(Opc), Out).addReg(In, 0, InSubreg);
} else {
- BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg);
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::RORRd), Out)
+ .addReg(In, 0, InSubreg);
}
Regs[I] = std::pair(Out, 0);
}
@@ -2182,16 +2187,94 @@
}
}
+// Do a multibyte shift by shifting one bit at a time in a loop. It works very
+// similar to insertMultibyteShift in that it modifies the Regs array in-place
+// (the output registers are stored in this array on return).
+static MachineBasicBlock *insertMultibyteShiftLoop(
+ MachineInstr &MI, MachineBasicBlock *BB, Register ShiftNum,
+ MutableArrayRef<std::pair<Register, int>> Regs, ISD::NodeType Opc) {
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineFunction *MF = BB->getParent();
+ const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ MachineBasicBlock *EntryBB = BB;
+ MachineBasicBlock *CheckBB = MF->CreateMachineBasicBlock(BB->getBasicBlock());
+ MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock(BB->getBasicBlock());
+
+ MF->push_back(CheckBB);
+ MF->push_back(LoopBB);
+ MachineBasicBlock *ExitBB = EntryBB->splitAt(MI, false);
+
+ CheckBB->moveAfter(EntryBB);
+ LoopBB->moveAfter(CheckBB);
+ ExitBB->moveAfter(LoopBB);
+
+ EntryBB->addSuccessor(CheckBB);
+ LoopBB->addSuccessor(CheckBB);
+ CheckBB->addSuccessor(LoopBB);
+ CheckBB->addSuccessor(ExitBB);
+ EntryBB->removeSuccessor(ExitBB);
+
+ // Create virtual registers for the value phi nodes.
+ SmallVector<Register, 4> PhiRegs;
+ SmallVector<std::pair<Register, int>, 4> PhiRegPairs;
+
+ for (size_t I = 0; I < Regs.size(); I++) {
+ Register Reg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+ PhiRegs.push_back(Reg);
+ PhiRegPairs.push_back(std::pair(Reg, 0));
+ }
+
+ // Shift the registers by one.
+ //
+ // Note that we build blocks kinda in a reversed-order (in reality LoopBB is
+ // *after* CheckBB), because in order to build CheckBB, we need to know the
+ // PHI nodes from LoopBB.
+ insertMultibyteShift(LoopBB->end(), LoopBB, DL, PhiRegPairs, Opc, 1);
+
+  // Jump back to the loop's check (CheckBB).
+ BuildMI(LoopBB, DL, TII.get(AVR::RJMPk)).addMBB(CheckBB);
+
+ // Create PHI nodes for the value that is shifted.
+ for (size_t I = 0; I < Regs.size(); I++) {
+ auto Pair = Regs[I];
+
+ BuildMI(CheckBB, DL, TII.get(AVR::PHI), PhiRegs[I])
+ .addReg(Pair.first, 0, Pair.second)
+ .addMBB(EntryBB)
+ .addReg(PhiRegPairs[I].first, 0, PhiRegPairs[I].second)
+ .addMBB(LoopBB);
+
+ Regs[I] = std::pair(PhiRegs[I], 0);
+ }
+
+ // Create a PHI node for the loop counter.
+ Register CntPhi = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+ Register CntDec = MRI.createVirtualRegister(&AVR::GPR8RegClass);
+
+ BuildMI(CheckBB, DL, TII.get(AVR::PHI), CntPhi)
+ .addReg(ShiftNum)
+ .addMBB(EntryBB)
+ .addReg(CntDec)
+ .addMBB(LoopBB);
+
+  // Decrement the counter; if we're done, jump to the exit and otherwise fall
+  // through to the loop body (LoopBB).
+ BuildMI(CheckBB, DL, TII.get(AVR::DECRd), CntDec).addReg(CntPhi);
+ BuildMI(CheckBB, DL, TII.get(AVR::BRMIk)).addMBB(ExitBB);
+
+ return ExitBB;
+}
+
// Do a wide (32-bit) shift.
MachineBasicBlock *
AVRTargetLowering::insertWideShift(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- const DebugLoc &dl = MI.getDebugLoc();
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator MBBI(&MI);
- // How much to shift to the right (meaning: a negative number indicates a left
- // shift).
- int64_t ShiftAmt = MI.getOperand(4).getImm();
ISD::NodeType Opc;
switch (MI.getOpcode()) {
case AVR::Lsl32:
@@ -2214,7 +2297,19 @@
};
// Do the shift. The registers are modified in-place.
- insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);
+ int64_t ShiftAmt = 1;
+ if (MI.getOperand(4).isImm()) {
+ // The shift amount is known at compile time.
+ ShiftAmt = MI.getOperand(4).getImm();
+ insertMultibyteShift(MBBI, BB, MI.getDebugLoc(), Registers, Opc, ShiftAmt);
+ } else {
+ // The shift amount is not known at compile time. We need to create a loop.
+ Register ShiftNum = MI.getOperand(4).getReg();
+ BB = insertMultibyteShiftLoop(MI, BB, ShiftNum, Registers, Opc);
+
+ // Insert REG_SEQUENCE instructions at the beginning of ExitBB.
+ MBBI = BB->begin();
+ }
// Combine the 8-bit registers into 16-bit register pairs.
// This done either from LSB to MSB or from MSB to LSB, depending on the
@@ -2230,24 +2325,28 @@
if (Opc != ISD::SHL &&
(Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) {
// Use the resulting registers starting with the least significant byte.
- BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::REG_SEQUENCE),
+ MI.getOperand(0).getReg())
.addReg(Registers[3].first, 0, Registers[3].second)
.addImm(AVR::sub_lo)
.addReg(Registers[2].first, 0, Registers[2].second)
.addImm(AVR::sub_hi);
- BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::REG_SEQUENCE),
+ MI.getOperand(1).getReg())
.addReg(Registers[1].first, 0, Registers[1].second)
.addImm(AVR::sub_lo)
.addReg(Registers[0].first, 0, Registers[0].second)
.addImm(AVR::sub_hi);
} else {
// Use the resulting registers starting with the most significant byte.
- BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::REG_SEQUENCE),
+ MI.getOperand(1).getReg())
.addReg(Registers[0].first, 0, Registers[0].second)
.addImm(AVR::sub_hi)
.addReg(Registers[1].first, 0, Registers[1].second)
.addImm(AVR::sub_lo);
- BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
+ BuildMI(*BB, MBBI, DL, TII.get(AVR::REG_SEQUENCE),
+ MI.getOperand(0).getReg())
.addReg(Registers[2].first, 0, Registers[2].second)
.addImm(AVR::sub_hi)
.addReg(Registers[3].first, 0, Registers[3].second)
Index: llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -229,6 +229,11 @@
return false;
}
+/// Returns whether the given logic operation effectively does not depend on
+/// its first argument.
+///
+/// For instance, `reg & 0x00` will behave the same way regardless of the
+/// reg's value.
bool AVRExpandPseudo::isLogicRegOpUndef(unsigned Op, unsigned ImmVal) const {
// ANDI Rd, 0x00 clears all input bits.
if (Op == AVR::ANDIRdK && ImmVal == 0x00)
Index: llvm/lib/Target/AVR/AVR.h
===================================================================
--- llvm/lib/Target/AVR/AVR.h
+++ llvm/lib/Target/AVR/AVR.h
@@ -25,7 +25,6 @@
class FunctionPass;
class PassRegistry;
-Pass *createAVRShiftExpandPass();
FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAVRExpandPseudoPass();
@@ -34,7 +33,6 @@
void initializeAVRDAGToDAGISelPass(PassRegistry &);
void initializeAVRExpandPseudoPass(PassRegistry &);
-void initializeAVRShiftExpandPass(PassRegistry &);
/// Contains the AVR backend.
namespace AVR {
Index: clang/docs/tools/clang-formatted-files.txt
===================================================================
--- clang/docs/tools/clang-formatted-files.txt
+++ clang/docs/tools/clang-formatted-files.txt
@@ -6384,7 +6384,6 @@
llvm/lib/Target/AVR/AVRRegisterInfo.cpp
llvm/lib/Target/AVR/AVRRegisterInfo.h
llvm/lib/Target/AVR/AVRSelectionDAGInfo.h
-llvm/lib/Target/AVR/AVRShiftExpand.cpp
llvm/lib/Target/AVR/AVRSubtarget.cpp
llvm/lib/Target/AVR/AVRSubtarget.h
llvm/lib/Target/AVR/AVRTargetMachine.cpp
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits