comaniac commented on a change in pull request #8467: URL: https://github.com/apache/tvm/pull/8467#discussion_r669863574
########## File path: src/tir/schedule/primitive/fuse_split.cc ########## @@ -0,0 +1,483 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "../utils.h" +namespace tvm { +namespace tir { + +/*! \brief Append a new predicate to the each children of type BlockRealize (not recursively) */ +class PredicateUpdater : public StmtMutator { + public: + /*! + * \brief Constructor + * \param predicate The predicate to be apppend to BlockRealizeNode + */ + explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana) + : predicate_(predicate) { + if (!ana->CanProve(predicate)) { + add_predicate_ = true; + } Review comment: ```suggestion add_predicate_ = !ana->CanProve(predicate); ``` ########## File path: python/tvm/tir/schedule/schedule.py ########## @@ -257,6 +257,133 @@ def get_loops(self, block: BlockRV) -> List[LoopRV]: return _ffi_api_schedule.ScheduleGetLoops(self, block) # type: ignore # pylint: disable=no-member ########## Schedule: loops manipulation ########## + def fuse(self, *loops: List[LoopRV]) -> LoopRV: + """Fuse a list of consecutive loops into one. It requires: + 1) The loops can't have annotations or thread bindings. 
+ 2) The (i+1)-th loop must be the only child of the i-th loop. + 3) All loops must start with 0. + + Parameters + ---------- + *loops : List[LoopRV] + The loops to be fused + + Returns + ---------- + fused_loop : LoopRV + The new loop after fusion + + Examples + -------- + + Before fuse, in TensorIR, the IR is: + + .. code-block:: python + + @tvm.script.tir + def before_fuse(a: ty.handle, b: ty.handle) -> None: + A = tir.match_buffer(a, (128, 128)) + B = tir.match_buffer(b, (128, 128)) + with tir.block([128, 128], "B") as [vi, vj]: + B[vi, vj] = A[vi, vj] * 2.0 + + Create the schedule and do fuse: + + .. code-block:: python + + sch = tir.Schedule(before_fuse, debug_mode=True) + i, j = sch.get_loops(sch.get_block("B")) + sch.fuse(i, j) + print(tvm.script.asscript(sch.mod["main"])) + + After applying fuse, the IR becomes: + + .. code-block:: python + + @tvm.script.tir + def after_fuse(a: ty.handle, b: ty.handle) -> None: + A = tir.match_buffer(a, (128, 128)) + B = tir.match_buffer(b, [128, 128]) + for i0_i1_fused in tir.serial(0, 16384): + with tir.block([128, 128], "B") as [vi, vj]: + tir.bind(vi, tir.floordiv(i0_i1_fused, 128)) + tir.bind(vj, tir.floormod(i0_i1_fused, 128)) + tir.reads([A[vi, vj]]) + tir.writes([B[vi, vj]]) + B[vi, vj] = A[vi, vj] * 2.0 + + """ + return _ffi_api_schedule.ScheduleFuse(self, loops) # type: ignore # pylint: disable=no-member + + def split( + self, + loop: LoopRV, + factors: List[Optional[ExprRV]], Review comment: nit: I'll prefer the following typing, as the element in this list is not really "optional". ```suggestion factors: List[Union[ExprRV, None]], ``` ########## File path: src/tir/schedule/primitive/fuse_split.cc ########## @@ -0,0 +1,483 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "../utils.h" +namespace tvm { +namespace tir { + +/*! \brief Append a new predicate to the each children of type BlockRealize (not recursively) */ +class PredicateUpdater : public StmtMutator { + public: + /*! + * \brief Constructor + * \param predicate The predicate to be apppend to BlockRealizeNode + */ + explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana) + : predicate_(predicate) { + if (!ana->CanProve(predicate)) { + add_predicate_ = true; + } + } + + private: + // For each direct child of type BlockRealizeNode, append the predicate + Stmt VisitStmt_(const BlockRealizeNode* realize) final { + // We do not recursively do this + if (add_predicate_) { + ObjectPtr<BlockRealizeNode> n = CopyOnWrite(realize); + n->predicate = n->predicate && predicate_; + return BlockRealize(n); + } else { + return GetRef<BlockRealize>(realize); + } + } + + /*! \brief The predicate to be added */ + const PrimExpr& predicate_; + /*! \brief whether to add predicate */ + bool add_predicate_; +}; +/*! \brief Substitute vars and collect the reuse mapping of opaque blocks */ Review comment: Add an empty line. 
########## File path: python/tvm/tir/schedule/schedule.py ########## @@ -43,7 +43,7 @@ class BlockRV(Object): """A random variable that refers to a block""" -ExprRV = PrimExpr # A random variable that evaluates to an integer +ExprRV = Union[PrimExpr] # A random variable that evaluates to an integer Review comment: This is exactly the same as `PrimExpr`? ########## File path: src/tir/schedule/primitive/fuse_split.cc ########## @@ -0,0 +1,483 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "../utils.h" +namespace tvm { +namespace tir { + +/*! \brief Append a new predicate to the each children of type BlockRealize (not recursively) */ +class PredicateUpdater : public StmtMutator { + public: + /*! 
+ * \brief Constructor + * \param predicate The predicate to be apppend to BlockRealizeNode + */ + explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana) + : predicate_(predicate) { + if (!ana->CanProve(predicate)) { + add_predicate_ = true; + } + } + + private: + // For each direct child of type BlockRealizeNode, append the predicate + Stmt VisitStmt_(const BlockRealizeNode* realize) final { + // We do not recursively do this + if (add_predicate_) { + ObjectPtr<BlockRealizeNode> n = CopyOnWrite(realize); + n->predicate = n->predicate && predicate_; + return BlockRealize(n); + } else { + return GetRef<BlockRealize>(realize); + } + } + + /*! \brief The predicate to be added */ + const PrimExpr& predicate_; + /*! \brief whether to add predicate */ + bool add_predicate_; +}; +/*! \brief Substitute vars and collect the reuse mapping of opaque blocks */ +class IRSubstituteAndCollectOpaqueBlock : public StmtExprMutator { + public: + explicit IRSubstituteAndCollectOpaqueBlock(std::function<Optional<PrimExpr>(const Var&)> vmap, + Map<Block, Block>* opaque_blocks) + : vmap_(vmap), opaque_blocks_(opaque_blocks) {} + + private: + PrimExpr VisitExpr_(const VarNode* op) final { + Var var = GetRef<Var>(op); + Optional<PrimExpr> ret = vmap_(var); + if (ret.defined()) { + return ret.value(); + } else { + return std::move(var); + } + } + + Stmt VisitStmt_(const BlockRealizeNode* op) final { + Stmt res = StmtMutator::VisitStmt_(op); + if (op->block->iter_vars.empty()) { + const BlockRealizeNode* realize = res.as<BlockRealizeNode>(); + opaque_blocks_->Set(op->block, realize->block); + } + return res; + } + + /*! \brief The substitute function */ + std::function<Optional<PrimExpr>(const Var&)> vmap_; + /*! 
\brief The reuse mapping */ + Map<Block, Block>* opaque_blocks_; +}; + +Stmt SubstituteAndCollectOpaqueBlock(Stmt stmt, Map<Block, Block>* opaque_blocks, + std::function<Optional<PrimExpr>(const Var&)> vmap) { + return IRSubstituteAndCollectOpaqueBlock(vmap, opaque_blocks)(std::move(stmt)); +} + +/*! \brief Simplify the binding of block realize and update the opaque block reuse mapping*/ +class BlockRealizeRewriter : public StmtExprMutator { + public: + explicit BlockRealizeRewriter( + const std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual>& loop_map, + Map<Block, Block>* opaque_blocks) + : opaque_blocks_(opaque_blocks) { + loop_map_.insert(loop_map.begin(), loop_map.end()); + } + + private: + Stmt VisitStmt_(const ForNode* op) final { + loop_map_[op->loop_var] = Range::FromMinExtent(op->min, op->extent); + Stmt res = StmtMutator::VisitStmt_(op); + loop_map_.erase(op->loop_var); + return res; + } + + Stmt VisitStmt_(const BlockRealizeNode* op) final { + // skip opaque block and update mapping + if (op->iter_values.empty()) { + Stmt res = StmtMutator::VisitStmt_(op); + const BlockRealizeNode* realize = res.as<BlockRealizeNode>(); + for (const std::pair<Block, Block>& entry : *opaque_blocks_) { + if (entry.second.same_as(op->block)) { + opaque_blocks_->Set(entry.first, realize->block); + break; + } + } + return res; + } + auto v = arith::IterMapSimplify(op->iter_values, loop_map_, op->predicate, false); + if (v.same_as(op->iter_values)) { + return GetRef<Stmt>(op); + } else { + auto n = CopyOnWrite(op); + n->iter_values = std::move(v); + return Stmt(n); + } + } + /*! \brief The range of loops */ + std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map_; + /*! 
\brief The reuse mapping */ + Map<Block, Block>* opaque_blocks_; +}; + +Stmt SimplifyBindings(const Stmt& stmt, const Array<StmtSRef>& loops, + Map<Block, Block>* opaque_blocks) { + std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map; + for (const StmtSRef& sref : loops) { + const auto* loop = sref->StmtAs<ForNode>(); + loop_map[loop->loop_var] = Range::FromMinExtent(loop->min, loop->extent); + } + BlockRealizeRewriter rewriter(loop_map, opaque_blocks); + return rewriter(stmt); +} + +class NotLoopError : public ScheduleError { + public: + explicit NotLoopError(IRModule mod, String type) : mod_(mod), type_(type) {} + + String FastErrorString() const final { + return "ScheduleError: this primitive only operates on a " + "loop"; Review comment: ```suggestion return "ScheduleError: this primitive only operates on a loop"; ``` ########## File path: python/tvm/tir/schedule/schedule.py ########## @@ -257,6 +257,133 @@ def get_loops(self, block: BlockRV) -> List[LoopRV]: return _ffi_api_schedule.ScheduleGetLoops(self, block) # type: ignore # pylint: disable=no-member ########## Schedule: loops manipulation ########## + def fuse(self, *loops: List[LoopRV]) -> LoopRV: + """Fuse a list of consecutive loops into one. It requires: + 1) The loops can't have annotations or thread bindings. + 2) The (i+1)-th loop must be the only child of the i-th loop. + 3) All loops must start with 0. + + Parameters + ---------- + *loops : List[LoopRV] + The loops to be fused + + Returns + ---------- + fused_loop : LoopRV + The new loop after fusion + + Examples + -------- + + Before fuse, in TensorIR, the IR is: + + .. code-block:: python + + @tvm.script.tir + def before_fuse(a: ty.handle, b: ty.handle) -> None: + A = tir.match_buffer(a, (128, 128)) + B = tir.match_buffer(b, (128, 128)) + with tir.block([128, 128], "B") as [vi, vj]: + B[vi, vj] = A[vi, vj] * 2.0 + + Create the schedule and do fuse: + + .. 
code-block:: python + + sch = tir.Schedule(before_fuse, debug_mode=True) + i, j = sch.get_loops(sch.get_block("B")) + sch.fuse(i, j) + print(tvm.script.asscript(sch.mod["main"])) + + After applying fuse, the IR becomes: + + .. code-block:: python + + @tvm.script.tir + def after_fuse(a: ty.handle, b: ty.handle) -> None: + A = tir.match_buffer(a, (128, 128)) + B = tir.match_buffer(b, [128, 128]) + for i0_i1_fused in tir.serial(0, 16384): + with tir.block([128, 128], "B") as [vi, vj]: + tir.bind(vi, tir.floordiv(i0_i1_fused, 128)) + tir.bind(vj, tir.floormod(i0_i1_fused, 128)) + tir.reads([A[vi, vj]]) + tir.writes([B[vi, vj]]) + B[vi, vj] = A[vi, vj] * 2.0 + + """ + return _ffi_api_schedule.ScheduleFuse(self, loops) # type: ignore # pylint: disable=no-member + + def split( + self, + loop: LoopRV, + factors: List[Optional[ExprRV]], + ) -> List[LoopRV]: + """Split a loop into a list of consecutive loops. It requires: + 1) The loop can't have annotation or thread binding. + 2) The loop must start with 0. + Predicates may be added to ensure the total loop numbers keeps unchanged. + In `factors`, at most one of the factors can be None or -1, + which will be automatically inferred. + Parameters + ---------- + loop : LoopRV + The loop to be split + + factors: List[Optional[ExprRV]] + The splitting factors + + Returns + ---------- + split_loops : List[LoopRV] + The new loops after split + + Examples + -------- + + Before split, in TensorIR, the IR is: + + .. code-block:: python + + @tvm.script.tir + def before_split(a: ty.handle, b: ty.handle) -> None: + A = tir.match_buffer(a, (128, 128)) + B = tir.match_buffer(b, (128, 128)) + with tir.block([128, 128], "B") as [vi, vj]: + B[vi, vj] = A[vi, vj] * 2.0 + + Create the schedule and do fuse: + + .. 
code-block:: python + + sch = tir.Schedule(before_split, debug_mode=True) + i, j = sch.get_loops(sch.get_block("B")) + sch.split(i, factors=[2, 64]) + print(tvm.script.asscript(sch.mod["main"])) + + After applying split, the IR becomes: + + .. code-block:: python + + @tvm.script.tir + def after_split(a: ty.handle, b: ty.handle) -> None: + A = tir.match_buffer(a, (128, 128)) + B = tir.match_buffer(b, [128, 128]) + for i0_outer, i0_inner, i1 in tir.grid(2, 64, 128): + with tir.block([128, 128], "B") as [vi, vj]: + tir.bind(vi, ((i0_outer*64) + i0_inner)) + tir.bind(vj, i1) + tir.reads([A[vi, vj]]) + tir.writes([B[vi, vj]]) + B[vi, vj] = A[vi, vj] * 2.0 + + """ + for i, factor in enumerate(factors): + if factor is None: Review comment: Need to check if it has at most one `None`? Update: I found the check in the C++ implementation. Then it would be better to comment here saying that this will be checked later. ########## File path: src/tir/schedule/primitive/fuse_split.cc ########## @@ -0,0 +1,483 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "../utils.h" +namespace tvm { +namespace tir { + +/*! 
\brief Append a new predicate to the each children of type BlockRealize (not recursively) */ +class PredicateUpdater : public StmtMutator { + public: + /*! + * \brief Constructor + * \param predicate The predicate to be apppend to BlockRealizeNode + */ + explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana) + : predicate_(predicate) { + if (!ana->CanProve(predicate)) { + add_predicate_ = true; + } + } + + private: + // For each direct child of type BlockRealizeNode, append the predicate + Stmt VisitStmt_(const BlockRealizeNode* realize) final { + // We do not recursively do this + if (add_predicate_) { + ObjectPtr<BlockRealizeNode> n = CopyOnWrite(realize); + n->predicate = n->predicate && predicate_; + return BlockRealize(n); + } else { + return GetRef<BlockRealize>(realize); + } + } + + /*! \brief The predicate to be added */ + const PrimExpr& predicate_; + /*! \brief whether to add predicate */ + bool add_predicate_; +}; +/*! \brief Substitute vars and collect the reuse mapping of opaque blocks */ +class IRSubstituteAndCollectOpaqueBlock : public StmtExprMutator { + public: + explicit IRSubstituteAndCollectOpaqueBlock(std::function<Optional<PrimExpr>(const Var&)> vmap, + Map<Block, Block>* opaque_blocks) + : vmap_(vmap), opaque_blocks_(opaque_blocks) {} + + private: + PrimExpr VisitExpr_(const VarNode* op) final { + Var var = GetRef<Var>(op); + Optional<PrimExpr> ret = vmap_(var); + if (ret.defined()) { + return ret.value(); + } else { + return std::move(var); + } + } + + Stmt VisitStmt_(const BlockRealizeNode* op) final { + Stmt res = StmtMutator::VisitStmt_(op); + if (op->block->iter_vars.empty()) { + const BlockRealizeNode* realize = res.as<BlockRealizeNode>(); + opaque_blocks_->Set(op->block, realize->block); + } + return res; + } + + /*! \brief The substitute function */ + std::function<Optional<PrimExpr>(const Var&)> vmap_; + /*! 
\brief The reuse mapping */ + Map<Block, Block>* opaque_blocks_; +}; + +Stmt SubstituteAndCollectOpaqueBlock(Stmt stmt, Map<Block, Block>* opaque_blocks, + std::function<Optional<PrimExpr>(const Var&)> vmap) { + return IRSubstituteAndCollectOpaqueBlock(vmap, opaque_blocks)(std::move(stmt)); +} + +/*! \brief Simplify the binding of block realize and update the opaque block reuse mapping*/ +class BlockRealizeRewriter : public StmtExprMutator { + public: + explicit BlockRealizeRewriter( + const std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual>& loop_map, + Map<Block, Block>* opaque_blocks) + : opaque_blocks_(opaque_blocks) { + loop_map_.insert(loop_map.begin(), loop_map.end()); + } + + private: + Stmt VisitStmt_(const ForNode* op) final { + loop_map_[op->loop_var] = Range::FromMinExtent(op->min, op->extent); + Stmt res = StmtMutator::VisitStmt_(op); + loop_map_.erase(op->loop_var); + return res; + } + + Stmt VisitStmt_(const BlockRealizeNode* op) final { + // skip opaque block and update mapping + if (op->iter_values.empty()) { + Stmt res = StmtMutator::VisitStmt_(op); + const BlockRealizeNode* realize = res.as<BlockRealizeNode>(); + for (const std::pair<Block, Block>& entry : *opaque_blocks_) { + if (entry.second.same_as(op->block)) { + opaque_blocks_->Set(entry.first, realize->block); + break; + } + } + return res; + } + auto v = arith::IterMapSimplify(op->iter_values, loop_map_, op->predicate, false); + if (v.same_as(op->iter_values)) { + return GetRef<Stmt>(op); + } else { + auto n = CopyOnWrite(op); + n->iter_values = std::move(v); + return Stmt(n); + } + } + /*! \brief The range of loops */ + std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map_; + /*! 
\brief The reuse mapping */ + Map<Block, Block>* opaque_blocks_; +}; + +Stmt SimplifyBindings(const Stmt& stmt, const Array<StmtSRef>& loops, + Map<Block, Block>* opaque_blocks) { + std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map; + for (const StmtSRef& sref : loops) { + const auto* loop = sref->StmtAs<ForNode>(); + loop_map[loop->loop_var] = Range::FromMinExtent(loop->min, loop->extent); + } + BlockRealizeRewriter rewriter(loop_map, opaque_blocks); + return rewriter(stmt); +} + +class NotLoopError : public ScheduleError { + public: + explicit NotLoopError(IRModule mod, String type) : mod_(mod), type_(type) {} + + String FastErrorString() const final { + return "ScheduleError: this primitive only operates on a " + "loop"; + } + + String DetailRenderTemplate() const final { + return "this primitive only operates on a loop, but the StmtSref passed in points to" + "type: {0} "; + } + + IRModule mod() const final { return mod_; } + Array<ObjectRef> LocationsOfInterest() const final { return {type_}; } + + IRModule mod_; + String type_; +}; + +class HasAnnotationError : public ScheduleError { + public: + explicit HasAnnotationError(IRModule mod, For loop) : mod_(mod), loop_(loop) {} + + String FastErrorString() const final { + return "ScheduleError: The primitive can't be applied because the loop has annotation"; + } + + String DetailRenderTemplate() const final { + return "The primitive can't be applied because the loop {0} has annotation"; + } + + IRModule mod() const final { return mod_; } + Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; } + + IRModule mod_; + For loop_; +}; + +class HasThreadBindingError : public ScheduleError { + public: + explicit HasThreadBindingError(IRModule mod, For loop) : mod_(mod), loop_(loop) {} + + String FastErrorString() const final { + return "ScheduleError: The primitive can't be applied because the loop has thread binding"; + } + + String DetailRenderTemplate() const final { + return "The 
primitive can't be applied because the loop {0} has thread binding"; + } + + IRModule mod() const final { return mod_; } + Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; } + + IRModule mod_; + For loop_; +}; + +class OuterNotInnerParent : public ScheduleError { + public: + explicit OuterNotInnerParent(IRModule mod, For outer, For inner) + : mod_(mod), outer_(outer), inner_(inner) {} + + String FastErrorString() const final { + return "ScheduleError: the outer loop is not the parent of the inner loop"; + } + + String DetailRenderTemplate() const final { + return "The loops can't be fused because the outer loop {0} is not the parent of the inner " + "loop {1}"; + } + + IRModule mod() const final { return mod_; } + Array<ObjectRef> LocationsOfInterest() const final { return {outer_, inner_}; } + + IRModule mod_; + For outer_; + For inner_; +}; + +class NotOnlyChildError : public ScheduleError { + public: + explicit NotOnlyChildError(IRModule mod, For outer, For inner) + : mod_(mod), outer_(outer), inner_(inner) {} + + String FastErrorString() const final { + return "ScheduleError: the inner loop is not the only child of outer loop"; + } + + String DetailRenderTemplate() const final { + return "The loops can't be fused because the inner loop {1} is not the only child of outer " + "loop {0}."; + } + + IRModule mod() const final { return mod_; } + Array<ObjectRef> LocationsOfInterest() const final { return {outer_, inner_}; } + + IRModule mod_; + For outer_; + For inner_; +}; + +class LoopNotStartWithZeroError : public ScheduleError { + public: + explicit LoopNotStartWithZeroError(IRModule mod, For loop) : mod_(mod), loop_(loop) {} + + String FastErrorString() const final { + return "ScheduleError: the primitive only supports loop starting with 0"; + } + + String DetailRenderTemplate() const final { + return "The loop {0} does not start with 0, which is not supported"; + } + + IRModule mod() const final { return mod_; } + Array<ObjectRef> 
LocationsOfInterest() const final { return {loop_}; } + + IRModule mod_; + For loop_; +}; + +class NotSingleInferFactorError : public ScheduleError { + public: + explicit NotSingleInferFactorError(IRModule mod) : mod_(mod) {} + + String FastErrorString() const final { + return "ScheduleError: only one factor can be specified as -1 or none"; + } + + String DetailRenderTemplate() const final { + return "Only one factor can be specified as -1 or none"; + } + + IRModule mod() const final { return mod_; } + Array<ObjectRef> LocationsOfInterest() const final { return {}; } + + IRModule mod_; +}; + +class WrongFactorProductError : public ScheduleError { + public: + explicit WrongFactorProductError(IRModule mod, For loop) : mod_(mod), loop_(loop) {} + + String FastErrorString() const final { + return "ScheduleError: The product of factors is not larger than or equal to the extent of " + "loop"; + } + + String DetailRenderTemplate() const final { + return "The product of factors is not larger than or equal to the extent of loop {0}"; + } + + IRModule mod() const final { return mod_; } + Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; } + + IRModule mod_; + For loop_; +}; + +Array<StmtSRef> Split(ScheduleState self, const StmtSRef& loop_sref, + const Array<PrimExpr>& factors) { + // Invariance + // - The total repeat number has not changed for each direct child block with updating predicate. + // - The execution order has not changed. (The block executes with the same args and the same + // order with before. + // Step 1. 
Check correctness + GetScopeRootAndCheckStagePipeline(self, loop_sref); + const auto* loop = loop_sref->StmtAs<ForNode>(); + if (loop == nullptr) { + throw NotLoopError(self->mod, loop_sref->stmt->GetTypeKey()); + } + if (!loop->annotations.empty()) { + throw HasAnnotationError(self->mod, GetRef<For>(loop)); + } + if (loop->thread_binding.defined()) { + throw HasThreadBindingError(self->mod, GetRef<For>(loop)); + } + // Currently, loops starting with 0 is not supported + arith::Analyzer analyzer; + if (!analyzer.CanProve(loop->min == 0)) { + throw LoopNotStartWithZeroError(self->mod, GetRef<For>(loop)); + } + PrimExpr tot_length = 1; + int infer_index = -1; + for (size_t i = 0; i < factors.size(); i++) { + if (!analyzer.CanProve(factors[i] == -1)) { + tot_length *= factors[i]; + } else { + if (infer_index != -1) { + throw NotSingleInferFactorError(self->mod); + } else { + infer_index = i; + } + } + } + // Step 2. infer factors if needed + Array<PrimExpr> inferred_factors(factors); + if (infer_index != -1) { + inferred_factors.Set(infer_index, + analyzer.Simplify(floordiv(loop->extent + tot_length - 1, tot_length))); + } else { + if (!analyzer.CanProve(tot_length >= loop->extent)) { + throw WrongFactorProductError(self->mod, GetRef<For>(loop)); + } Review comment: ```suggestion } else if (!analyzer.CanProve(tot_length >= loop->extent)) { throw WrongFactorProductError(self->mod, GetRef<For>(loop)); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
