junrushao1994 commented on a change in pull request #8467:
URL: https://github.com/apache/tvm/pull/8467#discussion_r670062536



##########
File path: src/tir/schedule/primitive/fuse_split.cc
##########
@@ -0,0 +1,483 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "../utils.h"
+namespace tvm {
+namespace tir {
+
+/*! \brief Append a new predicate to the each children of type BlockRealize 
(not recursively) */
+class PredicateUpdater : public StmtMutator {
+ public:
+  /*!
+   * \brief Constructor
+   * \param predicate The predicate to be apppend to BlockRealizeNode
+   */
+  explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana)
+      : predicate_(predicate) {
+    if (!ana->CanProve(predicate)) {
+      add_predicate_ = true;
+    }
+  }
+
+ private:
+  // For each direct child of type BlockRealizeNode, append the predicate
+  Stmt VisitStmt_(const BlockRealizeNode* realize) final {
+    // We do not recursively do this
+    if (add_predicate_) {
+      ObjectPtr<BlockRealizeNode> n = CopyOnWrite(realize);
+      n->predicate = n->predicate && predicate_;
+      return BlockRealize(n);
+    } else {
+      return GetRef<BlockRealize>(realize);
+    }
+  }
+
+  /*! \brief The predicate to be added */
+  const PrimExpr& predicate_;
+  /*! \brief whether to add predicate */
+  bool add_predicate_;
+};
+/*! \brief Substitute vars and collect the reuse mapping of opaque blocks */
+class IRSubstituteAndCollectOpaqueBlock : public StmtExprMutator {
+ public:
+  explicit 
IRSubstituteAndCollectOpaqueBlock(std::function<Optional<PrimExpr>(const Var&)> 
vmap,
+                                             Map<Block, Block>* opaque_blocks)
+      : vmap_(vmap), opaque_blocks_(opaque_blocks) {}
+
+ private:
+  PrimExpr VisitExpr_(const VarNode* op) final {
+    Var var = GetRef<Var>(op);
+    Optional<PrimExpr> ret = vmap_(var);
+    if (ret.defined()) {
+      return ret.value();
+    } else {
+      return std::move(var);
+    }
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    Stmt res = StmtMutator::VisitStmt_(op);
+    if (op->block->iter_vars.empty()) {
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      opaque_blocks_->Set(op->block, realize->block);
+    }
+    return res;
+  }
+
+  /*! \brief The substitute function */
+  std::function<Optional<PrimExpr>(const Var&)> vmap_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SubstituteAndCollectOpaqueBlock(Stmt stmt, Map<Block, Block>* 
opaque_blocks,
+                                     std::function<Optional<PrimExpr>(const 
Var&)> vmap) {
+  return IRSubstituteAndCollectOpaqueBlock(vmap, 
opaque_blocks)(std::move(stmt));
+}
+
+/*! \brief Simplify the binding of block realize and update the opaque block 
reuse mapping*/
+class BlockRealizeRewriter : public StmtExprMutator {
+ public:
+  explicit BlockRealizeRewriter(
+      const std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual>& 
loop_map,
+      Map<Block, Block>* opaque_blocks)
+      : opaque_blocks_(opaque_blocks) {
+    loop_map_.insert(loop_map.begin(), loop_map.end());
+  }
+
+ private:
+  Stmt VisitStmt_(const ForNode* op) final {
+    loop_map_[op->loop_var] = Range::FromMinExtent(op->min, op->extent);
+    Stmt res = StmtMutator::VisitStmt_(op);
+    loop_map_.erase(op->loop_var);
+    return res;
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    // skip opaque block and update mapping
+    if (op->iter_values.empty()) {
+      Stmt res = StmtMutator::VisitStmt_(op);
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      for (const std::pair<Block, Block>& entry : *opaque_blocks_) {
+        if (entry.second.same_as(op->block)) {
+          opaque_blocks_->Set(entry.first, realize->block);
+          break;
+        }
+      }
+      return res;
+    }
+    auto v = arith::IterMapSimplify(op->iter_values, loop_map_, op->predicate, 
false);
+    if (v.same_as(op->iter_values)) {
+      return GetRef<Stmt>(op);
+    } else {
+      auto n = CopyOnWrite(op);
+      n->iter_values = std::move(v);
+      return Stmt(n);
+    }
+  }
+  /*! \brief The range of loops */
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SimplifyBindings(const Stmt& stmt, const Array<StmtSRef>& loops,
+                      Map<Block, Block>* opaque_blocks) {
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map;
+  for (const StmtSRef& sref : loops) {
+    const auto* loop = sref->StmtAs<ForNode>();
+    loop_map[loop->loop_var] = Range::FromMinExtent(loop->min, loop->extent);
+  }
+  BlockRealizeRewriter rewriter(loop_map, opaque_blocks);
+  return rewriter(stmt);
+}
+
+class NotLoopError : public ScheduleError {
+ public:
+  explicit NotLoopError(IRModule mod, String type) : mod_(mod), type_(type) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: this primitive only operates on a "
+           "loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "this primitive only operates on a loop, but the StmtSref passed in 
points to"
+           "type: {0} ";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {type_}; }
+
+  IRModule mod_;
+  String type_;
+};
+
+class HasAnnotationError : public ScheduleError {
+ public:
+  explicit HasAnnotationError(IRModule mod, For loop) : mod_(mod), loop_(loop) 
{}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The primitive can't be applied because the loop has 
annotation";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The primitive can't be applied because the loop {0} has 
annotation";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class HasThreadBindingError : public ScheduleError {
+ public:
+  explicit HasThreadBindingError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The primitive can't be applied because the loop has 
thread binding";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The primitive can't be applied because the loop {0} has thread 
binding";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class OuterNotInnerParent : public ScheduleError {
+ public:
+  explicit OuterNotInnerParent(IRModule mod, For outer, For inner)
+      : mod_(mod), outer_(outer), inner_(inner) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the outer loop is not the parent of the inner loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loops can't be fused because the outer loop {0} is not the 
parent of the inner "
+           "loop {1}";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {outer_, 
inner_}; }
+
+  IRModule mod_;
+  For outer_;
+  For inner_;
+};
+
+class NotOnlyChildError : public ScheduleError {
+ public:
+  explicit NotOnlyChildError(IRModule mod, For outer, For inner)
+      : mod_(mod), outer_(outer), inner_(inner) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the inner loop is not the only child of outer loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loops can't be fused because the inner loop {1} is not the 
only child of outer "
+           "loop {0}.";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {outer_, 
inner_}; }
+
+  IRModule mod_;
+  For outer_;
+  For inner_;
+};
+
+class LoopNotStartWithZeroError : public ScheduleError {
+ public:
+  explicit LoopNotStartWithZeroError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the primitive only supports loop starting with 0";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loop {0} does not start with 0, which is not supported";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class NotSingleInferFactorError : public ScheduleError {
+ public:
+  explicit NotSingleInferFactorError(IRModule mod) : mod_(mod) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: only one factor can be specified as -1 or none";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "Only one factor can be specified as -1 or none";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {}; }
+
+  IRModule mod_;
+};
+
+class WrongFactorProductError : public ScheduleError {
+ public:
+  explicit WrongFactorProductError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The product of factors is not larger than or equal 
to the extent of "
+           "loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The product of factors is not larger than or equal to the extent 
of loop {0}";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+Array<StmtSRef> Split(ScheduleState self, const StmtSRef& loop_sref,
+                      const Array<PrimExpr>& factors) {
+  // Invariance
+  // - The total repeat number has not changed for each direct child block 
with updating predicate.
+  // - The execution order has not changed. (The block executes with the same 
args and the same
+  // order with before.
+  // Step 1. Check correctness
+  GetScopeRootAndCheckStagePipeline(self, loop_sref);
+  const auto* loop = loop_sref->StmtAs<ForNode>();
+  if (loop == nullptr) {
+    throw NotLoopError(self->mod, loop_sref->stmt->GetTypeKey());
+  }

Review comment:
       Remove this. It should be an internal error, because on the user side we 
have already guaranteed that it is a LoopRV.

##########
File path: src/tir/schedule/primitive/fuse_split.cc
##########
@@ -0,0 +1,483 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "../utils.h"
+namespace tvm {
+namespace tir {
+
+/*! \brief Append a new predicate to the each children of type BlockRealize 
(not recursively) */
+class PredicateUpdater : public StmtMutator {
+ public:
+  /*!
+   * \brief Constructor
+   * \param predicate The predicate to be apppend to BlockRealizeNode
+   */
+  explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana)
+      : predicate_(predicate) {
+    if (!ana->CanProve(predicate)) {
+      add_predicate_ = true;
+    }
+  }
+
+ private:
+  // For each direct child of type BlockRealizeNode, append the predicate
+  Stmt VisitStmt_(const BlockRealizeNode* realize) final {
+    // We do not recursively do this
+    if (add_predicate_) {
+      ObjectPtr<BlockRealizeNode> n = CopyOnWrite(realize);
+      n->predicate = n->predicate && predicate_;
+      return BlockRealize(n);
+    } else {
+      return GetRef<BlockRealize>(realize);
+    }
+  }
+
+  /*! \brief The predicate to be added */
+  const PrimExpr& predicate_;
+  /*! \brief whether to add predicate */
+  bool add_predicate_;
+};
+/*! \brief Substitute vars and collect the reuse mapping of opaque blocks */
+class IRSubstituteAndCollectOpaqueBlock : public StmtExprMutator {
+ public:
+  explicit 
IRSubstituteAndCollectOpaqueBlock(std::function<Optional<PrimExpr>(const Var&)> 
vmap,
+                                             Map<Block, Block>* opaque_blocks)
+      : vmap_(vmap), opaque_blocks_(opaque_blocks) {}
+
+ private:
+  PrimExpr VisitExpr_(const VarNode* op) final {
+    Var var = GetRef<Var>(op);
+    Optional<PrimExpr> ret = vmap_(var);
+    if (ret.defined()) {
+      return ret.value();
+    } else {
+      return std::move(var);
+    }
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    Stmt res = StmtMutator::VisitStmt_(op);
+    if (op->block->iter_vars.empty()) {
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      opaque_blocks_->Set(op->block, realize->block);
+    }
+    return res;
+  }
+
+  /*! \brief The substitute function */
+  std::function<Optional<PrimExpr>(const Var&)> vmap_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SubstituteAndCollectOpaqueBlock(Stmt stmt, Map<Block, Block>* 
opaque_blocks,
+                                     std::function<Optional<PrimExpr>(const 
Var&)> vmap) {
+  return IRSubstituteAndCollectOpaqueBlock(vmap, 
opaque_blocks)(std::move(stmt));
+}
+
+/*! \brief Simplify the binding of block realize and update the opaque block 
reuse mapping*/
+class BlockRealizeRewriter : public StmtExprMutator {
+ public:
+  explicit BlockRealizeRewriter(
+      const std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual>& 
loop_map,
+      Map<Block, Block>* opaque_blocks)
+      : opaque_blocks_(opaque_blocks) {
+    loop_map_.insert(loop_map.begin(), loop_map.end());
+  }
+
+ private:
+  Stmt VisitStmt_(const ForNode* op) final {
+    loop_map_[op->loop_var] = Range::FromMinExtent(op->min, op->extent);
+    Stmt res = StmtMutator::VisitStmt_(op);
+    loop_map_.erase(op->loop_var);
+    return res;
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    // skip opaque block and update mapping
+    if (op->iter_values.empty()) {
+      Stmt res = StmtMutator::VisitStmt_(op);
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      for (const std::pair<Block, Block>& entry : *opaque_blocks_) {
+        if (entry.second.same_as(op->block)) {
+          opaque_blocks_->Set(entry.first, realize->block);
+          break;
+        }
+      }
+      return res;
+    }
+    auto v = arith::IterMapSimplify(op->iter_values, loop_map_, op->predicate, 
false);
+    if (v.same_as(op->iter_values)) {
+      return GetRef<Stmt>(op);
+    } else {
+      auto n = CopyOnWrite(op);
+      n->iter_values = std::move(v);
+      return Stmt(n);
+    }
+  }
+  /*! \brief The range of loops */
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SimplifyBindings(const Stmt& stmt, const Array<StmtSRef>& loops,
+                      Map<Block, Block>* opaque_blocks) {
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map;
+  for (const StmtSRef& sref : loops) {
+    const auto* loop = sref->StmtAs<ForNode>();
+    loop_map[loop->loop_var] = Range::FromMinExtent(loop->min, loop->extent);
+  }
+  BlockRealizeRewriter rewriter(loop_map, opaque_blocks);
+  return rewriter(stmt);
+}
+
+class NotLoopError : public ScheduleError {
+ public:
+  explicit NotLoopError(IRModule mod, String type) : mod_(mod), type_(type) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: this primitive only operates on a "
+           "loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "this primitive only operates on a loop, but the StmtSref passed in 
points to"
+           "type: {0} ";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {type_}; }
+
+  IRModule mod_;
+  String type_;
+};
+
+class HasAnnotationError : public ScheduleError {
+ public:
+  explicit HasAnnotationError(IRModule mod, For loop) : mod_(mod), loop_(loop) 
{}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The primitive can't be applied because the loop has 
annotation";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The primitive can't be applied because the loop {0} has 
annotation";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class HasThreadBindingError : public ScheduleError {
+ public:
+  explicit HasThreadBindingError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The primitive can't be applied because the loop has 
thread binding";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The primitive can't be applied because the loop {0} has thread 
binding";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class OuterNotInnerParent : public ScheduleError {
+ public:
+  explicit OuterNotInnerParent(IRModule mod, For outer, For inner)
+      : mod_(mod), outer_(outer), inner_(inner) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the outer loop is not the parent of the inner loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loops can't be fused because the outer loop {0} is not the 
parent of the inner "
+           "loop {1}";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {outer_, 
inner_}; }
+
+  IRModule mod_;
+  For outer_;
+  For inner_;
+};
+
+class NotOnlyChildError : public ScheduleError {
+ public:
+  explicit NotOnlyChildError(IRModule mod, For outer, For inner)
+      : mod_(mod), outer_(outer), inner_(inner) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the inner loop is not the only child of outer loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loops can't be fused because the inner loop {1} is not the 
only child of outer "
+           "loop {0}.";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {outer_, 
inner_}; }
+
+  IRModule mod_;
+  For outer_;
+  For inner_;
+};
+
+class LoopNotStartWithZeroError : public ScheduleError {
+ public:
+  explicit LoopNotStartWithZeroError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the primitive only supports loop starting with 0";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loop {0} does not start with 0, which is not supported";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class NotSingleInferFactorError : public ScheduleError {
+ public:
+  explicit NotSingleInferFactorError(IRModule mod) : mod_(mod) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: only one factor can be specified as -1 or none";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "Only one factor can be specified as -1 or none";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {}; }
+
+  IRModule mod_;
+};
+
+class WrongFactorProductError : public ScheduleError {
+ public:
+  explicit WrongFactorProductError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The product of factors is not larger than or equal 
to the extent of "
+           "loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The product of factors is not larger than or equal to the extent 
of loop {0}";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+Array<StmtSRef> Split(ScheduleState self, const StmtSRef& loop_sref,
+                      const Array<PrimExpr>& factors) {
+  // Invariance
+  // - The total repeat number has not changed for each direct child block 
with updating predicate.
+  // - The execution order has not changed. (The block executes with the same 
args and the same
+  // order with before.
+  // Step 1. Check correctness
+  GetScopeRootAndCheckStagePipeline(self, loop_sref);
+  const auto* loop = loop_sref->StmtAs<ForNode>();
+  if (loop == nullptr) {
+    throw NotLoopError(self->mod, loop_sref->stmt->GetTypeKey());
+  }
+  if (!loop->annotations.empty()) {
+    throw HasAnnotationError(self->mod, GetRef<For>(loop));
+  }
+  if (loop->thread_binding.defined()) {
+    throw HasThreadBindingError(self->mod, GetRef<For>(loop));
+  }

Review comment:
       Merge these two errors into one so that the code is more succinct.

##########
File path: src/tir/schedule/primitive/fuse_split.cc
##########
@@ -0,0 +1,483 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "../utils.h"
+namespace tvm {
+namespace tir {
+
+/*! \brief Append a new predicate to the each children of type BlockRealize 
(not recursively) */
+class PredicateUpdater : public StmtMutator {
+ public:
+  /*!
+   * \brief Constructor
+   * \param predicate The predicate to be apppend to BlockRealizeNode
+   */
+  explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana)
+      : predicate_(predicate) {
+    if (!ana->CanProve(predicate)) {
+      add_predicate_ = true;
+    }
+  }
+
+ private:
+  // For each direct child of type BlockRealizeNode, append the predicate
+  Stmt VisitStmt_(const BlockRealizeNode* realize) final {
+    // We do not recursively do this
+    if (add_predicate_) {
+      ObjectPtr<BlockRealizeNode> n = CopyOnWrite(realize);
+      n->predicate = n->predicate && predicate_;
+      return BlockRealize(n);
+    } else {
+      return GetRef<BlockRealize>(realize);
+    }
+  }
+
+  /*! \brief The predicate to be added */
+  const PrimExpr& predicate_;
+  /*! \brief whether to add predicate */
+  bool add_predicate_;
+};
+/*! \brief Substitute vars and collect the reuse mapping of opaque blocks */
+class IRSubstituteAndCollectOpaqueBlock : public StmtExprMutator {
+ public:
+  explicit 
IRSubstituteAndCollectOpaqueBlock(std::function<Optional<PrimExpr>(const Var&)> 
vmap,
+                                             Map<Block, Block>* opaque_blocks)
+      : vmap_(vmap), opaque_blocks_(opaque_blocks) {}
+
+ private:
+  PrimExpr VisitExpr_(const VarNode* op) final {
+    Var var = GetRef<Var>(op);
+    Optional<PrimExpr> ret = vmap_(var);
+    if (ret.defined()) {
+      return ret.value();
+    } else {
+      return std::move(var);
+    }
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    Stmt res = StmtMutator::VisitStmt_(op);
+    if (op->block->iter_vars.empty()) {
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      opaque_blocks_->Set(op->block, realize->block);
+    }
+    return res;
+  }
+
+  /*! \brief The substitute function */
+  std::function<Optional<PrimExpr>(const Var&)> vmap_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SubstituteAndCollectOpaqueBlock(Stmt stmt, Map<Block, Block>* 
opaque_blocks,
+                                     std::function<Optional<PrimExpr>(const 
Var&)> vmap) {
+  return IRSubstituteAndCollectOpaqueBlock(vmap, 
opaque_blocks)(std::move(stmt));
+}
+
+/*! \brief Simplify the binding of block realize and update the opaque block 
reuse mapping*/
+class BlockRealizeRewriter : public StmtExprMutator {
+ public:
+  explicit BlockRealizeRewriter(
+      const std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual>& 
loop_map,
+      Map<Block, Block>* opaque_blocks)
+      : opaque_blocks_(opaque_blocks) {
+    loop_map_.insert(loop_map.begin(), loop_map.end());
+  }
+
+ private:
+  Stmt VisitStmt_(const ForNode* op) final {
+    loop_map_[op->loop_var] = Range::FromMinExtent(op->min, op->extent);
+    Stmt res = StmtMutator::VisitStmt_(op);
+    loop_map_.erase(op->loop_var);
+    return res;
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    // skip opaque block and update mapping
+    if (op->iter_values.empty()) {
+      Stmt res = StmtMutator::VisitStmt_(op);
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      for (const std::pair<Block, Block>& entry : *opaque_blocks_) {
+        if (entry.second.same_as(op->block)) {
+          opaque_blocks_->Set(entry.first, realize->block);
+          break;
+        }
+      }
+      return res;
+    }
+    auto v = arith::IterMapSimplify(op->iter_values, loop_map_, op->predicate, 
false);
+    if (v.same_as(op->iter_values)) {
+      return GetRef<Stmt>(op);
+    } else {
+      auto n = CopyOnWrite(op);
+      n->iter_values = std::move(v);
+      return Stmt(n);
+    }
+  }
+  /*! \brief The range of loops */
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SimplifyBindings(const Stmt& stmt, const Array<StmtSRef>& loops,
+                      Map<Block, Block>* opaque_blocks) {
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map;
+  for (const StmtSRef& sref : loops) {
+    const auto* loop = sref->StmtAs<ForNode>();
+    loop_map[loop->loop_var] = Range::FromMinExtent(loop->min, loop->extent);
+  }
+  BlockRealizeRewriter rewriter(loop_map, opaque_blocks);
+  return rewriter(stmt);
+}
+
+class NotLoopError : public ScheduleError {
+ public:
+  explicit NotLoopError(IRModule mod, String type) : mod_(mod), type_(type) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: this primitive only operates on a "
+           "loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "this primitive only operates on a loop, but the StmtSref passed in 
points to"
+           "type: {0} ";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {type_}; }
+
+  IRModule mod_;
+  String type_;
+};
+
+class HasAnnotationError : public ScheduleError {
+ public:
+  explicit HasAnnotationError(IRModule mod, For loop) : mod_(mod), loop_(loop) 
{}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The primitive can't be applied because the loop has 
annotation";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The primitive can't be applied because the loop {0} has 
annotation";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class HasThreadBindingError : public ScheduleError {
+ public:
+  explicit HasThreadBindingError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The primitive can't be applied because the loop has 
thread binding";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The primitive can't be applied because the loop {0} has thread 
binding";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class OuterNotInnerParent : public ScheduleError {
+ public:
+  explicit OuterNotInnerParent(IRModule mod, For outer, For inner)
+      : mod_(mod), outer_(outer), inner_(inner) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the outer loop is not the parent of the inner loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loops can't be fused because the outer loop {0} is not the 
parent of the inner "
+           "loop {1}";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {outer_, 
inner_}; }
+
+  IRModule mod_;
+  For outer_;
+  For inner_;
+};
+
+class NotOnlyChildError : public ScheduleError {
+ public:
+  explicit NotOnlyChildError(IRModule mod, For outer, For inner)
+      : mod_(mod), outer_(outer), inner_(inner) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the inner loop is not the only child of outer loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loops can't be fused because the inner loop {1} is not the 
only child of outer "
+           "loop {0}.";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {outer_, 
inner_}; }
+
+  IRModule mod_;
+  For outer_;
+  For inner_;
+};
+
+class LoopNotStartWithZeroError : public ScheduleError {
+ public:
+  explicit LoopNotStartWithZeroError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the primitive only supports loop starting with 0";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loop {0} does not start with 0, which is not supported";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class NotSingleInferFactorError : public ScheduleError {
+ public:
+  explicit NotSingleInferFactorError(IRModule mod) : mod_(mod) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: only one factor can be specified as -1 or none";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "Only one factor can be specified as -1 or none";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {}; }
+
+  IRModule mod_;
+};
+
+class WrongFactorProductError : public ScheduleError {
+ public:
+  explicit WrongFactorProductError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The product of factors is not larger than or equal 
to the extent of "
+           "loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The product of factors is not larger than or equal to the extent 
of loop {0}";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+Array<StmtSRef> Split(ScheduleState self, const StmtSRef& loop_sref,
+                      const Array<PrimExpr>& factors) {
+  // Invariance
+  // - The total repeat number has not changed for each direct child block 
with updating predicate.
+  // - The execution order has not changed. (The block executes with the same 
args and the same
+  // order with before.
+  // Step 1. Check correctness
+  GetScopeRootAndCheckStagePipeline(self, loop_sref);
+  const auto* loop = loop_sref->StmtAs<ForNode>();

Review comment:
   ```suggestion
     const ForNode* loop = TVM_SREF_TO_FOR(loop, loop_sref);
   ```

##########
File path: src/tir/schedule/primitive/fuse_split.cc
##########
@@ -0,0 +1,483 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "../utils.h"
+namespace tvm {
+namespace tir {
+
+/*! \brief Append a new predicate to the each children of type BlockRealize 
(not recursively) */
+class PredicateUpdater : public StmtMutator {
+ public:
+  /*!
+   * \brief Constructor
+   * \param predicate The predicate to be apppend to BlockRealizeNode
+   */
+  explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana)
+      : predicate_(predicate) {
+    if (!ana->CanProve(predicate)) {
+      add_predicate_ = true;
+    }
+  }
+
+ private:
+  // For each direct child of type BlockRealizeNode, append the predicate
+  Stmt VisitStmt_(const BlockRealizeNode* realize) final {
+    // We do not recursively do this
+    if (add_predicate_) {
+      ObjectPtr<BlockRealizeNode> n = CopyOnWrite(realize);
+      n->predicate = n->predicate && predicate_;
+      return BlockRealize(n);
+    } else {
+      return GetRef<BlockRealize>(realize);
+    }
+  }
+
+  /*! \brief The predicate to be added */
+  const PrimExpr& predicate_;
+  /*! \brief whether to add predicate */
+  bool add_predicate_;
+};
+/*! \brief Substitute vars and collect the reuse mapping of opaque blocks */
+class IRSubstituteAndCollectOpaqueBlock : public StmtExprMutator {
+ public:
+  explicit 
IRSubstituteAndCollectOpaqueBlock(std::function<Optional<PrimExpr>(const Var&)> 
vmap,
+                                             Map<Block, Block>* opaque_blocks)
+      : vmap_(vmap), opaque_blocks_(opaque_blocks) {}
+
+ private:
+  PrimExpr VisitExpr_(const VarNode* op) final {
+    Var var = GetRef<Var>(op);
+    Optional<PrimExpr> ret = vmap_(var);
+    if (ret.defined()) {
+      return ret.value();
+    } else {
+      return std::move(var);
+    }
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    Stmt res = StmtMutator::VisitStmt_(op);
+    if (op->block->iter_vars.empty()) {
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      opaque_blocks_->Set(op->block, realize->block);
+    }
+    return res;
+  }
+
+  /*! \brief The substitute function */
+  std::function<Optional<PrimExpr>(const Var&)> vmap_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SubstituteAndCollectOpaqueBlock(Stmt stmt, Map<Block, Block>* 
opaque_blocks,
+                                     std::function<Optional<PrimExpr>(const 
Var&)> vmap) {
+  return IRSubstituteAndCollectOpaqueBlock(vmap, 
opaque_blocks)(std::move(stmt));
+}
+
+/*! \brief Simplify the binding of block realize and update the opaque block 
reuse mapping*/
+class BlockRealizeRewriter : public StmtExprMutator {
+ public:
+  explicit BlockRealizeRewriter(
+      const std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual>& 
loop_map,
+      Map<Block, Block>* opaque_blocks)
+      : opaque_blocks_(opaque_blocks) {
+    loop_map_.insert(loop_map.begin(), loop_map.end());
+  }
+
+ private:
+  Stmt VisitStmt_(const ForNode* op) final {
+    loop_map_[op->loop_var] = Range::FromMinExtent(op->min, op->extent);
+    Stmt res = StmtMutator::VisitStmt_(op);
+    loop_map_.erase(op->loop_var);
+    return res;
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    // skip opaque block and update mapping
+    if (op->iter_values.empty()) {
+      Stmt res = StmtMutator::VisitStmt_(op);
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      for (const std::pair<Block, Block>& entry : *opaque_blocks_) {
+        if (entry.second.same_as(op->block)) {
+          opaque_blocks_->Set(entry.first, realize->block);
+          break;
+        }
+      }
+      return res;
+    }
+    auto v = arith::IterMapSimplify(op->iter_values, loop_map_, op->predicate, 
false);
+    if (v.same_as(op->iter_values)) {
+      return GetRef<Stmt>(op);
+    } else {
+      auto n = CopyOnWrite(op);
+      n->iter_values = std::move(v);
+      return Stmt(n);
+    }
+  }
+  /*! \brief The range of loops */
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SimplifyBindings(const Stmt& stmt, const Array<StmtSRef>& loops,
+                      Map<Block, Block>* opaque_blocks) {
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map;

Review comment:
       By the way, `tvm::MapNode` is actually faster than `std::unordered_map` since I 
re-implemented it, so there is no need to use `std::unordered_map` here.

##########
File path: src/tir/schedule/primitive/fuse_split.cc
##########
@@ -0,0 +1,483 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "../utils.h"
+namespace tvm {
+namespace tir {
+
+/*! \brief Append a new predicate to the each children of type BlockRealize 
(not recursively) */
+class PredicateUpdater : public StmtMutator {
+ public:
+  /*!
+   * \brief Constructor
+   * \param predicate The predicate to be apppend to BlockRealizeNode
+   */
+  explicit PredicateUpdater(const PrimExpr& predicate, arith::Analyzer* ana)
+      : predicate_(predicate) {
+    if (!ana->CanProve(predicate)) {
+      add_predicate_ = true;
+    }
+  }
+
+ private:
+  // For each direct child of type BlockRealizeNode, append the predicate
+  Stmt VisitStmt_(const BlockRealizeNode* realize) final {
+    // We do not recursively do this
+    if (add_predicate_) {
+      ObjectPtr<BlockRealizeNode> n = CopyOnWrite(realize);
+      n->predicate = n->predicate && predicate_;
+      return BlockRealize(n);
+    } else {
+      return GetRef<BlockRealize>(realize);
+    }
+  }
+
+  /*! \brief The predicate to be added */
+  const PrimExpr& predicate_;
+  /*! \brief whether to add predicate */
+  bool add_predicate_;
+};
+/*! \brief Substitute vars and collect the reuse mapping of opaque blocks */
+class IRSubstituteAndCollectOpaqueBlock : public StmtExprMutator {
+ public:
+  explicit 
IRSubstituteAndCollectOpaqueBlock(std::function<Optional<PrimExpr>(const Var&)> 
vmap,
+                                             Map<Block, Block>* opaque_blocks)
+      : vmap_(vmap), opaque_blocks_(opaque_blocks) {}
+
+ private:
+  PrimExpr VisitExpr_(const VarNode* op) final {
+    Var var = GetRef<Var>(op);
+    Optional<PrimExpr> ret = vmap_(var);
+    if (ret.defined()) {
+      return ret.value();
+    } else {
+      return std::move(var);
+    }
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    Stmt res = StmtMutator::VisitStmt_(op);
+    if (op->block->iter_vars.empty()) {
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      opaque_blocks_->Set(op->block, realize->block);
+    }
+    return res;
+  }
+
+  /*! \brief The substitute function */
+  std::function<Optional<PrimExpr>(const Var&)> vmap_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SubstituteAndCollectOpaqueBlock(Stmt stmt, Map<Block, Block>* 
opaque_blocks,
+                                     std::function<Optional<PrimExpr>(const 
Var&)> vmap) {
+  return IRSubstituteAndCollectOpaqueBlock(vmap, 
opaque_blocks)(std::move(stmt));
+}
+
+/*! \brief Simplify the binding of block realize and update the opaque block 
reuse mapping*/
+class BlockRealizeRewriter : public StmtExprMutator {
+ public:
+  explicit BlockRealizeRewriter(
+      const std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual>& 
loop_map,
+      Map<Block, Block>* opaque_blocks)
+      : opaque_blocks_(opaque_blocks) {
+    loop_map_.insert(loop_map.begin(), loop_map.end());
+  }
+
+ private:
+  Stmt VisitStmt_(const ForNode* op) final {
+    loop_map_[op->loop_var] = Range::FromMinExtent(op->min, op->extent);
+    Stmt res = StmtMutator::VisitStmt_(op);
+    loop_map_.erase(op->loop_var);
+    return res;
+  }
+
+  Stmt VisitStmt_(const BlockRealizeNode* op) final {
+    // skip opaque block and update mapping
+    if (op->iter_values.empty()) {
+      Stmt res = StmtMutator::VisitStmt_(op);
+      const BlockRealizeNode* realize = res.as<BlockRealizeNode>();
+      for (const std::pair<Block, Block>& entry : *opaque_blocks_) {
+        if (entry.second.same_as(op->block)) {
+          opaque_blocks_->Set(entry.first, realize->block);
+          break;
+        }
+      }
+      return res;
+    }
+    auto v = arith::IterMapSimplify(op->iter_values, loop_map_, op->predicate, 
false);
+    if (v.same_as(op->iter_values)) {
+      return GetRef<Stmt>(op);
+    } else {
+      auto n = CopyOnWrite(op);
+      n->iter_values = std::move(v);
+      return Stmt(n);
+    }
+  }
+  /*! \brief The range of loops */
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map_;
+  /*! \brief The reuse mapping */
+  Map<Block, Block>* opaque_blocks_;
+};
+
+Stmt SimplifyBindings(const Stmt& stmt, const Array<StmtSRef>& loops,
+                      Map<Block, Block>* opaque_blocks) {
+  std::unordered_map<Var, Range, ObjectPtrHash, ObjectPtrEqual> loop_map;
+  for (const StmtSRef& sref : loops) {
+    const auto* loop = sref->StmtAs<ForNode>();
+    loop_map[loop->loop_var] = Range::FromMinExtent(loop->min, loop->extent);
+  }
+  BlockRealizeRewriter rewriter(loop_map, opaque_blocks);
+  return rewriter(stmt);
+}
+
+class NotLoopError : public ScheduleError {
+ public:
+  explicit NotLoopError(IRModule mod, String type) : mod_(mod), type_(type) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: this primitive only operates on a "
+           "loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "this primitive only operates on a loop, but the StmtSref passed in 
points to"
+           "type: {0} ";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {type_}; }
+
+  IRModule mod_;
+  String type_;
+};
+
+class HasAnnotationError : public ScheduleError {
+ public:
+  explicit HasAnnotationError(IRModule mod, For loop) : mod_(mod), loop_(loop) 
{}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The primitive can't be applied because the loop has 
annotation";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The primitive can't be applied because the loop {0} has 
annotation";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class HasThreadBindingError : public ScheduleError {
+ public:
+  explicit HasThreadBindingError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The primitive can't be applied because the loop has 
thread binding";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The primitive can't be applied because the loop {0} has thread 
binding";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class OuterNotInnerParent : public ScheduleError {
+ public:
+  explicit OuterNotInnerParent(IRModule mod, For outer, For inner)
+      : mod_(mod), outer_(outer), inner_(inner) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the outer loop is not the parent of the inner loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loops can't be fused because the outer loop {0} is not the 
parent of the inner "
+           "loop {1}";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {outer_, 
inner_}; }
+
+  IRModule mod_;
+  For outer_;
+  For inner_;
+};
+
+class NotOnlyChildError : public ScheduleError {
+ public:
+  explicit NotOnlyChildError(IRModule mod, For outer, For inner)
+      : mod_(mod), outer_(outer), inner_(inner) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the inner loop is not the only child of outer loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loops can't be fused because the inner loop {1} is not the 
only child of outer "
+           "loop {0}.";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {outer_, 
inner_}; }
+
+  IRModule mod_;
+  For outer_;
+  For inner_;
+};
+
+class LoopNotStartWithZeroError : public ScheduleError {
+ public:
+  explicit LoopNotStartWithZeroError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: the primitive only supports loop starting with 0";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The loop {0} does not start with 0, which is not supported";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+class NotSingleInferFactorError : public ScheduleError {
+ public:
+  explicit NotSingleInferFactorError(IRModule mod) : mod_(mod) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: only one factor can be specified as -1 or none";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "Only one factor can be specified as -1 or none";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {}; }
+
+  IRModule mod_;
+};
+
+class WrongFactorProductError : public ScheduleError {
+ public:
+  explicit WrongFactorProductError(IRModule mod, For loop) : mod_(mod), 
loop_(loop) {}
+
+  String FastErrorString() const final {
+    return "ScheduleError: The product of factors is not larger than or equal 
to the extent of "
+           "loop";
+  }
+
+  String DetailRenderTemplate() const final {
+    return "The product of factors is not larger than or equal to the extent 
of loop {0}";
+  }
+
+  IRModule mod() const final { return mod_; }
+  Array<ObjectRef> LocationsOfInterest() const final { return {loop_}; }
+
+  IRModule mod_;
+  For loop_;
+};
+
+Array<StmtSRef> Split(ScheduleState self, const StmtSRef& loop_sref,
+                      const Array<PrimExpr>& factors) {
+  // Invariance
+  // - The total repeat number has not changed for each direct child block 
with updating predicate.
+  // - The execution order has not changed. (The block executes with the same 
args and the same
+  // order with before.
+  // Step 1. Check correctness
+  GetScopeRootAndCheckStagePipeline(self, loop_sref);

Review comment:
       Do we really need `stage_pipeline=True` to do loop splitting? @tqchen 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to