vinx13 commented on code in PR #12171:
URL: https://github.com/apache/tvm/pull/12171#discussion_r929182643


##########
src/tir/transforms/inject_software_pipeline.cc:
##########
@@ -530,18 +620,269 @@ class PipelineRewriter : public StmtExprMutator {
       Block new_block = 
Downcast<Block>(PipelineBodyRewriter(buffer_data_to_buffer_, buffer_remap_,
                                                              pipeline_loop_, 
max_stage_ != 1,
                                                              
fragment_info_)(block));
-      Map<Var, PrimExpr> subst_map;
-      if (is_unit_loop) {
-        subst_map.Set(pipeline_loop_->loop_var, skewed_loop_var);
-      } else {
-        // normalize loop range
-        PrimExpr delta = start - pipeline_loop_->min;
-        subst_map.Set(pipeline_loop_->loop_var, skewed_loop_var + delta);
+
+      PrimExpr delta = start - pipeline_loop_->min;
+      // This variable corresponds to
+      // - "producer_head" if this stage is an async producer
+      // - "consumer_head" if this stage reads from asynchronously written 
buffers.
+      PrimExpr normalized_access_index = is_unit_loop ? skewed_loop_var : 
skewed_loop_var + delta;
+
+      // Adjust the block predicate and the body according to the final loop 
bound
+      //  [pipeline_loop_->min, extent).
+      if (!is_unit_loop) {
         Var loop_iter = Downcast<Var>(new_loop_var);
-        inbound = Substitute(inbound, Map<Var, PrimExpr>{{loop_iter, loop_iter 
+ delta}});
+        inbound = Substitute(inbound, {{loop_iter, loop_iter + delta}});
+      }
+
+      new_block = Downcast<Block>(
+          Substitute(new_block, {{pipeline_loop_->loop_var, 
normalized_access_index}}));
+
+      if (pipeline_info_[block].async) {

Review Comment:
   Can we refactor the async-pipeline-related logic into separate functions to 
make the original `EmitImpl` logic more concise?



##########
src/tir/transforms/thread_storage_sync.cc:
##########
@@ -384,6 +426,9 @@ class ThreadSyncInserter : public StmtExprMutator {
 
 Stmt ThreadSync(Stmt stmt, std::string storage_scope) {
   StorageScope sync_scope = StorageScope::Create(storage_scope);
+  if (sync_scope.rank == StorageRank::kShared && sync_scope.tag == "") {

Review Comment:
   Do we need to check `sync_scope.tag`? I assume it also works for dynamic 
shared memory.



##########
src/tir/transforms/inject_software_pipeline.cc:
##########
@@ -727,11 +1069,27 @@ class PipelineInjector : private StmtExprMutator {
         
Downcast<Array<Integer>>(op->annotations.at(attr::software_pipeline_order));
     CHECK_EQ(pipeline_stages.size(), original_order.size());
     CHECK_EQ(pipeline_orders.size(), original_order.size());
+
+    std::unordered_set<int> pipeline_async_stages;
+    // The software_pipeline_async_stages annotation provides a list of stages 
that should run
+    // asynchronously. All statements in the provided stages are assumed to 
have asynchronous
+    // semantics (e.g. CUDA async global to shared memory copy).
+    if (op->annotations.count("software_pipeline_async_stages")) {

Review Comment:
   nit: use `op->annotations.Get` to avoid a duplicate lookup, and define 
`"software_pipeline_async_stages"` as a constant in `stmt.h`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to