crazydemo opened a new pull request, #11820:
URL: https://github.com/apache/tvm/pull/11820

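   Modify the schedule for injective ops so that all outputs of an injective 
op share the same schedule optimization. For example, every output of the 
`split` op should be lowered into a `parallel` for loop; previously only the 
first output was parallelized and vectorized, while the remaining outputs were 
lowered into plain serial loops.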
   
   ```
   New IR (with this change; both outputs are parallelized and vectorized):
   IRModule({GlobalVar(tvmgen_default_fused_split): PrimFunc([placeholder, 
T_split_sections, T_split_sections]) attrs={"from_legacy_te_schedule": (bool)1, 
"global_symbol": "tvmgen_default_fused_split", "tir.noalias": (bool)1} {
     buffer_realize T_split_sections([0, 1], [0, 64], [0, 56], [0, 56]) {
       parallel (ax0.ax1.fused, 0, 64) {
         for (ax2, 0, 56) {
           for (ax3.outer, 0, 4) {
             vectorized (ax3.inner, 0, 16) {
               if (tir.likely(((ax3.inner + (ax3.outer*16)) < 56))) {
                 T_split_sections[floordiv(ax0.ax1.fused, 64), 
floormod(ax0.ax1.fused, 64), ax2, (ax3.inner + (ax3.outer*16))] = 
placeholder[floordiv(ax0.ax1.fused, 64), floormod(ax0.ax1.fused, 64), ax2, 
(ax3.inner + (ax3.outer*16))]
               }
             }
           }
         }
       }
       buffer_realize T_split_sections([0, 1], [0, 64], [0, 56], [0, 56]) {
         parallel (ax0.ax1.fused, 0, 64) {
           for (ax2, 0, 56) {
             for (ax3.outer, 0, 4) {
               vectorized (ax3.inner, 0, 16) {
                 if (tir.likely(((ax3.inner + (ax3.outer*16)) < 56))) {
                   T_split_sections[floordiv(ax0.ax1.fused, 64), 
floormod(ax0.ax1.fused, 64), ax2, (ax3.inner + (ax3.outer*16))] = 
placeholder[floordiv(ax0.ax1.fused, 64), (floormod(ax0.ax1.fused, 64) + 64), 
ax2, (ax3.inner + (ax3.outer*16))]
                 }
               }
             }
           }
         }
       }
     }
   }
   })
   
   Old IR (before this change; the second output uses plain serial loops):
   IRModule({GlobalVar(tvmgen_default_fused_split): PrimFunc([placeholder, 
T_split_sections, T_split_sections]) attrs={"from_legacy_te_schedule": (bool)1, 
"global_symbol": "tvmgen_default_fused_split", "tir.noalias": (bool)1} {
     buffer_realize T_split_sections([0, 1], [0, 64], [0, 56], [0, 56]) {
       parallel (ax0.ax1.fused, 0, 64) {
         for (ax2, 0, 56) {
           for (ax3.outer, 0, 4) {
             vectorized (ax3.inner, 0, 16) {
               if (tir.likely(((ax3.inner + (ax3.outer*16)) < 56))) {
                 T_split_sections[floordiv(ax0.ax1.fused, 64), 
floormod(ax0.ax1.fused, 64), ax2, (ax3.inner + (ax3.outer*16))] = 
placeholder[floordiv(ax0.ax1.fused, 64), floormod(ax0.ax1.fused, 64), ax2, 
(ax3.inner + (ax3.outer*16))]
               }
             }
           }
         }
       }
       buffer_realize T_split_sections([0, 1], [0, 64], [0, 56], [0, 56]) {
         for (ax1, 0, 64) {
           for (ax2, 0, 56) {
             for (ax3, 0, 56) {
               T_split_sections[0, ax1, ax2, ax3] = placeholder[0, (ax1 + 64), 
ax2, ax3]
             }
           }
         }
       }
     }
   }
   })
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to