elvin-n commented on code in PR #12286:
URL: https://github.com/apache/tvm/pull/12286#discussion_r939159079


##########
python/tvm/relay/op/strategy/adreno.py:
##########
@@ -257,3 +257,21 @@ def schedule_pool_adreno(attrs, outs, target):
         if attrs.layout == "NCHW4c":
             return topi.adreno.schedule_pool(outs, attrs.layout)
         return topi.cuda.schedule_pool(outs, attrs.layout)
+
+
+@schedule_injective.register(["adreno"])
+def schedule_injective_adreno(attrs, outs, target):
+    """schedule injective ops for cuda"""

Review Comment:
   cuda->adreno



##########
python/tvm/topi/adreno/utils.py:
##########
@@ -522,6 +534,13 @@ def bind_data_copy(stage, axis_to_vectorize=None):
             stage.bind(thread, te.thread_axis("threadIdx.x"))
             if shape[-1] == 4:
                 stage.vectorize(axes[-1])
+        elif shape[-1] > 1024:

Review Comment:
   why 1024? add comment?



##########
src/relay/transforms/annotate_texture_storage.cc:
##########
@@ -479,7 +584,7 @@ class CollectVirtualDevices : public 
transform::DeviceAwareExprVisitor {
  * \param expr The expression.
  * \return The device based storage mapping.
  */
-Map<Expr, Array<String>> CollectStorageInfo(const Expr& expr) {
+Map<Expr, Map<Expr, Array<String>>> CollectStorageInfo(const Expr& expr) {

Review Comment:
   we need to describe somewhere in this file/classes/methods why we introduced 
new map in existing map.



##########
src/relay/transforms/annotate_texture_storage.cc:
##########
@@ -350,6 +410,9 @@ class StorageInfo : private 
transform::DeviceAwareExprVisitor {
   std::unordered_map<const ExprNode*, std::vector<std::string>> 
consumer_storage_scopes_;
   /*! \brief mapping of arguments to call to function variables*/
   std::unordered_map<Expr, std::vector<Var>, ObjectPtrHash, ObjectPtrEqual> 
args_to_vars_;
+
+  Map<Expr, Map<Expr, Array<String>>> accept_textures_;

Review Comment:
   add comments for new fields



##########
src/relay/transforms/annotate_texture_storage.cc:
##########
@@ -55,15 +57,17 @@ class StorageInfo : private 
transform::DeviceAwareExprVisitor {
  public:
   StorageInfo() : transform::DeviceAwareExprVisitor(Optional<IRModule>()) {}
 
-  static Map<Expr, Array<String>> GetStorageMap(const Expr& expr) {
+  static Map<Expr, Map<Expr, Array<String>>> GetStorageMap(const Expr& expr) {

Review Comment:
   Introducing of more textures scopes will fails some existing tests. I have 
not seen any changes in the existing adreno tests in the PR. There should be 
many changes of "global" to "global.texture" scope in 
test_conv2d_nchw_texture.py



##########
src/relay/transforms/annotate_texture_storage.cc:
##########
@@ -365,43 +428,85 @@ class RewriteVDStorageScopes : public 
transform::DeviceAwareExprMutator {
   using VarMap = std::unordered_map<Expr, Var, ObjectPtrHash, ObjectPtrEqual>;
 
  public:
-  explicit RewriteVDStorageScopes(const Map<Expr, Array<String>>& 
storage_scope)
+  explicit RewriteVDStorageScopes(const Map<Expr, Map<Expr, Array<String>>>& 
storage_scope)
       : transform::DeviceAwareExprMutator(Optional<IRModule>()), 
storage_scope_(storage_scope) {}
 
   Function Rewrite(const Expr& expr) { return 
Downcast<Function>(Mutate(expr)); }
 
   Expr VisitExpr_(const VarNode* vn) final {
     if (storage_scope_.find(GetRef<Expr>(vn)) != storage_scope_.end() &&
-        storage_scope_[GetRef<Expr>(vn)][0] != "global") {
+        storage_scope_[GetRef<Expr>(vn)].find(Expr()) != 
storage_scope_[GetRef<Expr>(vn)].end() &&
+        storage_scope_[GetRef<Expr>(vn)][Expr()][0] != "global") {
       Var c = Var(vn->vid, vn->type_annotation, vn->span);
       auto virtual_device = GetVirtualDevice(GetRef<Expr>(vn));
       c->virtual_device_ =
           VirtualDevice(virtual_device->device_type(), 
virtual_device->virtual_device_id,
-                        virtual_device->target, 
storage_scope_[GetRef<Expr>(vn)][0]);
+                        virtual_device->target, 
storage_scope_[GetRef<Expr>(vn)][Expr()][0]);
       return c;
     }
     return GetRef<Var>(vn);
   }
 
   Expr VisitExpr_(const ConstantNode* vn) final {
-    if (storage_scope_.find(GetRef<Expr>(vn)) != storage_scope_.end()) {
+    if (storage_scope_.find(GetRef<Expr>(vn)) != storage_scope_.end() &&
+        storage_scope_[GetRef<Expr>(vn)].find(Expr()) != 
storage_scope_[GetRef<Expr>(vn)].end()) {
       Expr c = Constant(vn->data, vn->span);
       auto virtual_device = GetVirtualDevice(GetRef<Expr>(vn));
-      c = OnDevice(c,
-                   VirtualDevice(virtual_device->device_type(), 
virtual_device->virtual_device_id,
-                                 virtual_device->target, 
storage_scope_[GetRef<Expr>(vn)][0]),
-                   true);
+      c = OnDevice(
+          c,
+          VirtualDevice(virtual_device->device_type(), 
virtual_device->virtual_device_id,
+                        virtual_device->target, 
storage_scope_[GetRef<Expr>(vn)][Expr()][0]),
+          true);
       return c;
     }
     return GetRef<Constant>(vn);
   }
 
   Expr DeviceAwareVisitExpr_(const CallNode* call_node) final {
-    auto new_call = 
transform::DeviceAwareExprMutator::DeviceAwareVisitExpr_(call_node);
+    // we need to duplicate ExprMutator::VisitExpr_ to correct argument scopes 
and
+    // put device_copy
+    auto new_op = this->Mutate(call_node->op);
+
+    tvm::Array<Type> ty_args;
+    ty_args.reserve(call_node->type_args.size());
+
+    for (auto ty_arg : call_node->type_args) {
+      auto new_ty_arg = this->VisitType(ty_arg);
+      ty_args.push_back(new_ty_arg);
+    }
+
+    tvm::Array<Expr> call_args;
+    call_args.reserve(call_node->args.size());
+    for (auto arg : call_node->args) {
+      auto new_arg = this->Mutate(arg);
+      // verification if we need to put device_copy
+      if (storage_scope_.count(arg) && 
storage_scope_[arg].count(GetRef<Expr>(call_node))) {
+        auto virtual_device = GetVirtualDevice(GetRef<Expr>(call_node));
+        // TOO(amalyshe): hardcoded source scope, need to fix later

Review Comment:
   address this TODO - need to get memory scope here and point in the next line 
in generic form instead of hardcoded `global`



##########
src/relay/backend/build_module.cc:
##########
@@ -397,7 +397,6 @@ class RelayBuildModule : public runtime::ModuleNode {
     relay_module = transform::InferType()(relay_module);
     relay_module = transform::LabelOps()(relay_module);
     relay_module = transform::AnnotateMemoryScope(config_)(relay_module);
-

Review Comment:
   remove this change?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to