mbs-octoml commented on code in PR #11357:
URL: https://github.com/apache/tvm/pull/11357#discussion_r887093771
##########
src/driver/driver_api.cc:
##########
@@ -82,9 +82,12 @@ Target DefaultTargetHost(Target target) {
}
tir::Buffer BufferWithOffsetAlignment(Array<PrimExpr> shape, DataType dtype,
std::string name,
- int data_alignment, int offset_factor,
bool compact) {
+ int data_alignment, int offset_factor,
bool compact,
+ std::string memory_scope) {
DataType storage_dtype = (dtype == DataType::Bool() ? DataType::Int(8) :
dtype);
- auto data = tir::Var(name, PointerType(PrimType(storage_dtype)));
+ auto data =
+ tir::Var(name, memory_scope.empty() ?
PointerType(PrimType(storage_dtype))
+ :
PointerType(PrimType(storage_dtype), memory_scope));
Review Comment:
The default for memory_scope in the PointerType ctor is "" anyway, so there
is no need for this check.
##########
src/relay/transforms/annotate_texture_storage.cc:
##########
@@ -0,0 +1,486 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file annotate_texture_storage.cc
+ * \brief Collection of target specific relay passes which collect
+ * storage scope related information.
+ *
+ * - CollectStorageInfo returns a mapping from relay expr
+ * to a list of output storage scopes for each output.
+ * These scopes are used during memory planning as well
+ * as downstream when doing codegen and in the graph runtime when doing
runtime dataspace
+ * allocations.
+ *
+ * - AnnotateMemoryScope calls *target.CollectStorageInfo for all targets
represented
+ *   in the graph and rewrites the graph, modifying or inserting VirtualDevice
with the required
+ *   memory_scope collected from CollectStorageInfo
+ */
+
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/tir/expr.h>
+
+#include <memory>
+#include <unordered_map>
+
+#include "../transforms/device_aware_visitors.h"
+
+namespace tvm {
+namespace relay {
+namespace {
+
+/**
+ * @brief Analyzes the graph and returns mapping of expressions vs desired
memory scope
+ */
+class StorageInfo : private transform::DeviceAwareExprVisitor {
+ public:
+ StorageInfo() : transform::DeviceAwareExprVisitor(Optional<IRModule>()) {}
+
+ static Map<Expr, Array<String>> GetStorageMap(const Expr& expr) {
+ StorageInfo storage_info;
+ storage_info.VisitExpr(expr);
+ storage_info.LegalizeProducerStorage();
+ // For now we force write to global for the outputs of the function over
which
+ // memory planning will be performed. This should incur only a trivial
change
+ // in performance.
+ storage_info.ForceGlobalOutputStorage(expr);
+ Map<Expr, Array<String>> storage_map;
+ for (auto& kv : storage_info.storage_scope_) {
+ if (storage_info.constants_expr_.find(GetRef<Expr>(kv.first)) !=
+ storage_info.constants_expr_.end()) {
+ std::vector<String> storage_scopes;
+ std::copy(kv.second.begin(), kv.second.end(),
std::back_inserter(storage_scopes));
+ storage_map.Set(GetRef<Expr>(kv.first), Array<String>{storage_scopes});
+ }
+ }
+
+  // The initial algorithm maps only the outputs of the expr, which is not
enough; we also need to update the
+  // VirtualDevice for function variables to get proper codegen. Add the vars
to storage_map.
+ for (const auto& a : storage_info.args_to_vars_) {
+ if (storage_map.count(a.first)) {
+ storage_map.Set(a.second, storage_map[a.first]);
+ }
+ }
+ return storage_map;
+ }
+
+ private:
+ void Visit(const Expr& expr) {
+ // Pre-order traversal to enable upward propagation
+ // of consumer storage scopes to producers when desirable.
+ if (const auto* fn = expr.as<FunctionNode>()) {
+ this->VisitExpr(fn->body);
+ for (const auto& param : fn->params) {
+ this->VisitExpr(param);
+ }
+ } else {
+ this->VisitExpr(expr);
+ }
+ }
+
+ void VisitExpr_(const VarNode* vn) final { ApplyConsumerScopeToInputs(vn); }
+
+ void VisitExpr_(const ConstantNode* cn) final {
+ constants_expr_.insert(GetRef<Expr>(cn));
+ ApplyConsumerScopeToInputs(cn);
+ }
+
+ void DeviceAwareVisitExpr_(const CallNode* call) final {
+ // Check the contents of this primitive function
+ if (DeviceSupportsTextureStorage(GetRef<Expr>(call))) {
+ if (const auto* fn = call->op.as<FunctionNode>()) {
+ if (fn->HasNonzeroAttr(attr::kPrimitive)) {
+ primitive_supports_texture_ = false;
+ Visit(call->op);
+ if (primitive_supports_texture_) {
+ if (call->checked_type().as<TensorTypeNode>()) {
+ std::string scope = "global.texture";
+ if (const auto* ttype =
call->checked_type().as<TensorTypeNode>()) {
+ if (ttype->shape.size() == 5) {
+ scope = Scope(ttype->shape);
+ }
+ }
+ storage_scope_[call].push_back(scope);
+ } else {
+ const auto* tuple_type = call->type_as<TupleTypeNode>();
+ ICHECK(tuple_type);
+ // TODO(csullivan): Add support for mixed output storage scope.
+ // In current adreno storage planner all outputs of a
+ // primitive function are assumed to be of the same storage
+ // type. This should be easy to extend in the future.
+ for (size_t i = 0; i < tuple_type->fields.size(); i++) {
+ storage_scope_[call].push_back("global.texture");
+ }
+ }
+ for (size_t i = 0; i < fn->params.size(); i++) {
+ args_to_vars_[call->args[i]] = fn->params[i];
+ }
+ }
+ // Add consumer storage scope information for call arguments
+ for (auto& arg : call->args) {
+ if (storage_scope_.count(call)) {
+ ICHECK(!HasMixedStorageOutputs(call))
+ << "Mixed output storage scopes are not currently supported";
+
consumer_storage_scopes_[arg.operator->()].push_back(storage_scope_[call][0]);
+ } else {
+ consumer_storage_scopes_[arg.operator->()].push_back("global");
+ }
+ }
+ }
+ }
+ }
+
+ primitive_supports_texture_ = SupportsTextureStorage(call);
+
+ for (auto& arg : call->args) {
+ Visit(arg);
+ }
+ }
+
+ std::string Scope(Array<PrimExpr> shape) {
+ std::map<int, std::string> diffs;
+ int limit = 16384;
Review Comment:
All the hard-coded rules and dimensions suggest we make this an Adreno
specific pass.
##########
src/relay/transforms/annotate_texture_storage.cc:
##########
@@ -0,0 +1,486 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file annotate_texture_storage.cc
+ * \brief Collection of target specific relay passes which collect
+ * storage scope related information.
+ *
+ * - CollectStorageInfo returns a mapping from relay expr
+ * to a list of output storage scopes for each output.
+ * These scopes are used during memory planning as well
+ * as downstream when doing codegen and in the graph runtime when doing
runtime dataspace
+ * allocations.
+ *
+ * - AnnotateMemoryScope calls *target.CollectStorageInfo for all targets
represented
+ *   in the graph and rewrites the graph, modifying or inserting VirtualDevice
with the required
+ *   memory_scope collected from CollectStorageInfo
+ */
+
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/tir/expr.h>
+
+#include <memory>
+#include <unordered_map>
+
+#include "../transforms/device_aware_visitors.h"
+
+namespace tvm {
+namespace relay {
+namespace {
+
+/**
+ * @brief Analyzes the graph and returns mapping of expressions vs desired
memory scope
+ */
+class StorageInfo : private transform::DeviceAwareExprVisitor {
+ public:
+ StorageInfo() : transform::DeviceAwareExprVisitor(Optional<IRModule>()) {}
+
+ static Map<Expr, Array<String>> GetStorageMap(const Expr& expr) {
+ StorageInfo storage_info;
+ storage_info.VisitExpr(expr);
+ storage_info.LegalizeProducerStorage();
+ // For now we force write to global for the outputs of the function over
which
+ // memory planning will be performed. This should incur only a trivial
change
+ // in performance.
+ storage_info.ForceGlobalOutputStorage(expr);
+ Map<Expr, Array<String>> storage_map;
+ for (auto& kv : storage_info.storage_scope_) {
+ if (storage_info.constants_expr_.find(GetRef<Expr>(kv.first)) !=
+ storage_info.constants_expr_.end()) {
+ std::vector<String> storage_scopes;
+ std::copy(kv.second.begin(), kv.second.end(),
std::back_inserter(storage_scopes));
+ storage_map.Set(GetRef<Expr>(kv.first), Array<String>{storage_scopes});
+ }
+ }
+
+  // The initial algorithm maps only the outputs of the expr, which is not
enough; we also need to update the
+  // VirtualDevice for function variables to get proper codegen. Add the vars
to storage_map.
+ for (const auto& a : storage_info.args_to_vars_) {
+ if (storage_map.count(a.first)) {
+ storage_map.Set(a.second, storage_map[a.first]);
+ }
+ }
+ return storage_map;
+ }
+
+ private:
+ void Visit(const Expr& expr) {
+ // Pre-order traversal to enable upward propagation
+ // of consumer storage scopes to producers when desirable.
+ if (const auto* fn = expr.as<FunctionNode>()) {
+ this->VisitExpr(fn->body);
+ for (const auto& param : fn->params) {
+ this->VisitExpr(param);
+ }
+ } else {
+ this->VisitExpr(expr);
+ }
+ }
+
+ void VisitExpr_(const VarNode* vn) final { ApplyConsumerScopeToInputs(vn); }
+
+ void VisitExpr_(const ConstantNode* cn) final {
+ constants_expr_.insert(GetRef<Expr>(cn));
+ ApplyConsumerScopeToInputs(cn);
+ }
+
+ void DeviceAwareVisitExpr_(const CallNode* call) final {
+ // Check the contents of this primitive function
+ if (DeviceSupportsTextureStorage(GetRef<Expr>(call))) {
+ if (const auto* fn = call->op.as<FunctionNode>()) {
+ if (fn->HasNonzeroAttr(attr::kPrimitive)) {
+ primitive_supports_texture_ = false;
+ Visit(call->op);
+ if (primitive_supports_texture_) {
+ if (call->checked_type().as<TensorTypeNode>()) {
Review Comment:
See tvm::relay::FlattenTupleType
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]