This is an automated email from the ASF dual-hosted git repository.
csullivan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 34cacb0a64 [VM][Textures] Enable OpenCL textures for VM (#15419)
34cacb0a64 is described below
commit 34cacb0a6487d6300ffc8bf0bd018879d0a8d548
Author: Egor Churaev <[email protected]>
AuthorDate: Tue Aug 8 17:56:55 2023 +0300
[VM][Textures] Enable OpenCL textures for VM (#15419)
* [VM][Textures] Enable OpenCL textures for VM
This commit introduces memory scope to VM and enables using textures.
The following changes have been made:
- AnnotateMemoryScope pass is used in VM compilation pipeline
- VM allows using more than one device with the same device type.
Also, virtual devices in VM contain information about memory
scope.
- Instructions LoadConst and AllocStorage were extended to support
textures.
- VM bytecode was updated to support memory scope.
- Annotate texture storage pass was updated to support dynamic shape.
- Some other minor changes have been made.
* Implement tests for vm
* Fix lint
* Fix tests
* Use union in allocate_storage struct
* Apply comments
* Fix copy ctor and assignment operator
---
include/tvm/runtime/ndarray.h | 8 +-
include/tvm/runtime/vm/bytecode.h | 19 +-
include/tvm/runtime/vm/executable.h | 5 +-
src/relay/backend/vm/compiler.cc | 41 ++-
src/relay/backend/vm/manifest_lifetimes.cc | 4 +-
src/relay/op/memory/memory.cc | 20 +-
src/relay/op/memory/memory.h | 5 +-
src/relay/transforms/annotate_texture_storage.cc | 9 +
src/relay/transforms/device_domains.cc | 7 +-
src/relay/transforms/memory_alloc.cc | 4 +-
src/runtime/c_runtime_api.cc | 2 +-
src/runtime/opencl/opencl_device_api.cc | 2 +-
src/runtime/vm/bytecode.cc | 56 +++-
src/runtime/vm/executable.cc | 34 +-
src/runtime/vm/profiler/vm.cc | 16 +-
src/runtime/vm/vm.cc | 47 ++-
.../opencl_texture/test_conv2d_nchw_texture.py | 361 ++++++++++++++++-----
.../opencl_texture/test_conv2d_nhwc_texture.py | 245 ++++++++++----
.../test_depthwise_conv2d_nchw_texture.py | 52 ++-
.../test_depthwise_conv2d_nhwc_texture.py | 50 ++-
.../relay/opencl_texture/test_injection_texture.py | 33 +-
tests/python/relay/opencl_texture/test_network.py | 24 +-
.../relay/opencl_texture/test_pool_texture.py | 63 +++-
.../relay/opencl_texture/test_reduction_texture.py | 87 +++--
.../relay/opencl_texture/utils/adreno_utils.py | 84 +++++
.../relay/test_pass_dead_code_elimination.py | 18 +-
tests/python/relay/test_pass_plan_devices.py | 11 +-
27 files changed, 996 insertions(+), 311 deletions(-)
diff --git a/include/tvm/runtime/ndarray.h b/include/tvm/runtime/ndarray.h
index 119d0f7fd3..2a06856fea 100644
--- a/include/tvm/runtime/ndarray.h
+++ b/include/tvm/runtime/ndarray.h
@@ -110,9 +110,10 @@ class NDArray : public ObjectRef {
/*!
* \brief Copy the data to another device.
* \param dev The target device.
+ * \param mem_scope The memory scope of the target array.
* \return The array under another device.
*/
- inline NDArray CopyTo(const Device& dev) const;
+ inline NDArray CopyTo(const Device& dev, Optional<String> mem_scope =
NullOpt) const;
/*!
* \brief Load NDArray from stream
* \param stream The input data stream
@@ -398,10 +399,11 @@ inline void NDArray::CopyTo(const NDArray& other) const {
CopyFromTo(&(get_mutable()->dl_tensor), &(other.get_mutable()->dl_tensor));
}
-inline NDArray NDArray::CopyTo(const Device& dev) const {
+inline NDArray NDArray::CopyTo(const Device& dev, Optional<String> mem_scope)
const {
ICHECK(data_ != nullptr);
const DLTensor* dptr = operator->();
- NDArray ret = Empty(ShapeTuple(dptr->shape, dptr->shape + dptr->ndim),
dptr->dtype, dev);
+ NDArray ret =
+ Empty(ShapeTuple(dptr->shape, dptr->shape + dptr->ndim), dptr->dtype,
dev, mem_scope);
this->CopyTo(ret);
return ret;
}
diff --git a/include/tvm/runtime/vm/bytecode.h
b/include/tvm/runtime/vm/bytecode.h
index 2fe855f964..637c1e70a7 100644
--- a/include/tvm/runtime/vm/bytecode.h
+++ b/include/tvm/runtime/vm/bytecode.h
@@ -157,6 +157,8 @@ struct Instruction {
struct /* LoadConst Operands */ {
/* \brief The index into the constant pool. */
Index const_index;
+ /*! \brief The index of the device on which the load will be made. */
+ Index device_index;
};
struct /* LoadConsti Operands */ {
/* \brief The index into the constant pool. */
@@ -195,12 +197,18 @@ struct Instruction {
RegName* free_vars;
};
struct /* AllocStorage Operands */ {
- /*! \brief The size of the allocation. */
- RegName allocation_size;
/*! \brief The alignment of the allocation. */
Index alignment;
/*! \brief The hint of the dtype. */
DLDataType dtype_hint;
+ /*! \brief The number of dimensions. */
+ uint32_t ndim;
+ union {
+ /*! \brief The shape of tensor. */
+ int64_t* shape;
+ /*! \brief The size of the allocation. */
+ RegName allocation_size;
+ };
/*! \brief The index of the device on which the allocation will be made.
*/
Index device_index;
} alloc_storage;
@@ -332,10 +340,11 @@ struct Instruction {
/*!
* \brief Construct a load constant instruction.
* \param const_index The index of the constant.
+ * \param device_index The index of the device to load on.
* \param dst The destination register.
* \return The load constant instruction.
*/
- static Instruction LoadConst(Index const_index, RegName dst);
+ static Instruction LoadConst(Index const_index, Index device_index, RegName
dst);
/*!
* \brief Construct a load_constanti instruction.
* \param val The interger constant value.
@@ -356,11 +365,13 @@ struct Instruction {
* \param alignment The allocation's alignment.
* \param dtype_hint The data type hint for the allocator.
* \param device_index The index of the device to allocate on.
+ * \param shape The shape of the allocation.
* \param dst The destination to place the storage.
* \return The alloc storage instruction.
*/
static Instruction AllocStorage(RegName size, Index alignment, DLDataType
dtype_hint,
- Index device_index, RegName dst);
+ Index device_index, const
std::vector<int64_t>& shape,
+ RegName dst);
/*!
* \brief Get the shape of an input tensor.
* \param tensor The input tensor.
diff --git a/include/tvm/runtime/vm/executable.h
b/include/tvm/runtime/vm/executable.h
index 0714847400..d4872837b0 100644
--- a/include/tvm/runtime/vm/executable.h
+++ b/include/tvm/runtime/vm/executable.h
@@ -34,6 +34,7 @@
#include <map>
#include <string>
#include <unordered_map>
+#include <utility>
#include <vector>
namespace tvm {
@@ -262,9 +263,9 @@ class TVM_DLL Executable : public ModuleNode {
/*!
* \brief The (compile-time, virtual) devices corresponding to each device
index.
- * Currently we only support at most one device per device type.
+ * This vector contains a pair Device and its memory_scope.
*/
- std::vector<Device> virtual_devices;
+ std::vector<std::pair<Device, std::string>> virtual_devices;
/*!
* \brief The device index corresponding to the 'host' device. That will
hold and evaluate
* shape-related data and code.
diff --git a/src/relay/backend/vm/compiler.cc b/src/relay/backend/vm/compiler.cc
index c5b6c7f2f0..848c23eba6 100644
--- a/src/relay/backend/vm/compiler.cc
+++ b/src/relay/backend/vm/compiler.cc
@@ -352,19 +352,6 @@ class VMFunctionCompiler :
DeviceAwareExprFunctor<void(const Expr& n)> {
return 0;
}
- // However, otherwise we allow at most one VirtualDevice per device type.
- // TODO(mbs): This will eventually need to account for memory scopes
somehow so device_copy
- // instructions can do the right thing.
- itr = std::find_if(context_->virtual_devices_.begin() + 1,
context_->virtual_devices_.end(),
- [&virtual_device](const VirtualDevice&
existing_virtual_device) {
- return existing_virtual_device->device_type() ==
- virtual_device->device_type();
- });
- CHECK(itr == context_->virtual_devices_.end())
- << "The VM does not currently support using more than one device with
the same device type "
- "for primitives, however the program is using the distinct scopes "
- << virtual_device << " and " << *itr << " of device type " <<
virtual_device->device_type();
-
ICHECK(virtual_device != host_virtual_device_);
Index index = context_->virtual_devices_.size();
VLOG(2) << "virtual_device[" << index << "] = " << virtual_device;
@@ -384,7 +371,7 @@ class VMFunctionCompiler :
DeviceAwareExprFunctor<void(const Expr& n)> {
VLOG(2) << "constant[" << const_index << "] on device[" << device_index <<
"]";
context_->const_device_indexes.push_back(device_index);
context_->constants.push_back(const_node->data);
- Emit(Instruction::LoadConst(const_index, NewRegister()));
+ Emit(Instruction::LoadConst(const_index, device_index, NewRegister()));
}
void VisitExpr_(const VarNode* var_node) final {
@@ -602,13 +589,21 @@ class VMFunctionCompiler :
DeviceAwareExprFunctor<void(const Expr& n)> {
})
.Match("memory.alloc_storage",
[this](const Array<Expr>& args, const Attrs& attrs, const
Array<Type>& type_arg) {
- ICHECK_EQ(args.size(), 2);
+ ICHECK_EQ(args.size(), 3);
// Compute the size of the allocation.
this->VisitExpr(args[0]);
auto size_register = last_register_;
- ICHECK(args[1].as<ConstantNode>()); // Always a literal.
- NDArray alignment_arr = args[1].as<ConstantNode>()->data;
+ auto const_shape =
AsIgnoringOnDevice<ConstantNode>(args[1]);
+ std::vector<int64_t> raw_shape;
+ if (const_shape) {
+ NDArray shape = const_shape->data;
+                   // TODO(@jroesch): we need to get an RFC done to
standardize shape dtype
+ raw_shape = ToAllocTensorShape(shape);
+ }
+
+ ICHECK(args[2].as<ConstantNode>()); // Always a literal.
+ NDArray alignment_arr = args[2].as<ConstantNode>()->data;
ICHECK_EQ(alignment_arr->dtype.code, 0U)
<< "The dtype of constant shape must be int32 or int64,
but got "
<< DLDataType2String(alignment_arr->dtype);
@@ -622,7 +617,7 @@ class VMFunctionCompiler :
DeviceAwareExprFunctor<void(const Expr& n)> {
Emit(Instruction::AllocStorage(size_register, alignment,
dtype,
GetDeviceIndex(alloc_attrs->virtual_device),
- NewRegister()));
+ raw_shape, NewRegister()));
})
.Match("vm.shape_of",
[this](const Array<Expr>& args, const Attrs& attrs, const
Array<Type>& type_arg) {
@@ -739,7 +734,7 @@ class VMFunctionCompiler :
DeviceAwareExprFunctor<void(const Expr& n)> {
/*!
* \brief Compile a match value
- * Generate byte code that compute the value specificed in val
+ * Generate byte code that compute the value specified in val
*
* \return The register number assigned for the final value
*/
@@ -946,9 +941,10 @@ void VMCompiler::LowerImpl(IRModule mod) {
for (const auto& virtual_device : context_.virtual_devices_) {
ICHECK(!virtual_device->IsFullyUnconstrained());
ICHECK_GT(virtual_device->device_type(), 0);
- // TODO(mbs): We forget the memory scope.
-
exec_->virtual_devices.push_back(Device{/*device_type=*/virtual_device->device_type(),
-
/*device_id=*/virtual_device->virtual_device_id});
+ exec_->virtual_devices.push_back(
+ std::make_pair(Device{/*device_type=*/virtual_device->device_type(),
+ /*device_id=*/virtual_device->virtual_device_id},
+ virtual_device->memory_scope));
}
exec_->host_device_index = kHostDeviceIndex;
@@ -1068,6 +1064,7 @@ IRModule VMCompiler::OptimizeModuleImpl(IRModule mod) {
}
pass_seqs.push_back(transform::FuseOps());
+ pass_seqs.push_back(transform::AnnotateMemoryScope());
// Do layout rewrite for auto-scheduler.
transform::PassContext pass_ctx = PassContext::Current();
diff --git a/src/relay/backend/vm/manifest_lifetimes.cc
b/src/relay/backend/vm/manifest_lifetimes.cc
index 7028c88f2e..892648d678 100644
--- a/src/relay/backend/vm/manifest_lifetimes.cc
+++ b/src/relay/backend/vm/manifest_lifetimes.cc
@@ -167,7 +167,9 @@ class AliasEliminator : public MixedModeMutator {
if (copy_props.src_virtual_device->device_type() ==
copy_props.dst_virtual_device->device_type() &&
copy_props.src_virtual_device->virtual_device_id ==
- copy_props.dst_virtual_device->virtual_device_id) {
+ copy_props.dst_virtual_device->virtual_device_id &&
+ copy_props.src_virtual_device->memory_scope ==
+ copy_props.dst_virtual_device->memory_scope) {
Expr to_copy = Downcast<Call>(unwrapped)->args[0];
if (const VarNode* alias_of_n = to_copy.as<VarNode>()) {
alias_[var] = Downcast<Var>(VisitExpr_(alias_of_n));
diff --git a/src/relay/op/memory/memory.cc b/src/relay/op/memory/memory.cc
index 6535156205..008dbff841 100644
--- a/src/relay/op/memory/memory.cc
+++ b/src/relay/op/memory/memory.cc
@@ -50,25 +50,32 @@ TVM_REGISTER_NODE_TYPE(AllocTensorAttrs);
// The passing value in attrs and args doesn't seem super great.
// We should consider a better solution, i.e the type relation
// being able to see the arguments as well?
-Expr AllocStorage(Expr size, Expr alignment, VirtualDevice virtual_device,
DataType dtype_hint) {
+Expr AllocStorage(Expr size, Expr shape, Expr alignment, VirtualDevice
virtual_device,
+ DataType dtype_hint) {
auto attrs = make_object<AllocStorageAttrs>();
attrs->dtype = dtype_hint;
attrs->virtual_device = std::move(virtual_device);
static const Op& op = Op::Get("memory.alloc_storage");
- return Call(op, {std::move(size), std::move(alignment)},
Attrs(std::move(attrs)), {});
+ return Call(op, {std::move(size), std::move(shape), std::move(alignment)},
+ Attrs(std::move(attrs)), {});
}
TVM_REGISTER_GLOBAL("relay.op.memory._make.alloc_storage").set_body_typed(AllocStorage);
bool AllocStorageRel(const Array<Type>& types, int num_inputs, const Attrs&
attrs,
const TypeReporter& reporter) {
- ICHECK_EQ(types.size(), 3u);
+ ICHECK_EQ(types.size(), 4u);
auto size_type = types[0];
auto tensor_type = size_type.as<TensorTypeNode>();
ICHECK(tensor_type != nullptr);
ICHECK_EQ(tensor_type->dtype, DataType::Int(64));
ICHECK_EQ(tensor_type->shape.size(), 0);
- auto align_type = types[1];
+
+ // Tensor shape
+ auto tt = types[1].as<TensorTypeNode>();
+ ICHECK(tt != nullptr) << "must be tensor type";
+
+ auto align_type = types[2];
auto align_ttype = align_type.as<TensorTypeNode>();
ICHECK(align_ttype != nullptr);
ICHECK_EQ(align_ttype->dtype, DataType::Int(64));
@@ -77,14 +84,15 @@ bool AllocStorageRel(const Array<Type>& types, int
num_inputs, const Attrs& attr
ICHECK(mod.defined());
auto storage_name = mod->GetGlobalTypeVar("Storage");
auto storage = TypeCall(storage_name, {});
- reporter->Assign(types[2], storage);
+ reporter->Assign(types[3], storage);
return true;
}
RELAY_REGISTER_OP("memory.alloc_storage")
.describe(R"code(Explicitly allocate storage to be used by tensors.)code"
TVM_ADD_FILELINE)
- .set_num_inputs(2)
+ .set_num_inputs(3)
.add_argument("size", "Tensor", "The size of the storage to allocate.")
+ .add_argument("shape", "Tensor", "The shape of the storage to allocate.")
.add_argument("alignment", "Tensor", "The alignment of the storage.")
.add_type_rel("AllocStorage", AllocStorageRel)
.set_attrs_type_key("relay.attrs.AllocStorageAttrs")
diff --git a/src/relay/op/memory/memory.h b/src/relay/op/memory/memory.h
index 690854c382..5533553393 100644
--- a/src/relay/op/memory/memory.h
+++ b/src/relay/op/memory/memory.h
@@ -34,10 +34,11 @@
namespace tvm {
namespace relay {
-Expr AllocStorage(Expr size, Expr alignment, VirtualDevice virtual_device,
DataType dtype_hint);
+Expr AllocStorage(Expr size, Expr shape, Expr alignment, VirtualDevice
virtual_device,
+ DataType dtype_hint);
/*! \brief Returns the "memory.alloc_tensor" operator. */
const Op& MemoryAllocTensorOp();
-Expr AllocTensor(Expr storage, Expr offset, tvm::relay::Expr shape, DataType
dtype,
+Expr AllocTensor(Expr storage, Expr offset, Expr shape, DataType dtype,
Array<IndexExpr> assert_shape);
Expr ToTupleType(const Type& ty, const std::vector<Expr>& exprs);
std::vector<Expr> FromTupleType(const Type& type, const Expr& expr);
diff --git a/src/relay/transforms/annotate_texture_storage.cc
b/src/relay/transforms/annotate_texture_storage.cc
index d3748449ad..4921cef4c8 100644
--- a/src/relay/transforms/annotate_texture_storage.cc
+++ b/src/relay/transforms/annotate_texture_storage.cc
@@ -407,6 +407,15 @@ class StorageInfo : private
transform::DeviceAwareExprVisitor {
if (pattern <= kCommReduce) {
if (const auto* ttype = call->checked_type().as<TensorTypeNode>()) {
if (ttype->shape.size() == 5) {
+ auto node0 = ttype->shape[0].as<IntImmNode>();
+ auto node1 = ttype->shape[1].as<IntImmNode>();
+ auto node2 = ttype->shape[2].as<IntImmNode>();
+ auto node3 = ttype->shape[3].as<IntImmNode>();
+ auto node4 = ttype->shape[4].as<IntImmNode>();
+ // if tensor has any dynamic dimension then textures are not supported
+ if (!node0 || !node1 || !node2 || !node3 || !node4) {
+ return false;
+ }
supports_texture_storage = true;
}
}
diff --git a/src/relay/transforms/device_domains.cc
b/src/relay/transforms/device_domains.cc
index e7d3a65dfe..e2af20022a 100644
--- a/src/relay/transforms/device_domains.cc
+++ b/src/relay/transforms/device_domains.cc
@@ -236,12 +236,13 @@ DeviceDomainPtr DeviceDomains::DomainForCallee(const
Call& call) {
args_and_result.emplace_back(ForVirtualDevice(device_copy_props.body->checked_type(),
device_copy_props.dst_virtual_device));
} else if (call->op == alloc_storage_op) {
- ICHECK_EQ(call->args.size(), 2U);
- // alloc_storage(size, alignment, virtual_device=<t>)
- // alloc_storage: fn(<cpu>, <cpu>):<t>
+ ICHECK_EQ(call->args.size(), 3U);
+ // alloc_storage(size, shape, alignment, virtual_device=<t>)
+ // alloc_storage: fn(<cpu>, <cpu>, <cpu>):<t>
const auto* attrs = call->attrs.as<AllocStorageAttrs>();
args_and_result.emplace_back(host_domain_);
args_and_result.emplace_back(host_domain_);
+ args_and_result.emplace_back(host_domain_);
args_and_result.emplace_back(ForVirtualDevice(call->checked_type(),
attrs->virtual_device));
} else if (call->op == alloc_tensor_op) {
ICHECK_EQ(call->args.size(), 3U);
diff --git a/src/relay/transforms/memory_alloc.cc
b/src/relay/transforms/memory_alloc.cc
index 5b584e199d..fcf8a784a9 100644
--- a/src/relay/transforms/memory_alloc.cc
+++ b/src/relay/transforms/memory_alloc.cc
@@ -260,7 +260,7 @@ class DialectRewriter : public
transform::DeviceAwareExprMutator {
Expr alignment = ComputeAlignment(type->dtype);
// Run type inference later to get the correct type.
Var var("storage_" + name_hint, Type(nullptr));
- Expr value = AllocStorage(size, alignment, virtual_device, type->dtype);
+ Expr value = AllocStorage(size, shape, alignment, virtual_device,
type->dtype);
auto sto = scope->Push(var, MaybeOnDeviceFixed(value, virtual_device));
// TODO(@jroesch): There is a bug with typing based on the constant shape.
@@ -366,7 +366,7 @@ class DialectRewriter : public
transform::DeviceAwareExprMutator {
// Alignment is directly captured in the instruction so don't wrap in
"on_device".
auto alignment = ComputeAlignment(out_type->dtype);
Var sto_var("storage_" + std::to_string(i), Type(nullptr));
- auto val = AllocStorage(size, alignment, virtual_device,
out_type->dtype);
+ auto val = AllocStorage(size, out_shape, alignment, virtual_device,
out_type->dtype);
storages.push_back(scope->Push(sto_var, MaybeOnDeviceFixed(val,
virtual_device)));
}
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc
index 0132e9009c..d7739b7b22 100644
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
@@ -152,7 +152,7 @@ static size_t GetDataAlignment(const DLDataType dtype) {
void* DeviceAPI::AllocDataSpace(Device dev, int ndim, const int64_t* shape,
DLDataType dtype,
Optional<String> mem_scope) {
- if (!mem_scope.defined() || mem_scope.value() == "global") {
+ if (!mem_scope.defined() || mem_scope.value() == "" || mem_scope.value() ==
"global") {
// by default, we can always redirect to the flat memory allocations
DLTensor temp;
temp.data = nullptr;
diff --git a/src/runtime/opencl/opencl_device_api.cc
b/src/runtime/opencl/opencl_device_api.cc
index 0d1f4af2bb..35e77eb6d1 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -239,7 +239,7 @@ void* OpenCLWorkspace::AllocDataSpace(Device dev, size_t
size, size_t alignment,
void* OpenCLWorkspace::AllocDataSpace(Device dev, int ndim, const int64_t*
shape, DLDataType dtype,
Optional<String> mem_scope) {
- if (!mem_scope.defined() || mem_scope.value() == "global") {
+ if (!mem_scope.defined() || mem_scope.value().empty() || mem_scope.value()
== "global") {
return DeviceAPI::AllocDataSpace(dev, ndim, shape, dtype, mem_scope);
}
ICHECK(IsTextureStorage(std::string(mem_scope.value())))
diff --git a/src/runtime/vm/bytecode.cc b/src/runtime/vm/bytecode.cc
index 424dfe87c7..dc52e8c8f0 100644
--- a/src/runtime/vm/bytecode.cc
+++ b/src/runtime/vm/bytecode.cc
@@ -99,6 +99,7 @@ Instruction::Instruction(const Instruction& instr) {
return;
case Opcode::LoadConst:
this->const_index = instr.const_index;
+ this->device_index = instr.device_index;
return;
case Opcode::LoadConsti:
this->load_consti = instr.load_consti;
@@ -114,7 +115,15 @@ Instruction::Instruction(const Instruction& instr) {
this->pc_offset = instr.pc_offset;
return;
case Opcode::AllocStorage:
- this->alloc_storage = instr.alloc_storage;
+ this->alloc_storage.allocation_size =
instr.alloc_storage.allocation_size;
+ this->alloc_storage.alignment = instr.alloc_storage.alignment;
+ this->alloc_storage.dtype_hint = instr.alloc_storage.dtype_hint;
+ this->alloc_storage.device_index = instr.alloc_storage.device_index;
+ this->alloc_storage.ndim = instr.alloc_storage.ndim;
+ if (this->alloc_storage.ndim > 0) {
+ this->alloc_storage.shape =
+ Duplicate<int64_t>(instr.alloc_storage.shape,
instr.alloc_storage.ndim);
+ }
return;
case Opcode::ShapeOf:
this->shape_of.tensor = instr.shape_of.tensor;
@@ -207,6 +216,7 @@ Instruction& Instruction::operator=(const Instruction&
instr) {
return *this;
case Opcode::LoadConst:
this->const_index = instr.const_index;
+ this->device_index = instr.device_index;
return *this;
case Opcode::GetField:
this->object = instr.object;
@@ -219,7 +229,15 @@ Instruction& Instruction::operator=(const Instruction&
instr) {
this->pc_offset = instr.pc_offset;
return *this;
case Opcode::AllocStorage:
- this->alloc_storage = instr.alloc_storage;
+ this->alloc_storage.allocation_size =
instr.alloc_storage.allocation_size;
+ this->alloc_storage.alignment = instr.alloc_storage.alignment;
+ this->alloc_storage.dtype_hint = instr.alloc_storage.dtype_hint;
+ this->alloc_storage.device_index = instr.alloc_storage.device_index;
+ this->alloc_storage.ndim = instr.alloc_storage.ndim;
+ if (this->alloc_storage.ndim > 0) {
+ this->alloc_storage.shape =
+ Duplicate<int64_t>(instr.alloc_storage.shape,
instr.alloc_storage.ndim);
+ }
return *this;
case Opcode::ShapeOf:
this->shape_of.tensor = instr.shape_of.tensor;
@@ -250,13 +268,17 @@ Instruction::~Instruction() {
case Opcode::GetTag:
case Opcode::Goto:
case Opcode::LoadConsti:
- case Opcode::AllocStorage:
case Opcode::ShapeOf:
case Opcode::ReshapeTensor:
case Opcode::DeviceCopy:
case Opcode::Fatal:
case Opcode::KillRegister:
return;
+ case Opcode::AllocStorage:
+ if (this->alloc_storage.ndim > 0) {
+ delete[] this->alloc_storage.shape;
+ }
+ return;
case Opcode::AllocTensor:
delete[] this->alloc_tensor.shape;
return;
@@ -338,7 +360,8 @@ Instruction Instruction::AllocTensorReg(RegName storage,
RegName offset, RegName
}
Instruction Instruction::AllocStorage(RegName size, Index alignment,
DLDataType dtype_hint,
- Index device_index, RegName dst) {
+ Index device_index, const
std::vector<int64_t>& shape,
+ RegName dst) {
Instruction instr;
instr.op = Opcode::AllocStorage;
instr.dst = dst;
@@ -346,6 +369,13 @@ Instruction Instruction::AllocStorage(RegName size, Index
alignment, DLDataType
instr.alloc_storage.alignment = alignment;
instr.alloc_storage.dtype_hint = dtype_hint;
instr.alloc_storage.device_index = device_index;
+ instr.alloc_storage.ndim = static_cast<uint32_t>(shape.size());
+ if (instr.alloc_storage.ndim > 0) {
+ instr.alloc_storage.shape = new int64_t[shape.size()];
+ for (size_t i = 0; i < shape.size(); ++i) {
+ instr.alloc_storage.shape[i] = shape[i];
+ }
+ }
return instr;
}
@@ -474,11 +504,12 @@ Instruction Instruction::InvokeClosure(RegName closure,
const std::vector<RegNam
return instr;
}
-Instruction Instruction::LoadConst(Index const_index, RegName dst) {
+Instruction Instruction::LoadConst(Index const_index, Index device_index,
RegName dst) {
Instruction instr;
instr.op = Opcode::LoadConst;
instr.dst = dst;
instr.const_index = const_index;
+ instr.device_index = device_index;
return instr;
}
@@ -596,7 +627,8 @@ void InstructionPrint(std::ostream& os, const Instruction&
instr) {
break;
}
case Opcode::LoadConst: {
- os << "load_const $" << instr.dst << " Const[" << instr.const_index <<
"]";
+ os << "load_const $" << instr.dst << " Const[" << instr.const_index <<
"] "
+ << instr.device_index;
break;
}
case Opcode::LoadConsti: {
@@ -616,9 +648,15 @@ void InstructionPrint(std::ostream& os, const Instruction&
instr) {
break;
}
case Opcode::AllocStorage: {
- os << "alloc_storage $" << instr.dst << " $" <<
instr.alloc_storage.allocation_size << " "
- << instr.alloc_storage.alignment << " "
- << DLDataType2String(instr.alloc_storage.dtype_hint) << " "
+ os << "alloc_storage $" << instr.dst << " ";
+ if (instr.alloc_storage.ndim > 0) {
+ os << "[" << StrJoin<int64_t>(instr.alloc_storage.shape, 0,
instr.alloc_storage.ndim)
+ << "] ";
+ } else {
+ os << "$" << instr.alloc_storage.allocation_size << " " <<
instr.alloc_storage.alignment
+ << " ";
+ }
+ os << DLDataType2String(instr.alloc_storage.dtype_hint) << " "
<< instr.alloc_storage.device_index;
break;
}
diff --git a/src/runtime/vm/executable.cc b/src/runtime/vm/executable.cc
index 2b3119b169..58c509f8d9 100644
--- a/src/runtime/vm/executable.cc
+++ b/src/runtime/vm/executable.cc
@@ -183,7 +183,7 @@ std::string Executable::GetConstants() const {
const auto& constant = constants[i];
auto ndarray = Downcast<NDArray>(constant);
oss << "VM Const[" << i
- << "]: " << RuntimeObject2String(ndarray,
virtual_devices[host_device_index])
+ << "]: " << RuntimeObject2String(ndarray,
virtual_devices[host_device_index].first)
<< " on device index " << const_device_indexes[i] << std::endl;
}
return oss.str();
@@ -192,9 +192,9 @@ std::string Executable::GetConstants() const {
std::string Executable::GetVirtualDevices() const {
std::ostringstream oss;
for (size_t i = 0; i < virtual_devices.size(); ++i) {
- const auto& device = virtual_devices[i];
- oss << "VM VirtualDevice[" << i << "]: device type " << device.device_type
<< " and id "
- << device.device_id << std::endl;
+ const auto& [device, scope] = virtual_devices[i];
+ oss << "VM VirtualDevice[" << i << "]: device type " << device.device_type
<< ", id "
+ << device.device_id << " and mem_scope " << scope << std::endl;
}
return oss.str();
}
@@ -596,7 +596,13 @@ VMInstructionSerializer SerializeInstruction(const
Instruction& instr) {
fields.push_back(dtype.bits);
fields.push_back(dtype.lanes);
fields.push_back(instr.alloc_storage.device_index);
+ fields.push_back(instr.alloc_storage.ndim);
fields.push_back(instr.dst);
+
+ // Save the shape of the tensor.
+ // Note that this field is rotated to the end of the list.
+ fields.insert(fields.end(), instr.alloc_storage.shape,
+ instr.alloc_storage.shape + instr.alloc_storage.ndim);
break;
}
case Opcode::AllocADT: {
@@ -639,8 +645,8 @@ VMInstructionSerializer SerializeInstruction(const
Instruction& instr) {
break;
}
case Opcode::LoadConst: {
- // Number of fields = 2
- fields.assign({instr.const_index, instr.dst});
+ // Number of fields = 3
+ fields.assign({instr.const_index, instr.device_index, instr.dst});
break;
}
case Opcode::LoadConsti: {
@@ -910,8 +916,8 @@ Instruction DeserializeInstruction(const
VMInstructionSerializer& instr) {
return Instruction::AllocClosure(clo_index, num_freevar, free_vars, dst);
}
case Opcode::AllocStorage: {
- // Number of fields = 7
- DCHECK_GE(instr.fields.size(), 7U);
+ // Number of fields = 9
+ DCHECK_GE(instr.fields.size(), 9U);
Index allocation_size = instr.fields[0];
Index alignment = instr.fields[1];
@@ -921,9 +927,11 @@ Instruction DeserializeInstruction(const
VMInstructionSerializer& instr) {
dtype.lanes = instr.fields[4];
Index device_type = instr.fields[5];
- RegName dst = instr.fields[6];
+ Index ndim = instr.fields[6];
+ RegName dst = instr.fields[7];
+ std::vector<Index> shape = ExtractFields(instr.fields, 8, ndim);
- return Instruction::AllocStorage(allocation_size, alignment, dtype,
device_type, dst);
+ return Instruction::AllocStorage(allocation_size, alignment, dtype,
device_type, shape, dst);
}
case Opcode::If: {
// Number of fields = 4
@@ -960,9 +968,9 @@ Instruction DeserializeInstruction(const
VMInstructionSerializer& instr) {
return Instruction::InvokeClosure(closure, args, dst);
}
case Opcode::LoadConst: {
- // Number of fields = 2
- DCHECK_EQ(instr.fields.size(), 2U);
- return Instruction::LoadConst(instr.fields[0], instr.fields[1]);
+ // Number of fields = 3
+ DCHECK_EQ(instr.fields.size(), 3U);
+ return Instruction::LoadConst(instr.fields[0], instr.fields[1],
instr.fields[2]);
}
case Opcode::LoadConsti: {
// Number of fields = 2
diff --git a/src/runtime/vm/profiler/vm.cc b/src/runtime/vm/profiler/vm.cc
index 360185aac5..7df6b928a3 100644
--- a/src/runtime/vm/profiler/vm.cc
+++ b/src/runtime/vm/profiler/vm.cc
@@ -129,9 +129,21 @@ void VirtualMachineDebug::OpStartHook(Instruction instr) {
{{"Argument Shapes",
profiling::ShapeString(shape_tensor,
instr.alloc_tensor_reg.dtype)}});
} else if (instr.op == Opcode::AllocStorage) {
- auto size = LoadScalarInt(instr.alloc_storage.allocation_size);
std::ostringstream shape;
- shape << DLDataType2String(instr.alloc_storage.dtype_hint) << "[" <<
size << "]";
+ if (instr.alloc_storage.ndim > 0) {
+ std::string shape_str = "[";
+ for (uint32_t i = 0; i < instr.alloc_storage.ndim; ++i) {
+ if (i > 0) {
+ shape_str += ", ";
+ }
+ shape_str += std::to_string(instr.alloc_storage.shape[i]);
+ }
+ shape_str += "]";
+ shape << DLDataType2String(instr.alloc_storage.dtype_hint) <<
shape_str;
+ } else {
+ auto size = LoadScalarInt(instr.alloc_storage.allocation_size);
+ shape << DLDataType2String(instr.alloc_storage.dtype_hint) << "[" <<
size << "]";
+ }
Device dev = GetDevice(instr.alloc_storage.device_index);
prof_.operator*().StartCall("VM::AllocStorage", dev,
{{"VM::Argument Shapes",
String(shape.str())}});
diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc
index 50c757f8fb..188a4153e1 100644
--- a/src/runtime/vm/vm.cc
+++ b/src/runtime/vm/vm.cc
@@ -66,7 +66,7 @@ std::ostream& operator<<(std::ostream& os, const VMFunction&
vm_func) {
return os;
}
-inline ObjectRef CopyTo(ObjectRef src, const DLDevice& dev) {
+inline ObjectRef CopyTo(ObjectRef src, const DLDevice& dev, Optional<String>
mem_scope = NullOpt) {
if (src->IsInstance<NDArray::ContainerType>()) {
auto nd_array = Downcast<NDArray>(src);
// TODO(mbs): Should respect device id also.
@@ -79,7 +79,7 @@ inline ObjectRef CopyTo(ObjectRef src, const DLDevice& dev) {
VLOG(2) << "copying from " << nd_array->device.device_type << "["
<< nd_array->device.device_id << "] to " << dev.device_type <<
"[" << dev.device_id
<< "]";
- return nd_array.CopyTo(dev);
+ return nd_array.CopyTo(dev, mem_scope);
}
return src;
} else {
@@ -88,7 +88,7 @@ inline ObjectRef CopyTo(ObjectRef src, const DLDevice& dev) {
std::vector<ObjectRef> ret;
ADT adt = Downcast<ADT>(src);
for (size_t i = 0; i < adt.size(); i++) {
- ret.push_back(CopyTo(adt[i], dev));
+ ret.push_back(CopyTo(adt[i], dev, mem_scope));
}
return ADT(adt->tag, ret.begin(), ret.end());
}
@@ -532,7 +532,7 @@ void VirtualMachine::Init(const std::vector<Device>&
physical_devices,
for (size_t device_index = 0; device_index < num_virtual_devices;
++device_index) {
// We'll retain the legacy behaviour and just match by device type.
// TODO(mbs): Generalize.
- DLDeviceType virtual_device_type =
exec_->virtual_devices[device_index].device_type;
+ DLDeviceType virtual_device_type =
exec_->virtual_devices[device_index].first.device_type;
auto itr = std::find_if(physical_devices.begin(), physical_devices.end(),
[virtual_device_type](const Device&
physical_device) {
return physical_device.device_type ==
virtual_device_type;
@@ -658,8 +658,9 @@ void VirtualMachine::RunLoop(const std::vector<Index>&
output_tensor_reg_indices
}
 if (!const_pool_[instr.const_index].defined()) {
- Device dev = GetDevice(exec_->const_device_indexes[instr.const_index]);
- const_pool_[instr.const_index] = CopyTo(constant_obj, dev);
+ auto& [dev, mem_scope] = exec_->virtual_devices[exec_->const_device_indexes[instr.const_index]];
+ const_pool_[instr.const_index] = CopyTo(constant_obj, dev, String(mem_scope));
 }
WriteRegister(instr.dst, const_pool_[instr.const_index]);
if (is_not_cached) {
@@ -819,17 +820,36 @@ void VirtualMachine::RunLoop(const std::vector<Index>&
output_tensor_reg_indices
}
case Opcode::AllocStorage: {
OpStartHook(instr);
- auto size = LoadScalarInt(instr.alloc_storage.allocation_size);
- auto alignment = instr.alloc_storage.alignment;
auto storage_obj = SimpleObjAllocator().make_object<StorageObj>();
Allocator* allocator = GetAllocator(instr.alloc_storage.device_index);
ICHECK(allocator) << "Did you forget to init the VirtualMachine with
devices?";
- VLOG(2) << "allocating with allocation_size=" << size << ",
alignment=" << alignment
- << ", dtype_hint=" <<
DLDataType2String(instr.alloc_storage.dtype_hint)
- << ", device_index=" << instr.alloc_storage.device_index;
- storage_obj->buffer = allocator->Alloc(size, alignment,
instr.alloc_storage.dtype_hint);
+ if (instr.alloc_storage.ndim > 0) {
+ std::string shape = "[";
+ for (uint32_t i = 0; i < instr.alloc_storage.ndim; ++i) {
+ if (i > 0) {
+ shape += ", ";
+ }
+ shape += std::to_string(instr.alloc_storage.shape[i]);
+ }
+ shape += "]";
+ std::string mem_scope = exec_->virtual_devices[instr.alloc_storage.device_index].second;
+ VLOG(2) << "allocating with ndims=" << instr.alloc_storage.ndim <<
", shape=" << shape
+ << ", dtype_hint=" <<
DLDataType2String(instr.alloc_storage.dtype_hint)
+ << ", device_index=" << instr.alloc_storage.device_index
+ << ", memory_scope=" << mem_scope;
+ storage_obj->buffer =
+ allocator->Alloc(instr.alloc_storage.ndim,
instr.alloc_storage.shape,
+ instr.alloc_storage.dtype_hint, mem_scope);
+ } else {
+ auto size = LoadScalarInt(instr.alloc_storage.allocation_size);
+ auto alignment = instr.alloc_storage.alignment;
+ VLOG(2) << "allocating with allocation_size=" << size << ",
alignment=" << alignment
+ << ", dtype_hint=" <<
DLDataType2String(instr.alloc_storage.dtype_hint)
+ << ", device_index=" << instr.alloc_storage.device_index;
+ storage_obj->buffer = allocator->Alloc(size, alignment,
instr.alloc_storage.dtype_hint);
+ }
Storage storage(storage_obj);
WriteRegister(instr.dst, storage);
OpStopHook();
@@ -899,8 +919,9 @@ void VirtualMachine::RunLoop(const std::vector<Index>&
output_tensor_reg_indices
ICHECK_EQ(actual_src_dev.device_type, inst_src_dev.device_type);
ICHECK_EQ(actual_src_dev.device_id, inst_src_dev.device_id);
Device dst_dev = GetDevice(instr.device_copy.dst_device_index);
+ auto mem_scope = exec_->virtual_devices[instr.device_copy.dst_device_index].second;
- NDArray dst_data = src_data.CopyTo(dst_dev);
+ NDArray dst_data = src_data.CopyTo(dst_dev, String(mem_scope));
WriteRegister(instr.dst, dst_data);
OpStopHook();
pc_++;
diff --git a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
index 3476037946..3c9c3f2caf 100644
--- a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
+++ b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
@@ -21,16 +21,17 @@ import numpy as np
from tvm import relay
from tvm.relay import testing
from tvm.contrib import utils
-from utils.adreno_utils import gpu_preprocess, build_run_compare
+from utils.adreno_utils import gpu_preprocess, build_run_compare,
build_run_compare_vm
import pytest
+executor_type = tvm.testing.parameter("ge", "vm")
dtype = tvm.testing.parameter("float32")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(remote, target, dtype):
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(remote, target,
executor_type, dtype):
input_shape = (1, 32, 42, 42)
filter_shape = (96, 32, 3, 3)
bias_shape = (1, 96, 1, 1)
@@ -65,14 +66,19 @@ def
test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
[], gpu_preprocess
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(remote, target,
dtype):
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(remote, target,
executor_type, dtype):
input_shape = (1, 32, 40, 40)
filter_shape = (96, 32, 2, 2)
bias_shape = (1, 96, 1, 1)
@@ -107,14 +113,19 @@ def
test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(remote, target, dtype)
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
[], gpu_preprocess
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_inceptionv3_35_35_strides(remote, target, dtype):
+def test_conv2d_inceptionv3_35_35_strides(remote, target, executor_type,
dtype):
input_shape = (1, 48, 35, 35)
filter_shape = (64, 48, 5, 5)
bias_shape = (1, 64, 1, 1)
@@ -149,14 +160,19 @@ def test_conv2d_inceptionv3_35_35_strides(remote, target,
dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
[], gpu_preprocess
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_resnet50_v2_nchw_3c(remote, target, dtype):
+def test_conv2d_resnet50_v2_nchw_3c(remote, target, executor_type, dtype):
input_shape = (1, 3, 224, 224)
filter_shape = (64, 3, 7, 7)
bias_shape = (1, 64, 1, 1)
@@ -192,12 +208,15 @@ def test_conv2d_resnet50_v2_nchw_3c(remote, target,
dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_inceptionv3_nchw_3c(remote, target, dtype):
+def test_conv2d_inceptionv3_nchw_3c(remote, target, executor_type, dtype):
input_shape = (1, 3, 299, 299)
filter_shape = (64, 3, 3, 3)
bias_shape = (1, 64, 1, 1)
@@ -232,12 +251,15 @@ def test_conv2d_inceptionv3_nchw_3c(remote, target,
dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_1x1_16c16spatial(remote, target, dtype):
+def test_conv2d_1x1_16c16spatial(remote, target, executor_type, dtype):
input_shape = (1, 16, 256, 256)
filter_shape = (32, 16, 4, 4)
bias_shape = (1, 32, 1, 1)
@@ -272,12 +294,15 @@ def test_conv2d_1x1_16c16spatial(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_4x4_16c16pad(remote, target, dtype):
+def test_conv2d_4x4_16c16pad(remote, target, executor_type, dtype):
input_shape = (1, 32, 256, 256)
filter_shape = (32, 32, 4, 4)
bias_shape = (1, 32, 1, 1)
@@ -312,12 +337,15 @@ def test_conv2d_4x4_16c16pad(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_4x4x4_16c16pad(remote, target, dtype):
+def test_conv2d_4x4x4_16c16pad(remote, target, executor_type, dtype):
input_shape = (1, 32, 256, 256)
filter_shape = (4, 32, 4, 4)
bias_shape = (1, 4, 1, 1)
@@ -352,12 +380,15 @@ def test_conv2d_4x4x4_16c16pad(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_yolov3_v2_nchw_3c(remote, target, dtype):
+def test_conv2d_yolov3_v2_nchw_3c(remote, target, executor_type, dtype):
input_shape = (1, 1024, 13, 13)
filter_shape = (255, 1024, 1, 1)
A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -385,12 +416,15 @@ def test_conv2d_yolov3_v2_nchw_3c(remote, target, dtype):
"weight": tvm.nd.array(filter_data),
}
- build_run_compare(remote, mod, params, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_vgg16_winograd_4d(remote, target, dtype):
+def test_conv2d_vgg16_winograd_4d(remote, target, executor_type, dtype):
input_shape = (1, 512, 28, 28)
filter_shape = (512, 512, 3, 3)
bias_shape = (1, 512, 1, 1)
@@ -429,16 +463,35 @@ def test_conv2d_vgg16_winograd_4d(remote, target, dtype):
f.write(
f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno
-max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 512, 28,
28], "{dtype}"], ["TENSOR", [512, 512, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1],
[1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null,
"entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]],
["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result":
[[0.0037244], 0, 7.06374192237854, 165 [...]
)
- graph = build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
stat_file=stat_file
- )
- matches = re.findall("winograd", graph)
- assert len(matches) > 0
+ if executor_type == "ge":
+ graph = build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", graph)
+ assert len(matches) > 0
+ else:
+ vmc = build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", vmc.primitives)
+ assert len(matches) > 0
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_winograd_conv(remote, target, dtype):
+def test_conv2d_winograd_conv(remote, target, executor_type, dtype):
input_shape = (1, 4, 3, 3)
A = relay.var("data", shape=input_shape, dtype=dtype)
filter_shape3 = (8, 4, 3, 3)
@@ -476,16 +529,35 @@ def test_conv2d_winograd_conv(remote, target, dtype):
f.write(
f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno
-max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 4, 3,
3], "{dtype}"], ["TENSOR", [8, 4, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1,
1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity":
[["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x",
"sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0,
7.06374192237854, 1653898629. [...]
)
- graph = build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
stat_file=stat_file
- )
- matches = re.findall("winograd", graph)
- assert len(matches) > 0
+ if executor_type == "ge":
+ graph = build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", graph)
+ assert len(matches) > 0
+ else:
+ vmc = build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", vmc.primitives)
+ assert len(matches) > 0
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_residual_block(remote, target, dtype):
+def test_residual_block(remote, target, executor_type, dtype):
"""
- some kind of residual block followed by convolution to have texture
after residual block
- scalar data type verification which should be mapped to global memory
scope
@@ -602,14 +674,31 @@ def test_residual_block(remote, target, dtype):
"",
]
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
static_memory_scope
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
+ else:
+ build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_concat(remote, target, dtype):
+def test_concat(remote, target, executor_type, dtype):
"""
layout_transform (NCHW->NCHW4c)
| <- buffer
@@ -716,14 +805,31 @@ def test_concat(remote, target, dtype):
static_memory_scope = []
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
static_memory_scope
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
+ else:
+ build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_pooling_branching_texture_params(remote, target, dtype):
+def test_pooling_branching_texture_params(remote, target, executor_type,
dtype):
"""
Verification of the pooling and many branches having textures
layout_transform (NCHW->NCHW4c)
@@ -844,14 +950,31 @@ def test_pooling_branching_texture_params(remote, target,
dtype):
"",
]
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
static_memory_scope
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
+ else:
+ build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_branching_texture_params(remote, target, dtype):
+def test_branching_texture_params(remote, target, executor_type, dtype):
"""
Verification of passing texture to several consumers markup of relay
variables in
primary functions + on_device
@@ -970,15 +1093,32 @@ def test_branching_texture_params(remote, target, dtype):
"",
]
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
static_memory_scope
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
+ else:
+ build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
# function repeat, params scope are different in reused functions
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_different_lowering_same_op(remote, target, dtype):
+def test_conv2d_different_lowering_same_op(remote, target, executor_type,
dtype):
"""
Use case for verification of caching compiled functions
Three convolutions following by each other in this case should be
@@ -1054,14 +1194,31 @@ def test_conv2d_different_lowering_same_op(remote,
target, dtype):
"",
]
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
static_memory_scope
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
+ else:
+ build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_winograd_non_rect(remote, target, dtype):
+def test_conv2d_winograd_non_rect(remote, target, executor_type, dtype):
input_shape = (1, 771, 36, 64)
A = relay.var("data", shape=input_shape, dtype=dtype)
filter_shape = (128, 771, 3, 3)
@@ -1085,17 +1242,36 @@ def test_conv2d_winograd_non_rect(remote, target,
dtype):
f.write(
f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno
-max_num_threads=256 -texture_spatial_limit=16384 -thread_warp_size=1",
"conv2d_nchw_winograd.image2d", [["TENSOR", [1, 771, 36, 64], "{dtype}"],
["TENSOR", [128, 771, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1],
"{dtype}"], {{}}], "config": {{"index": 5399, "code_hash": null, "entity":
[["auto_unroll_max_step", "ot", 16], ["tile_y", "sp", [-1, 1, 32]], ["tile_x",
"sp", [-1, 4, 8]], ["tile_rc", "sp", [-1, 193]]] [...]
)
- graph = build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
stat_file=stat_file
- )
- matches = re.findall("winograd", graph)
- assert len(matches) > 0
+ if executor_type == "ge":
+ graph = build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", graph)
+ assert len(matches) > 0
+ else:
+ vmc = build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", vmc.primitives)
+ assert len(matches) > 0
# function repeat, params scope are different in reused functions
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_injective_nwo_inputs1(remote, target, dtype):
+def test_injective_nwo_inputs1(remote, target, executor_type, dtype):
"""
Use case for verification of stability of annotation primary functions
having several ops accepting data outside of Primary function
@@ -1186,15 +1362,32 @@ def test_injective_nwo_inputs1(remote, target, dtype):
"global",
"global",
]
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
static_memory_scope
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
+ else:
+ build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
# function repeat, params scope are different in reused functions
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_injective_nwo_inputs2(remote, target, dtype):
+def test_injective_nwo_inputs2(remote, target, executor_type, dtype):
"""
Use case for verification of stability of annotation primary functions
having several ops accepting data outside of Primary function
@@ -1284,14 +1477,31 @@ def test_injective_nwo_inputs2(remote, target, dtype):
"global.texture",
"global",
]
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
static_memory_scope
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
+ else:
+ build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ static_memory_scope,
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_to_3_channels(remote, target, dtype):
+def test_conv2d_to_3_channels(remote, target, executor_type, dtype):
input_shape = (1, 256, 200, 200)
filter_shape = (3, 256, 1, 1)
A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -1316,7 +1526,12 @@ def test_conv2d_to_3_channels(remote, target, dtype):
"weight": tvm.nd.array(filter_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target, [])
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target, [])
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, []
+ )
if __name__ == "__main__":
diff --git a/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py
b/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py
index 5f69e777d9..dc86a23187 100644
--- a/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py
+++ b/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py
@@ -22,16 +22,17 @@ import numpy as np
from tvm import relay
from tvm.relay import testing
from tvm.contrib import utils
-from utils.adreno_utils import gpu_preprocess, build_run_compare
+from utils.adreno_utils import gpu_preprocess, build_run_compare,
build_run_compare_vm
import pytest
+executor_type = tvm.testing.parameter("ge", "vm")
dtype = tvm.testing.parameter("float32")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16(remote, target, dtype):
+def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16(remote, target,
executor_type, dtype):
input_shape = (1, 257, 257, 32)
filter_shape = (1, 1, 32, 16)
bias_shape = (filter_shape[-1],)
@@ -63,12 +64,15 @@ def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16(remote,
target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding(remote, target,
dtype):
+def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding(remote, target,
executor_type, dtype):
input_shape = (1, 257, 257, 32)
filter_shape = (1, 1, 32, 16)
bias_shape = (filter_shape[-1],)
@@ -103,12 +107,15 @@ def
test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding(remote, target, dt
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_4_35_35_32x3_3_144_16(remote, target, dtype):
+def test_conv2d_4_35_35_32x3_3_144_16(remote, target, executor_type, dtype):
input_shape = (4, 35, 35, 32)
filter_shape = (3, 3, 32, 16)
bias_shape = (filter_shape[-1],)
@@ -141,12 +148,15 @@ def test_conv2d_4_35_35_32x3_3_144_16(remote, target,
dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32(remote, target, dtype):
+def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32(remote, target, executor_type,
dtype):
input_shape = (1, 513, 513, 3)
filter_shape = (3, 3, 3, 32)
bias_shape = (filter_shape[-1],)
@@ -179,12 +189,15 @@ def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32(remote,
target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(remote, target, dtype):
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(remote, target,
executor_type, dtype):
input_shape = (1, 42, 42, 32)
filter_shape = (3, 3, 32, 96)
bias_shape = (1, 1, 1, 96)
@@ -219,14 +232,19 @@ def
test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
[], gpu_preprocess
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(remote, target,
dtype):
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(remote, target,
executor_type, dtype):
input_shape = (1, 40, 40, 32)
filter_shape = (2, 2, 32, 96)
bias_shape = (1, 1, 1, 96)
@@ -261,14 +279,19 @@ def
test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(remote, target, dtype)
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
[], gpu_preprocess
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_inceptionv3_35_35_strides(remote, target, dtype):
+def test_conv2d_inceptionv3_35_35_strides(remote, target, executor_type,
dtype):
input_shape = (1, 35, 35, 48)
filter_shape = (5, 5, 48, 64)
bias_shape = (1, 1, 1, 64)
@@ -303,14 +326,19 @@ def test_conv2d_inceptionv3_35_35_strides(remote, target,
dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
[], gpu_preprocess
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_resnet50_v2_nhwc_3c(remote, target, dtype):
+def test_conv2d_resnet50_v2_nhwc_3c(remote, target, executor_type, dtype):
input_shape = (1, 224, 224, 3)
filter_shape = (7, 7, 3, 64)
bias_shape = (1, 1, 1, 64)
@@ -346,12 +374,15 @@ def test_conv2d_resnet50_v2_nhwc_3c(remote, target,
dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_inceptionv3_nhwc_3c(remote, target, dtype):
+def test_conv2d_inceptionv3_nhwc_3c(remote, target, executor_type, dtype):
input_shape = (1, 299, 299, 3)
filter_shape = (3, 3, 3, 64)
bias_shape = (1, 1, 1, 64)
@@ -386,12 +417,15 @@ def test_conv2d_inceptionv3_nhwc_3c(remote, target,
dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_1x1_16c16spatial(remote, target, dtype):
+def test_conv2d_1x1_16c16spatial(remote, target, executor_type, dtype):
input_shape = (1, 128, 128, 16)
filter_shape = (4, 4, 16, 32)
bias_shape = (1, 1, 1, 32)
@@ -426,12 +460,15 @@ def test_conv2d_1x1_16c16spatial(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_4x4_16c16pad(remote, target, dtype):
+def test_conv2d_4x4_16c16pad(remote, target, executor_type, dtype):
input_shape = (1, 256, 256, 32)
filter_shape = (4, 4, 32, 32)
bias_shape = (1, 1, 1, 32)
@@ -466,12 +503,15 @@ def test_conv2d_4x4_16c16pad(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_4x4x4_16c16pad(remote, target, dtype):
+def test_conv2d_4x4x4_16c16pad(remote, target, executor_type, dtype):
input_shape = (1, 256, 256, 32)
filter_shape = (4, 4, 32, 4)
bias_shape = (1, 1, 1, 4)
@@ -505,12 +545,15 @@ def test_conv2d_4x4x4_16c16pad(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_yolov3_v2_nhwc_3c(remote, target, dtype):
+def test_conv2d_yolov3_v2_nhwc_3c(remote, target, executor_type, dtype):
input_shape = (1, 13, 13, 1024)
filter_shape = (1, 1, 1024, 255)
A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -538,12 +581,15 @@ def test_conv2d_yolov3_v2_nhwc_3c(remote, target, dtype):
"weight": tvm.nd.array(filter_data),
}
- build_run_compare(remote, mod, params, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_vgg16_winograd_4d(remote, target, dtype):
+def test_conv2d_vgg16_winograd_4d(remote, target, executor_type, dtype):
input_shape = (1, 28, 28, 512)
filter_shape = (3, 3, 512, 512)
bias_shape = (1, 1, 1, 512)
@@ -582,16 +628,35 @@ def test_conv2d_vgg16_winograd_4d(remote, target, dtype):
f.write(
f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno
-max_num_threads=256", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 28, 28,
512], "{dtype}"], ["TENSOR", [3, 3, 512, 512], "{dtype}"], [1, 1], [1, 1, 1,
1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null,
"entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]],
["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result":
[[0.0037244], 0, 7.06374192237854, 165 [...]
)
- graph = build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
stat_file=stat_file
- )
- matches = re.findall("winograd", graph)
- assert len(matches) > 0
+ if executor_type == "ge":
+ graph = build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", graph)
+ assert len(matches) > 0
+ else:
+ vmc = build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", vmc.primitives)
+ assert len(matches) > 0
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_vgg16_winograd_4d_expand_spatial_dims(remote, target, dtype):
+def test_conv2d_vgg16_winograd_4d_expand_spatial_dims(remote, target,
executor_type, dtype):
input_shape = (1, 28, 28, 1)
filter_shape = (3, 3, 1, 64)
bias_shape = (1, 1, 1, 64)
@@ -629,16 +694,35 @@ def
test_conv2d_vgg16_winograd_4d_expand_spatial_dims(remote, target, dtype):
f.write(
f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno
-max_num_threads=256", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 28, 28,
1], "{dtype}"], ["TENSOR", [3, 3, 1, 64], "{dtype}"], [1, 1], [0, 0, 0, 0], [1,
1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity":
[["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x",
"sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0,
7.06374192237854, 16538986 [...]
)
- graph = build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
stat_file=stat_file
- )
- matches = re.findall("winograd", graph)
- assert len(matches) > 0
+ if executor_type == "ge":
+ graph = build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", graph)
+ assert len(matches) > 0
+ else:
+ vmc = build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", vmc.primitives)
+ assert len(matches) > 0
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_winograd_conv(remote, target, dtype):
+def test_conv2d_winograd_conv(remote, target, executor_type, dtype):
input_shape = (1, 3, 3, 4)
A = relay.var("data", shape=input_shape, dtype=dtype)
filter_shape3 = (3, 3, 4, 8)
@@ -690,16 +774,35 @@ def test_conv2d_winograd_conv(remote, target, dtype):
f.write(
f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno
-max_num_threads=256", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 3, 3,
4], "{dtype}"], ["TENSOR", [3, 3, 4, 8], "{dtype}"], [1, 1], [1, 1, 1, 1], [1,
1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity":
[["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x",
"sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0,
7.06374192237854, 1653898629. [...]
)
- graph = build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
stat_file=stat_file
- )
- matches = re.findall("winograd", graph)
- assert len(matches) > 0
+ if executor_type == "ge":
+ graph = build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", graph)
+ assert len(matches) > 0
+ else:
+ vmc = build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", vmc.primitives)
+ assert len(matches) > 0
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_winograd_non_rect(remote, target, dtype):
+def test_conv2d_winograd_non_rect(remote, target, executor_type, dtype):
input_shape = (1, 36, 64, 771)
A = relay.var("data", shape=input_shape, dtype=dtype)
filter_shape = (3, 3, 771, 128)
@@ -730,16 +833,35 @@ def test_conv2d_winograd_non_rect(remote, target, dtype):
f.write(
f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno
-max_num_threads=256 -texture_spatial_limit=16384 -thread_warp_size=1",
"conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 36, 64, 771], "{dtype}"],
["TENSOR", [3, 3, 771, 128], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1],
"{dtype}"], {{}}], "config": {{"index": 5399, "code_hash": null, "entity":
[["auto_unroll_max_step", "ot", 16], ["tile_y", "sp", [-1, 1, 32]], ["tile_x",
"sp", [-1, 4, 8]], ["tile_rc", "sp", [-1, 193]]] [...]
)
- graph = build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
stat_file=stat_file
- )
- matches = re.findall("winograd", graph)
- assert len(matches) > 0
+ if executor_type == "ge":
+ graph = build_run_compare(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", graph)
+ assert len(matches) > 0
+ else:
+ vmc = build_run_compare_vm(
+ remote,
+ mod,
+ params1,
+ {"data": input_shape},
+ {"data": dtype},
+ target,
+ stat_file=stat_file,
+ )
+ matches = re.findall("winograd", vmc.primitives)
+ assert len(matches) > 0
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_to_3_channels(remote, target, dtype):
+def test_conv2d_to_3_channels(remote, target, executor_type, dtype):
input_shape = (1, 200, 200, 256)
filter_shape = (1, 1, 256, 3)
A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -764,7 +886,12 @@ def test_conv2d_to_3_channels(remote, target, dtype):
"weight": tvm.nd.array(filter_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target, [])
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target, [])
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, []
+ )
if __name__ == "__main__":
diff --git
a/tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py
b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py
index 2c729a36eb..87e9542140 100644
--- a/tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py
+++ b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py
@@ -20,14 +20,15 @@ import tvm
import numpy as np
from tvm import relay
from tvm.relay import testing
-from utils.adreno_utils import gpu_preprocess, build_run_compare
+from utils.adreno_utils import gpu_preprocess, build_run_compare,
build_run_compare_vm
+executor_type = tvm.testing.parameter("ge", "vm")
dtype = tvm.testing.parameter("float32")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_depthwise_conv2d_bias_nchwc(remote, target, dtype):
+def test_depthwise_conv2d_bias_nchwc(remote, target, executor_type, dtype):
input_shape = (1, 64, 112, 112)
filter_shape = (64, 1, 3, 3)
bias_shape = (1, 64, 1, 1)
@@ -64,14 +65,19 @@ def test_depthwise_conv2d_bias_nchwc(remote, target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
[], gpu_preprocess
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_depthwise_conv2d_nchwc(remote, target, dtype):
+def test_depthwise_conv2d_nchwc(remote, target, executor_type, dtype):
input_shape = (1, 64, 112, 112)
filter_shape = (64, 1, 3, 3)
bias_shape = (1, 64, 1, 1)
@@ -103,14 +109,19 @@ def test_depthwise_conv2d_nchwc(remote, target, dtype):
"weight": tvm.nd.array(filter_data),
}
- build_run_compare(
- remote, mod, params1, {"data": input_shape}, {"data": dtype}, target,
[], gpu_preprocess
- )
+ if executor_type == "ge":
+ build_run_compare(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, [], gpu_preprocess
+ )
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_depthwise_conv2d_bias_nchw(remote, target, dtype):
+def test_depthwise_conv2d_bias_nchw(remote, target, executor_type, dtype):
input_shape = (1, 64, 112, 112)
filter_shape = (64, 1, 3, 3)
bias_shape = (1, 64, 1, 1)
@@ -147,12 +158,15 @@ def test_depthwise_conv2d_bias_nchw(remote, target,
dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_depthwise_conv2d_repack_bias_nchw(remote, target, dtype):
+def test_depthwise_conv2d_repack_bias_nchw(remote, target, executor_type,
dtype):
input_shape = (1, 63, 112, 112)
filter_shape = (63, 1, 3, 3)
bias_shape = (1, 63, 1, 1)
@@ -189,12 +203,15 @@ def test_depthwise_conv2d_repack_bias_nchw(remote,
target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_to_3_channels(remote, target, dtype):
+def test_conv2d_to_3_channels(remote, target, executor_type, dtype):
input_shape = (1, 3, 200, 200)
filter_shape = (3, 1, 1, 1)
A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -220,7 +237,12 @@ def test_conv2d_to_3_channels(remote, target, dtype):
"weight": tvm.nd.array(filter_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target, [])
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target, [])
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, []
+ )
if __name__ == "__main__":
diff --git
a/tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py
b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py
index 28f0f4cefa..782c99a96a 100644
--- a/tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py
+++ b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py
@@ -20,14 +20,16 @@ import tvm
import numpy as np
from tvm import relay
from tvm.relay import testing
-from utils.adreno_utils import build_run_compare
+from utils.adreno_utils import build_run_compare, build_run_compare_vm
+
+executor_type = tvm.testing.parameter("ge", "vm")
dtype = tvm.testing.parameter("float32")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1(remote, target,
dtype):
+def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1(remote, target,
executor_type, dtype):
input_shape = (1, 129, 129, 144)
filter_shape = (3, 3, 144, 1)
kernel_size = (filter_shape[0], filter_shape[1])
@@ -62,12 +64,15 @@ def
test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1(remote, target, dtyp
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1(remote, target,
dtype):
+def test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1(remote, target,
executor_type, dtype):
input_shape = (4, 35, 35, 576)
filter_shape = (3, 3, 576, 1)
kernel_size = (filter_shape[0], filter_shape[1])
@@ -102,12 +107,17 @@ def
test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1(remote, target, dtype)
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def
test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding(remote,
target, dtype):
+def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding(
+ remote, target, executor_type, dtype
+):
input_shape = (1, 129, 129, 144)
filter_shape = (3, 3, 144, 1)
kernel_size = (filter_shape[0], filter_shape[1])
@@ -144,12 +154,15 @@ def
test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding(remote,
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_depthwise_conv2d_1_513_513_7x3_3_7_1(remote, target, dtype):
+def test_depthwise_conv2d_1_513_513_7x3_3_7_1(remote, target, executor_type,
dtype):
input_shape = (1, 513, 513, 7)
filter_shape = (3, 3, 7, 1)
bias_shape = (filter_shape[2],)
@@ -183,12 +196,15 @@ def test_depthwise_conv2d_1_513_513_7x3_3_7_1(remote,
target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_depthwise_conv2d_1_513_513_3x3_3_3_1(remote, target, dtype):
+def test_depthwise_conv2d_1_513_513_3x3_3_3_1(remote, target, executor_type,
dtype):
input_shape = (1, 513, 513, 3)
filter_shape = (3, 3, 3, 1)
bias_shape = (filter_shape[2],)
@@ -222,12 +238,15 @@ def test_depthwise_conv2d_1_513_513_3x3_3_3_1(remote,
target, dtype):
"bias": tvm.nd.array(bias_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_conv2d_to_3_channels(remote, target, dtype):
+def test_conv2d_to_3_channels(remote, target, executor_type, dtype):
input_shape = (1, 200, 200, 3)
filter_shape = (1, 1, 3, 1)
A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -253,7 +272,12 @@ def test_conv2d_to_3_channels(remote, target, dtype):
"weight": tvm.nd.array(filter_data),
}
- build_run_compare(remote, mod, params1, {"data": input_shape}, {"data":
dtype}, target, [])
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params1, {"data": input_shape},
{"data": dtype}, target, [])
+ else:
+ build_run_compare_vm(
+ remote, mod, params1, {"data": input_shape}, {"data": dtype},
target, []
+ )
if __name__ == "__main__":
diff --git a/tests/python/relay/opencl_texture/test_injection_texture.py
b/tests/python/relay/opencl_texture/test_injection_texture.py
index 991983706f..31c082c994 100644
--- a/tests/python/relay/opencl_texture/test_injection_texture.py
+++ b/tests/python/relay/opencl_texture/test_injection_texture.py
@@ -20,48 +20,56 @@ import pytest
import tvm
import numpy as np
from tvm import relay
-from tvm.relay import testing
-from tvm.contrib import utils
-from utils.adreno_utils import gpu_preprocess, build_run_compare
+from utils.adreno_utils import build_run_compare, build_run_compare_vm
+executor_type = tvm.testing.parameter("ge", "vm")
dtype = tvm.testing.parameter("float32")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_layout_transform_to_block_nchw4c(remote, target, dtype):
+def test_layout_transform_to_block_nchw4c(remote, target, executor_type,
dtype):
"""Verification of the case NCHW->NCHW4c"""
input_shape = (1, 32, 720, 1280)
A = relay.var("data", shape=input_shape, dtype=dtype)
lt = relay.layout_transform(A, "NCHW", "NCHW4c")
mod = relay.Function([A], lt)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_layout_transform_to_block_nchw(remote, target, dtype):
+def test_layout_transform_to_block_nchw(remote, target, executor_type, dtype):
"""Verification of the case NCHW4c->NCHW"""
input_shape = (1, 36, 1, 1, 4)
A = relay.var("data", shape=input_shape, dtype=dtype)
lt = relay.layout_transform(A, "NCHW4c", "NCHW")
mod = relay.Function([A], lt)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_layout_transform_to_block_nhwc4c(remote, target, dtype):
+def test_layout_transform_to_block_nhwc4c(remote, target, executor_type,
dtype):
"""Verification of the case NHWC->NHWC4c"""
input_shape = (1, 1, 1, 144)
A = relay.var("data", shape=input_shape, dtype=dtype)
lt = relay.layout_transform(A, "NHWC", "NHWC4c")
mod = relay.Function([A], lt)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@pytest.mark.skipif(
@@ -69,7 +77,7 @@ def test_layout_transform_to_block_nhwc4c(remote, target,
dtype):
)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_layout_transform_to_block_nhwc(remote, target, dtype):
+def test_layout_transform_to_block_nhwc(remote, target, executor_type, dtype):
"""Verification of the case NHWC4c->NHWC"""
input_shape = (1, 80, 80, 36, 4)
A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -78,7 +86,10 @@ def test_layout_transform_to_block_nhwc(remote, target,
dtype):
lt = relay.layout_transform(cast, "NHWC4c", "NHWC")
mod = relay.Function([A], lt)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
if __name__ == "__main__":
diff --git a/tests/python/relay/opencl_texture/test_network.py
b/tests/python/relay/opencl_texture/test_network.py
index 1d0e996f9f..2b2f3741cb 100644
--- a/tests/python/relay/opencl_texture/test_network.py
+++ b/tests/python/relay/opencl_texture/test_network.py
@@ -24,10 +24,13 @@ from tvm import relay
from tvm.contrib import utils
from tvm.relay import testing
from tvm.relay.op import register_mixed_precision_conversion
-from utils.adreno_utils import build_run_compare, get_model, gpu_preprocess
+from utils.adreno_utils import build_run_compare, build_run_compare_vm,
get_model, gpu_preprocess
-def _test_mobilenet_v1(remote, target, calc_dtype, acc_dtype):
+executor_type = tvm.testing.parameter("ge", "vm")
+
+
+def _test_mobilenet_v1(remote, target, calc_dtype, executor_type, acc_dtype):
mod, params, inputs, dtypes = get_model(
"https://github.com/mlcommons/mobile_models/raw/main/v0_7/tflite/mobilenet_edgetpu_224_1.0_float.tflite",
"mobilenet_edgetpu_224_1.0_float.tflite",
@@ -46,29 +49,32 @@ def _test_mobilenet_v1(remote, target, calc_dtype,
acc_dtype):
},
)
- build_run_compare(remote, mod, params, inputs, dtypes, target, [])
+ if executor_type == "ge":
+ build_run_compare(remote, mod, params, inputs, dtypes, target, [])
+ else:
+ build_run_compare_vm(remote, mod, params, inputs, dtypes, target, [])
@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/13443")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
@pytest.mark.skipif(tvm.testing.utils.IS_IN_CI, reason="CI doesn't support
fp16(half datatypes)")
-def test_mobilenet_v1_fp16(remote, target):
- _test_mobilenet_v1(remote, target, "float16", "float16")
+def test_mobilenet_v1_fp16(remote, target, executor_type):
+ _test_mobilenet_v1(remote, target, "float16", executor_type, "float16")
@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/13443")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_mobilenet_v1_fp32(remote, target):
- _test_mobilenet_v1(remote, target, "float32", "float32")
+def test_mobilenet_v1_fp32(remote, target, executor_type):
+ _test_mobilenet_v1(remote, target, "float32", executor_type, "float32")
@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/13443")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_mobilenet_v1_fp16_acc32(remote, target):
- _test_mobilenet_v1(remote, target, "float16", "float32")
+def test_mobilenet_v1_fp16_acc32(remote, target, executor_type):
+ _test_mobilenet_v1(remote, target, "float16", executor_type, "float32")
if __name__ == "__main__":
diff --git a/tests/python/relay/opencl_texture/test_pool_texture.py
b/tests/python/relay/opencl_texture/test_pool_texture.py
index faeb121c80..6190790a3d 100644
--- a/tests/python/relay/opencl_texture/test_pool_texture.py
+++ b/tests/python/relay/opencl_texture/test_pool_texture.py
@@ -17,15 +17,16 @@
import tvm
from tvm import relay
-from utils.adreno_utils import build_run_compare
+from utils.adreno_utils import build_run_compare, build_run_compare_vm
+executor_type = tvm.testing.parameter("ge", "vm")
dtype = tvm.testing.parameter("float32")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_global_pool2d_nchw_wide(remote, target, dtype):
+def test_global_pool2d_nchw_wide(remote, target, executor_type, dtype):
"""
Use case of NCHW global pooling with big spatial valies
"""
@@ -34,12 +35,15 @@ def test_global_pool2d_nchw_wide(remote, target, dtype):
C = relay.nn.global_avg_pool2d(A)
mod = relay.Function([A], C)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_global_pool2d_nchw4c_wide(remote, target, dtype):
+def test_global_pool2d_nchw4c_wide(remote, target, executor_type, dtype):
"""
Use case of blocked NCHW4c global pooling with big spatial valies
"""
@@ -48,12 +52,15 @@ def test_global_pool2d_nchw4c_wide(remote, target, dtype):
C = relay.nn.global_avg_pool2d(A, layout="NCHW4c")
mod = relay.Function([A], C)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_global_pool2d_nchw_deep(remote, target, dtype):
+def test_global_pool2d_nchw_deep(remote, target, executor_type, dtype):
"""
Use case of NCHW deep global pooling
"""
@@ -62,12 +69,15 @@ def test_global_pool2d_nchw_deep(remote, target, dtype):
C = relay.nn.global_avg_pool2d(A)
mod = relay.Function([A], C)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_global_pool2d_nchw4c_deep(remote, target, dtype):
+def test_global_pool2d_nchw4c_deep(remote, target, executor_type, dtype):
"""
Use case of blocked NCHW4c deep global pooling
"""
@@ -76,12 +86,15 @@ def test_global_pool2d_nchw4c_deep(remote, target, dtype):
C = relay.nn.global_avg_pool2d(A, layout="NCHW4c")
mod = relay.Function([A], C)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_global_pool2d_nhwc(remote, target, dtype):
+def test_global_pool2d_nhwc(remote, target, executor_type, dtype):
"""
Use case of NHWC global pooling with big spatial valies
"""
@@ -90,12 +103,15 @@ def test_global_pool2d_nhwc(remote, target, dtype):
C = relay.nn.global_avg_pool2d(A, layout="NHWC")
mod = relay.Function([A], C)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_global_pool2d_nhwc4c(remote, target, dtype):
+def test_global_pool2d_nhwc4c(remote, target, executor_type, dtype):
"""
Use case of NHWC deep global pooling
"""
@@ -104,12 +120,15 @@ def test_global_pool2d_nhwc4c(remote, target, dtype):
C = relay.nn.global_avg_pool2d(A, layout="NHWC4c")
mod = relay.Function([A], C)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_global_max_pool2d_nchw_wide(remote, target, dtype):
+def test_global_max_pool2d_nchw_wide(remote, target, executor_type, dtype):
"""
Use case of NCHW global pooling with big spatial valies
"""
@@ -118,12 +137,15 @@ def test_global_max_pool2d_nchw_wide(remote, target,
dtype):
C = relay.nn.global_max_pool2d(A)
mod = relay.Function([A], C)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_global_max_pool2d_nchw4c_wide(remote, target, dtype):
+def test_global_max_pool2d_nchw4c_wide(remote, target, executor_type, dtype):
"""
Use case of blocked NCHW4c global pooling with big spatial valies
"""
@@ -132,4 +154,11 @@ def test_global_max_pool2d_nchw4c_wide(remote, target,
dtype):
C = relay.nn.global_max_pool2d(A, layout="NCHW4c")
mod = relay.Function([A], C)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+
+
+if __name__ == "__main__":
+ tvm.testing.main()
diff --git a/tests/python/relay/opencl_texture/test_reduction_texture.py
b/tests/python/relay/opencl_texture/test_reduction_texture.py
index 5728e6294f..1016a7c88e 100644
--- a/tests/python/relay/opencl_texture/test_reduction_texture.py
+++ b/tests/python/relay/opencl_texture/test_reduction_texture.py
@@ -21,123 +21,151 @@ import numpy as np
from tvm import relay
from tvm.relay import testing
from tvm.contrib import utils
-from utils.adreno_utils import gpu_preprocess, build_run_compare
+from utils.adreno_utils import gpu_preprocess, build_run_compare,
build_run_compare_vm
+executor_type = tvm.testing.parameter("ge", "vm")
dtype = tvm.testing.parameter("float32")
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_mean(remote, target, dtype):
+def test_mean(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 720, 1280)
A = relay.var("data", shape=input_shape, dtype=dtype)
mean = relay.mean(A, axis=1, keepdims=True)
mod = relay.Function([A], mean)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_argmax(remote, target, dtype):
+def test_argmax(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 720, 1280)
A = relay.var("data", shape=input_shape, dtype=dtype)
argmax = relay.op.argmax(A, axis=[1])
mod = relay.Function([A], argmax)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_reduction_max(remote, target, dtype):
+def test_reduction_max(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 720, 1280)
A = relay.var("data", shape=input_shape, dtype=dtype)
argmax = relay.op.max(A, axis=[1])
mod = relay.Function([A], argmax)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_mean_nd4(remote, target, dtype):
+def test_mean_nd4(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 729, 729)
A = relay.var("data", shape=input_shape, dtype=dtype)
mean = relay.mean(A, axis=1, keepdims=True)
mod = relay.Function([A], mean)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_argmax_nd4(remote, target, dtype):
+def test_argmax_nd4(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 729, 729)
A = relay.var("data", shape=input_shape, dtype=dtype)
argmax = relay.op.argmax(A, axis=[1])
mod = relay.Function([A], argmax)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_reduction_max_nd4(remote, target, dtype):
+def test_reduction_max_nd4(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 729, 729)
A = relay.var("data", shape=input_shape, dtype=dtype)
argmax = relay.op.max(A, axis=[1])
mod = relay.Function([A], argmax)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_mean_b4(remote, target, dtype):
+def test_mean_b4(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 720, 320, 4)
A = relay.var("data", shape=input_shape, dtype=dtype)
mean = relay.mean(A, axis=1, keepdims=True)
mod = relay.Function([A], mean)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_argmax_b4(remote, target, dtype):
+def test_argmax_b4(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 720, 320, 4)
A = relay.var("data", shape=input_shape, dtype=dtype)
argmax = relay.op.argmax(A, axis=[1])
mod = relay.Function([A], argmax)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_reduction_max_b4(remote, target, dtype):
+def test_reduction_max_b4(remote, target, executor_type, dtype):
# NCHW
input_shape = (1, 3, 720, 320, 4)
A = relay.var("data", shape=input_shape, dtype=dtype)
argmax = relay.op.max(A, axis=[1])
mod = relay.Function([A], argmax)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_mean_global_pooling(remote, target, dtype):
+def test_mean_global_pooling(remote, target, executor_type, dtype):
"""
Use case of blocked NCHW4c global pooling with big spatial valies
"""
@@ -146,12 +174,15 @@ def test_mean_global_pooling(remote, target, dtype):
mean = relay.mean(A, axis=[1, 2], keepdims=True)
mod = relay.Function([A], mean)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_mean_global_pooling_block4(remote, target, dtype):
+def test_mean_global_pooling_block4(remote, target, executor_type, dtype):
"""
Use case of blocked NCHW4c global pooling with big spatial valies
"""
@@ -160,12 +191,15 @@ def test_mean_global_pooling_block4(remote, target,
dtype):
mean = relay.mean(A, axis=[1, 2], keepdims=True)
mod = relay.Function([A], mean)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
-def test_max_global_pooling_block4(remote, target, dtype):
+def test_max_global_pooling_block4(remote, target, executor_type, dtype):
"""
Use case of blocked NCHW4c global pooling with big spatial valies
"""
@@ -174,7 +208,10 @@ def test_max_global_pooling_block4(remote, target, dtype):
mean = relay.max(A, axis=[1, 2], keepdims=True)
mod = relay.Function([A], mean)
- build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype},
target)
+ if executor_type == "ge":
+ build_run_compare(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
+ else:
+ build_run_compare_vm(remote, mod, {}, {"data": input_shape}, {"data":
dtype}, target)
@tvm.testing.requires_opencl
diff --git a/tests/python/relay/opencl_texture/utils/adreno_utils.py
b/tests/python/relay/opencl_texture/utils/adreno_utils.py
index e2a271d9f6..de325d822c 100644
--- a/tests/python/relay/opencl_texture/utils/adreno_utils.py
+++ b/tests/python/relay/opencl_texture/utils/adreno_utils.py
@@ -26,6 +26,7 @@ from tvm.contrib import utils, ndk
from tvm.relay import testing
from tvm.relay.transform import recast
from tvm.contrib import graph_runtime
+from tvm.runtime.vm import VirtualMachine
import json
@@ -122,6 +123,89 @@ def build_run_compare(
return graph
+def build_run_compare_vm(
+ remote,
+ tvm_mod,
+ params1,
+ input_shape,
+ dtypes,
+ target="llvm",
+ static_mem_scopes=[],
+ gpu_preprocess=None,
+ stat_file=None,
+):
+ if remote is None:
+ target_host = "llvm"
+ else:
+ target_host = "llvm -mtriple=arm64-linux-android"
+
+ if gpu_preprocess:
+ tvm_mod_nchwc = gpu_preprocess(tvm_mod)
+ else:
+ tvm_mod_nchwc = tvm_mod
+
+ if isinstance(tvm_mod_nchwc, relay.Function):
+ module = tvm.IRModule({})
+ module["main"] = tvm_mod_nchwc
+ tvm_mod_nchwc = module
+
+ if stat_file is not None:
+ with autotvm.apply_history_best(stat_file):
+ with tvm.transform.PassContext(opt_level=3):
+ vmc = relay.vm.compile(
+ tvm_mod_nchwc, target=target, target_host=target_host,
params=params1
+ )
+ else:
+ with tvm.transform.PassContext(opt_level=3):
+ vmc = relay.vm.compile(
+ tvm_mod_nchwc, target=target, target_host=target_host,
params=params1
+ )
+
+ # TODO(echuraev): enable scope checking
+ ## verification that storage_scope has expected textures scopes
+ # graph_json = json.loads(graph)
+ # if "storage_scope" in graph_json["attrs"]:
+ # assert (
+ # len(static_mem_scopes) ==
len(graph_json["attrs"]["storage_scope"][1])
+ # or len(static_mem_scopes) == 0
+ # )
+ # else:
+ # assert len(static_mem_scopes) == 0
+
+ # for i in range(0, len(static_mem_scopes)):
+ # assert static_mem_scopes[i] ==
graph_json["attrs"]["storage_scope"][1][i]
+
+ if remote is None:
+ dev = tvm.opencl()
+ vm = VirtualMachine(vmc, dev, "naive")
+ else:
+ temp = utils.tempdir()
+ dso_binary = "dev_lib_cl.so"
+ dso_binary_path = temp.relpath(dso_binary)
+ dev = remote.cl(0)
+ vmc.mod.export_library(dso_binary_path, ndk.create_shared)
+ remote.upload(dso_binary_path)
+ rlib = remote.load_module(dso_binary)
+ vm = VirtualMachine(rlib, dev, "naive")
+ data = {}
+ inputs = []
+ for key in input_shape:
+
inputs.append(np.random.normal(size=input_shape[key]).astype(dtypes[key]))
+ data[key] = tvm.nd.array(inputs[-1], dev)
+ for k, v in params1.items():
+ data[k] = tvm.nd.array(v, dev)
+ vm.set_input("main", **data)
+ vm.invoke_stateful("main")
+
+ ref_outputs = get_cpu_reference(tvm_mod, params1, input_shape, inputs)
+ for i, ref_output in enumerate(ref_outputs):
+ tvm_output = vm.get_outputs()[i]
+ output = tvm_output.asnumpy()
+
+ np.testing.assert_allclose(output, ref_output, rtol=1e-1, atol=1e-1)
+ return vmc
+
+
def gpu_preprocess(tvm_mod):
layout_config = relay.transform.LayoutConfig()
desired_layouts = {"nn.conv2d": ["NCHW4c", "OIHW4o"]}
diff --git a/tests/python/relay/test_pass_dead_code_elimination.py
b/tests/python/relay/test_pass_dead_code_elimination.py
index 68d2919ec3..70dc1dd4f7 100644
--- a/tests/python/relay/test_pass_dead_code_elimination.py
+++ b/tests/python/relay/test_pass_dead_code_elimination.py
@@ -16,8 +16,10 @@
# under the License.
import tvm
import tvm.testing
+from tvm import relay
from tvm.relay import Function, transform
from tvm.relay.testing import inception_v3
+import numpy as np
import pytest
cpu_scope = tvm.target.VirtualDevice(tvm.cpu(), tvm.target.Target("llvm"))
@@ -228,6 +230,11 @@ def test_inline_into_function():
def test_impure_op():
+ shape = np.array([64, 2])
+ metatable = {
+ "VirtualDevice": [cpu_scope],
+ "relay.Constant": [relay.const(shape, dtype="int64")],
+ }
"""Don't elide calls to side-effecting operators."""
before_program = tvm.relay.parse(
"""
@@ -235,7 +242,7 @@ def test_impure_op():
def @main() {
let %size: int64 = cast(1024, dtype="int64");
let %alignment: int64 = cast(64, dtype="int64");
- let %x = memory.alloc_storage(%size, %alignment,
virtual_device=meta[VirtualDevice][0]);
+ let %x = memory.alloc_storage(%size, meta[relay.Constant][0],
%alignment, virtual_device=meta[VirtualDevice][0]);
let %_ = memory.kill(%x);
0
}
@@ -250,6 +257,7 @@ def test_impure_op():
#[version = "0.0.5"]
def @main() {
%0 = memory.alloc_storage(cast(1024, dtype="int64"),
+ meta[relay.Constant][0],
cast(64, dtype="int64"),
virtual_device=meta[VirtualDevice][0]);
let %_ = memory.kill(%0);
@@ -267,6 +275,11 @@ def test_impure_op():
def test_impure_func():
+ shape = np.array([64, 2])
+ metatable = {
+ "VirtualDevice": [cpu_scope],
+ "relay.Constant": [relay.const(shape, dtype="int64")],
+ }
"""Don't elide calls to side-effecting functions."""
before_program = tvm.relay.parse(
"""
@@ -274,7 +287,7 @@ def test_impure_func():
def @f() -> int {
let %size: int64 = cast(1024, dtype="int64");
let %alignment: int64 = cast(64, dtype="int64");
- let %x = memory.alloc_storage(%size, %alignment,
virtual_device=meta[VirtualDevice][0]);
+ let %x = memory.alloc_storage(%size, meta[relay.Constant][0],
%alignment, virtual_device=meta[VirtualDevice][0]);
let %_ = memory.kill(%x);
0
}
@@ -293,6 +306,7 @@ def test_impure_func():
#[version = "0.0.5"]
def @f() -> int {
%0 = memory.alloc_storage(cast(1024, dtype="int64"),
+ meta[relay.Constant][0],
cast(64, dtype="int64"),
virtual_device=meta[VirtualDevice][0]);
let %_ = memory.kill(%0);
diff --git a/tests/python/relay/test_pass_plan_devices.py
b/tests/python/relay/test_pass_plan_devices.py
index c7f42103ca..f654b4b453 100644
--- a/tests/python/relay/test_pass_plan_devices.py
+++ b/tests/python/relay/test_pass_plan_devices.py
@@ -761,14 +761,18 @@ def test_shape_of():
def test_alloc_storage():
- metatable = {"VirtualDevice": [HOST, GPU]}
+ shape = np.array([3, 2])
+ metatable = {
+ "VirtualDevice": [HOST, GPU],
+ "relay.Constant": [relay.const(shape, dtype="int64")],
+ }
def input():
return tvm.relay.parse(
"""
#[version = "0.0.5"]
def @main(%size: int64, %alignment: int64) {
- memory.alloc_storage(%size, %alignment,
virtual_device=meta[VirtualDevice][1])
+ memory.alloc_storage(%size, meta[relay.Constant][0], %alignment,
virtual_device=meta[VirtualDevice][1])
}
""",
"from_string",
@@ -782,7 +786,8 @@ def test_alloc_storage():
#[version = "0.0.5"]
def @main(%size {virtual_device=meta[VirtualDevice][0]}: int64,
%alignment {virtual_device=meta[VirtualDevice][0]}: int64,
virtual_device=meta[VirtualDevice][1]) {
- memory.alloc_storage(%size, %alignment,
virtual_device=meta[VirtualDevice][1])
+ %0 = on_device(meta[relay.Constant][0],
virtual_device=meta[VirtualDevice][0], constrain_result=True);
+ memory.alloc_storage(%size, %0, %alignment,
virtual_device=meta[VirtualDevice][1])
}
""",
"from_string",