tqchen commented on a change in pull request #7488:
URL: https://github.com/apache/tvm/pull/7488#discussion_r581029417



##########
File path: include/tvm/runtime/c_runtime_api.h
##########
@@ -569,22 +583,12 @@ TVM_DLL int TVMDeviceFreeDataSpace(TVMContext ctx, void* 
ptr);
 
 /*!
  * \brief Copy data from one place to another.
- * \param from The source array.
- * \param from_offset The byte offeset in the from.
- * \param to The target array.
- * \param to_offset The byte offset in the to.
- * \param num_bytes The size of the memory in bytes
- * \param ctx_from The source context
- * \param ctx_to The target context
- * \param type_hint The type of elements, only neded by certain backends.
- *                  can be useful for cross device endian converison.
+ * \param from The source tensor.
+ * \param to The target tensor.
  * \param stream Optional stream object.

Review comment:
       Add
   ```
   \note This API is designed to support special memory with shape dependent 
layout.
            We pass in DLTensor* with shape information to support these cases.
   ```

##########
File path: include/tvm/runtime/device_api.h
##########
@@ -99,19 +110,10 @@ class TVM_DLL DeviceAPI {
   /*!
    * \brief copy data from one place to another
    * \param from The source array.
-   * \param from_offset The byte offeset in the from.
    * \param to The target array.
-   * \param to_offset The byte offset in the to.
-   * \param num_bytes The size of the memory in bytes
-   * \param ctx_from The source context
-   * \param ctx_to The target context
-   * \param type_hint The type of elements, only neded by certain backends.
-   *                  can be useful for cross device endian converison.
    * \param stream Optional stream object.

Review comment:
       Add similar note about special memory layout handling

##########
File path: src/runtime/ndarray.cc
##########
@@ -279,6 +279,17 @@ int TVMArrayAlloc(const tvm_index_t* shape, int ndim, int 
dtype_code, int dtype_
   API_END();
 }
 
+TVM_REGISTER_GLOBAL("runtime.TVMArrayAllocWithScope").set_body([](TVMArgs 
args, TVMRetValue* ret) {
+  int64_t* shape_ptr = static_cast<int64_t*>(static_cast<void*>(args[0]));

Review comment:
       avoid pass in raw pointers, consider encoding shape into positional 
arguments for now

##########
File path: src/runtime/ndarray.cc
##########
@@ -58,36 +59,39 @@ inline void VerifyDataType(DLDataType dtype) {
   ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
 }
 
-inline size_t GetDataAlignment(const DLTensor& arr) {
-  size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
-  if (align < kAllocAlignment) return kAllocAlignment;
-  return align;
-}
-
 void ArrayCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) {
-  TVMContext cpu_ctx;
-  cpu_ctx.device_type = kDLCPU;
-  cpu_ctx.device_id = 0;
   size_t arr_size = GetDataSize(*handle);
   ICHECK_EQ(arr_size, nbytes) << "ArrayCopyFromBytes: size mismatch";
   ICHECK(IsContiguous(*handle)) << "ArrayCopyFromBytes only support contiguous 
array for now";
-  DeviceAPI::Get(handle->ctx)
-      ->CopyDataFromTo(data, 0, handle->data, 
static_cast<size_t>(handle->byte_offset), nbytes,
-                       cpu_ctx, handle->ctx, handle->dtype, nullptr);
+
+  DLTensor from;
+  from.data = const_cast<void*>(data);
+  from.ctx = DLContext{kDLCPU, 0};
+  from.ndim = handle->ndim;
+  from.dtype = handle->dtype;
+  from.shape = handle->shape;
+  from.strides = handle->strides;
+  from.byte_offset = handle->byte_offset;

Review comment:
       `from.byte_offset = 0`, NOTE (the data view in from starts from 0)

##########
File path: src/runtime/c_runtime_api.cc
##########
@@ -144,6 +144,44 @@ void* DeviceAPI::AllocWorkspace(TVMContext ctx, size_t 
size, DLDataType type_hin
   return AllocDataSpace(ctx, size, kTempAllocaAlignment, type_hint);
 }
 
+static size_t GetDataAlignment(const DLDataType dtype) {
+  size_t align = (dtype.bits / 8) * dtype.lanes;
+  if (align < kAllocAlignment) return kAllocAlignment;
+  return align;
+}
+
+void* DeviceAPI::AllocDataSpace(TVMContext ctx, std::vector<int64_t> shape, 
DLDataType dtype,
+                                Optional<String> mem_scope) {
+  if (!mem_scope.defined() || mem_scope.value() == "global") {
+    DLTensor temp;
+    temp.data = nullptr;
+    temp.ctx = ctx;
+    temp.ndim = static_cast<int>(shape.size());
+    temp.dtype = dtype;
+    temp.shape = shape.data();
+    temp.strides = nullptr;
+    temp.byte_offset = 0;
+    size_t size = GetDataSize(temp);
+    size_t alignment = GetDataAlignment(temp.dtype);
+    return AllocDataSpace(ctx, size, alignment, dtype);

Review comment:
       add comments, by default, we can always redirect to the flat memory 
allocations

##########
File path: include/tvm/runtime/device_api.h
##########
@@ -90,6 +91,16 @@ class TVM_DLL DeviceAPI {
    */
   virtual void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment,
                                DLDataType type_hint) = 0;
+  /*!
+   * \brief Allocate a data space on device.
+   * \param ctx The device context to perform operation.
+   * \param shape The shape of allocated tensor.
+   * \param dtype The type of elements.
+   * \param mem_scope The memory scope of allocated tensor.
+   * \return The allocated device pointer.
+   */
+  virtual void* AllocDataSpace(TVMContext ctx, std::vector<int64_t> shape, 
DLDataType dtype,

Review comment:
       let us consider doing ```int ndim, const int64_t* shape,``` instead, to 
be more compatible with the DLTensor*'s field without constructing a vector.

##########
File path: src/runtime/ndarray.cc
##########
@@ -58,36 +59,39 @@ inline void VerifyDataType(DLDataType dtype) {
   ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
 }
 
-inline size_t GetDataAlignment(const DLTensor& arr) {
-  size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
-  if (align < kAllocAlignment) return kAllocAlignment;
-  return align;
-}
-
 void ArrayCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) {
-  TVMContext cpu_ctx;
-  cpu_ctx.device_type = kDLCPU;
-  cpu_ctx.device_id = 0;
   size_t arr_size = GetDataSize(*handle);
   ICHECK_EQ(arr_size, nbytes) << "ArrayCopyFromBytes: size mismatch";
   ICHECK(IsContiguous(*handle)) << "ArrayCopyFromBytes only support contiguous 
array for now";
-  DeviceAPI::Get(handle->ctx)
-      ->CopyDataFromTo(data, 0, handle->data, 
static_cast<size_t>(handle->byte_offset), nbytes,
-                       cpu_ctx, handle->ctx, handle->dtype, nullptr);
+
+  DLTensor from;
+  from.data = const_cast<void*>(data);
+  from.ctx = DLContext{kDLCPU, 0};
+  from.ndim = handle->ndim;
+  from.dtype = handle->dtype;
+  from.shape = handle->shape;
+  from.strides = handle->strides;
+  from.byte_offset = handle->byte_offset;
+  DeviceAPI::Get(handle->ctx)->CopyDataFromTo(&from, handle, nullptr);
   // Synchronize in case data become unavailable later.
   DeviceAPI::Get(handle->ctx)->StreamSync(handle->ctx, nullptr);
 }
 
 void ArrayCopyToBytes(const DLTensor* handle, void* data, size_t nbytes) {
-  TVMContext cpu_ctx;
-  cpu_ctx.device_type = kDLCPU;
-  cpu_ctx.device_id = 0;
   size_t arr_size = GetDataSize(*handle);
   ICHECK_EQ(arr_size, nbytes) << "ArrayCopyToBytes: size mismatch";
   ICHECK(IsContiguous(*handle)) << "ArrayCopyToBytes only support contiguous 
array for now";
-  DeviceAPI::Get(handle->ctx)
-      ->CopyDataFromTo(handle->data, static_cast<size_t>(handle->byte_offset), 
data, 0, nbytes,
-                       handle->ctx, cpu_ctx, handle->dtype, nullptr);
+
+  DLTensor to;
+  to.data = const_cast<void*>(data);
+  to.ctx = DLContext{kDLCPU, 0};
+  to.ndim = handle->ndim;
+  to.dtype = handle->dtype;
+  to.shape = handle->shape;
+  to.strides = handle->strides;

Review comment:
       strides = nullptr

##########
File path: include/tvm/runtime/device_api.h
##########
@@ -90,6 +91,16 @@ class TVM_DLL DeviceAPI {
    */
   virtual void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment,
                                DLDataType type_hint) = 0;
+  /*!
+   * \brief Allocate a data space on device.

Review comment:
       Allocate data space with memory scope support

##########
File path: src/runtime/ndarray.cc
##########
@@ -58,36 +59,39 @@ inline void VerifyDataType(DLDataType dtype) {
   ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
 }
 
-inline size_t GetDataAlignment(const DLTensor& arr) {
-  size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
-  if (align < kAllocAlignment) return kAllocAlignment;
-  return align;
-}
-
 void ArrayCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) {
-  TVMContext cpu_ctx;
-  cpu_ctx.device_type = kDLCPU;
-  cpu_ctx.device_id = 0;
   size_t arr_size = GetDataSize(*handle);
   ICHECK_EQ(arr_size, nbytes) << "ArrayCopyFromBytes: size mismatch";
   ICHECK(IsContiguous(*handle)) << "ArrayCopyFromBytes only support contiguous 
array for now";
-  DeviceAPI::Get(handle->ctx)
-      ->CopyDataFromTo(data, 0, handle->data, 
static_cast<size_t>(handle->byte_offset), nbytes,
-                       cpu_ctx, handle->ctx, handle->dtype, nullptr);
+
+  DLTensor from;
+  from.data = const_cast<void*>(data);
+  from.ctx = DLContext{kDLCPU, 0};
+  from.ndim = handle->ndim;
+  from.dtype = handle->dtype;
+  from.shape = handle->shape;
+  from.strides = handle->strides;
+  from.byte_offset = handle->byte_offset;
+  DeviceAPI::Get(handle->ctx)->CopyDataFromTo(&from, handle, nullptr);
   // Synchronize in case data become unavailable later.
   DeviceAPI::Get(handle->ctx)->StreamSync(handle->ctx, nullptr);
 }
 
 void ArrayCopyToBytes(const DLTensor* handle, void* data, size_t nbytes) {
-  TVMContext cpu_ctx;
-  cpu_ctx.device_type = kDLCPU;
-  cpu_ctx.device_id = 0;
   size_t arr_size = GetDataSize(*handle);
   ICHECK_EQ(arr_size, nbytes) << "ArrayCopyToBytes: size mismatch";
   ICHECK(IsContiguous(*handle)) << "ArrayCopyToBytes only support contiguous 
array for now";
-  DeviceAPI::Get(handle->ctx)
-      ->CopyDataFromTo(handle->data, static_cast<size_t>(handle->byte_offset), 
data, 0, nbytes,
-                       handle->ctx, cpu_ctx, handle->dtype, nullptr);
+
+  DLTensor to;
+  to.data = const_cast<void*>(data);
+  to.ctx = DLContext{kDLCPU, 0};
+  to.ndim = handle->ndim;
+  to.dtype = handle->dtype;
+  to.shape = handle->shape;
+  to.strides = handle->strides;
+  to.byte_offset = handle->byte_offset;

Review comment:
       to.byte_offset = 0;

##########
File path: src/runtime/ndarray.cc
##########
@@ -58,36 +59,39 @@ inline void VerifyDataType(DLDataType dtype) {
   ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
 }
 
-inline size_t GetDataAlignment(const DLTensor& arr) {
-  size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
-  if (align < kAllocAlignment) return kAllocAlignment;
-  return align;
-}
-
 void ArrayCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) {
-  TVMContext cpu_ctx;
-  cpu_ctx.device_type = kDLCPU;
-  cpu_ctx.device_id = 0;
   size_t arr_size = GetDataSize(*handle);
   ICHECK_EQ(arr_size, nbytes) << "ArrayCopyFromBytes: size mismatch";
   ICHECK(IsContiguous(*handle)) << "ArrayCopyFromBytes only support contiguous 
array for now";
-  DeviceAPI::Get(handle->ctx)
-      ->CopyDataFromTo(data, 0, handle->data, 
static_cast<size_t>(handle->byte_offset), nbytes,
-                       cpu_ctx, handle->ctx, handle->dtype, nullptr);
+
+  DLTensor from;
+  from.data = const_cast<void*>(data);
+  from.ctx = DLContext{kDLCPU, 0};
+  from.ndim = handle->ndim;
+  from.dtype = handle->dtype;
+  from.shape = handle->shape;
+  from.strides = handle->strides;

Review comment:
       from.strides =  nullptr

##########
File path: include/tvm/runtime/c_runtime_api.h
##########
@@ -559,6 +559,20 @@ TVM_DLL int TVMByteArrayFree(TVMByteArray* arr);
 TVM_DLL int TVMDeviceAllocDataSpace(DLContext ctx, size_t nbytes, size_t 
alignment,
                                     DLDataType type_hint, void** out_data);
 
+/*!
+ * \brief Allocate a data space on device.
+ * \param ctx The device context to perform operation.
+ * \param ndim The number of dimension of the tensor.
+ * \param shape The shape of the tensor.
+ * \param dtype The type of elements.
+ * \param mem_scope The memory scope of the tensor.

Review comment:
       can be nullptr, which indicate the default global DRAM

##########
File path: src/runtime/c_runtime_api.cc
##########
@@ -144,6 +144,44 @@ void* DeviceAPI::AllocWorkspace(TVMContext ctx, size_t 
size, DLDataType type_hin
   return AllocDataSpace(ctx, size, kTempAllocaAlignment, type_hint);
 }
 
+static size_t GetDataAlignment(const DLDataType dtype) {
+  size_t align = (dtype.bits / 8) * dtype.lanes;
+  if (align < kAllocAlignment) return kAllocAlignment;
+  return align;
+}
+
+void* DeviceAPI::AllocDataSpace(TVMContext ctx, std::vector<int64_t> shape, 
DLDataType dtype,
+                                Optional<String> mem_scope) {
+  if (!mem_scope.defined() || mem_scope.value() == "global") {
+    DLTensor temp;
+    temp.data = nullptr;
+    temp.ctx = ctx;
+    temp.ndim = static_cast<int>(shape.size());
+    temp.dtype = dtype;
+    temp.shape = shape.data();
+    temp.strides = nullptr;
+    temp.byte_offset = 0;
+    size_t size = GetDataSize(temp);
+    size_t alignment = GetDataAlignment(temp.dtype);
+    return AllocDataSpace(ctx, size, alignment, dtype);
+  }
+  LOG(FATAL) << "Device does not support allocate data space with "
+             << "specified memory scope: " << mem_scope.value();
+  return nullptr;
+}
+
+void DeviceAPI::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle 
stream) {
+  size_t nbytes = GetDataSize(*from);
+  CopyDataFromTo(from->data, from->byte_offset, to->data, to->byte_offset, 
nbytes, from->ctx,

Review comment:
       add comments, by default, we can always redirect to the flat memory copy 
operation.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to