comaniac commented on a change in pull request #7313:
URL: https://github.com/apache/tvm/pull/7313#discussion_r587680749
##########
File path: python/tvm/auto_scheduler/measure.py
##########
@@ -719,6 +720,45 @@ def local_builder_build(inputs, timeout, n_parallel,
build_func="default", verbo
return results
+def _prepare_input_map(args):
+ """This function deals with special task inputs.
Review comment:
Better to say more about what the special inputs are.
##########
File path: python/tvm/auto_scheduler/measure.py
##########
@@ -719,6 +720,45 @@ def local_builder_build(inputs, timeout, n_parallel,
build_func="default", verbo
return results
+def _prepare_input_map(args):
+ """This function deals with special task inputs.
+
+ Parameters
+ ----------
+ args : List[Tensor]
+ Input/output Tensor of a TVM subgraph.
+
+ Returns
+ -------
+ A Dict[Tensor, str] that maps the input Tensor to a buffer name.
+
+ Note
+ ----
Review comment:
```suggestion
Notes
-----
```
##########
File path: include/tvm/auto_scheduler/search_task.h
##########
@@ -120,6 +121,8 @@ class SearchTaskNode : public Object {
HardwareParams hardware_params;
/*! \brief The layout rewrite option used for measuring programs. */
LayoutRewriteOption layout_rewrite_option;
+ /*! \brief Names of some user defined input data used in program measuring.
*/
+ Array<String> task_inputs;
Review comment:
Per functionality, `task_input_names` would be better.
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -157,6 +164,149 @@ def __init__(
)
+# The map stores special registered buffer for measurement
+# This can be used for sparse workloads when we cannot use random tensors for
measurment.
+# {
+# "workload_key_0": {
+# "task_input_0": Tensor(...),
+# "task_input_1": Tensor(...)
+# },
+# "workload_key_1": {
+# "task_input_2": Tensor(...),
+# "task_input_3": Tensor(...)
+# },
+# ...
+# }
+TASK_INPUT_BUFFER_TABLE = {}
+
+
+def _save_buffer_to_file(buffer_name, buffer_data):
+ """Save the current Tensor buffer to a numpy file.
+
+ File name will be: {buffer_name}.{buffer_shape}_{buffer_data_type}
+ """
+ np_data = buffer_data.asnumpy()
+
+ buffer_name += "."
+ for i in np_data.shape:
+ buffer_name += "%d_" % (i)
+ buffer_name += "%s" % (np_data.dtype)
+
+ np_data.tofile(buffer_name, " ")
+
+
+def _try_load_buffer_from_file(buffer_name):
+ """Try to load buffer from a numpy file, if not found, return None.
+
+ File name has a same format as `_save_buffer_to_file`.
+ """
+ filelist = os.listdir()
+
+ for file in filelist:
+ if file.startswith(buffer_name) and file.count("."):
+ meta_info = file.split(".")[-1].split("_")
+ shape = [int(i) for i in meta_info[:-1]]
+ dtype = meta_info[-1]
+ buffer_data = np.fromfile(file, dtype=dtype, sep=" ")
+ buffer_data = buffer_data.reshape(shape)
+ return ndarray.array(buffer_data)
+
+ return None
+
+
+def register_task_input_buffer(
+ workload_key,
+ input_name,
+ input_data,
+ overwrite=False,
+ save_to_file=False,
+):
+ """Register special buffer for measurement.
+
+ Parameters
+ ----------
+ workload_key : str
+ The workload key of the SearchTask.
+
+ input_name : str
+ The name of input buffer.
+
+ input_data : tvm.nd.NDArray
+ The input Tensor data.
+
+ overwrite : bool = False
+ Whether overwrite the data if a name has already in the global table.
+
+ save_to_file : bool = False
+ Whether record this buffer to a local file. This can be reused to
continue the last tuning
+ process.
+ """
Review comment:
Returns?
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -157,6 +164,149 @@ def __init__(
)
+# The map stores special registered buffer for measurement
+# This can be used for sparse workloads when we cannot use random tensors for
measurment.
+# {
+# "workload_key_0": {
+# "task_input_0": Tensor(...),
+# "task_input_1": Tensor(...)
+# },
+# "workload_key_1": {
+# "task_input_2": Tensor(...),
+# "task_input_3": Tensor(...)
+# },
+# ...
+# }
+TASK_INPUT_BUFFER_TABLE = {}
+
+
+def _save_buffer_to_file(buffer_name, buffer_data):
+ """Save the current Tensor buffer to a numpy file.
+
+ File name will be: {buffer_name}.{buffer_shape}_{buffer_data_type}
+ """
+ np_data = buffer_data.asnumpy()
+
+ buffer_name += "."
+ for i in np_data.shape:
+ buffer_name += "%d_" % (i)
+ buffer_name += "%s" % (np_data.dtype)
+
+ np_data.tofile(buffer_name, " ")
+
+
+def _try_load_buffer_from_file(buffer_name):
+ """Try to load buffer from a numpy file, if not found, return None.
+
+ File name has a same format as `_save_buffer_to_file`.
+ """
+ filelist = os.listdir()
+
+ for file in filelist:
+ if file.startswith(buffer_name) and file.count("."):
+ meta_info = file.split(".")[-1].split("_")
+ shape = [int(i) for i in meta_info[:-1]]
+ dtype = meta_info[-1]
+ buffer_data = np.fromfile(file, dtype=dtype, sep=" ")
+ buffer_data = buffer_data.reshape(shape)
+ return ndarray.array(buffer_data)
+
+ return None
+
+
+def register_task_input_buffer(
+ workload_key,
+ input_name,
+ input_data,
+ overwrite=False,
+ save_to_file=False,
+):
+ """Register special buffer for measurement.
+
+ Parameters
+ ----------
+ workload_key : str
+ The workload key of the SearchTask.
+
+ input_name : str
+ The name of input buffer.
+
+ input_data : tvm.nd.NDArray
+ The input Tensor data.
+
+ overwrite : bool = False
+ Whether overwrite the data if a name has already in the global table.
+
+ save_to_file : bool = False
+ Whether record this buffer to a local file. This can be reused to
continue the last tuning
+ process.
+ """
+ global TASK_INPUT_BUFFER_TABLE
+
+ if workload_key not in TASK_INPUT_BUFFER_TABLE:
+ TASK_INPUT_BUFFER_TABLE[workload_key] = {}
+ input_table = TASK_INPUT_BUFFER_TABLE[workload_key]
+
+ if not overwrite:
+ if input_name not in input_table.keys():
+ # Try to load buffer data from local file
+ tensor_from_file = _try_load_buffer_from_file(input_name)
+ if tensor_from_file:
+ input_table[input_name] = tensor_from_file
+
+ if input_name in input_table.keys():
+ logger.warning(
+ "Tensor %s exists in TASK_INPUT_BUFFER_TABLE, %s",
+ input_name,
+ "set overwrite to True or this Tensor will not be registered",
+ )
+ return input_table[input_name]
+
+ input_table[input_name] = input_data
+ if save_to_file:
+ _save_buffer_to_file(input_name, input_data)
+ return input_data
+
+
+@tvm._ffi.register_func("auto_scheduler.search_task.get_task_input_buffer")
+def get_task_input_buffer(workload_key, input_name):
+ """Get special buffer for measurement.
+
+ The buffers are registered by `register_task_input_buffer`.
+
+ Parameters
+ ----------
+ workload_key : str
+ The workload key of the SearchTask.
+
+ input_name : str
+ The name of input buffer.
+
+ Returns
+ -------
+ The registered input buffer.
Review comment:
type?
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -185,6 +335,16 @@ class SearchTask(Object):
The NO_REWRITE and INSERT_TRANSFORM_STAGE are expected to be used when
tuning a standalone
op, and the REWRITE_FOR_PRE_TRANSFORMED is expected to be used when
tuning ops inside a
network.
+ task_inputs : Union[Dict[str, tvm.nd.NDArray], List[str]]
+ A dict maps the input names to input tensors or a list of input names.
+ Some special Tensor used as inputs in program measuring. Usually we do
not need to care
+ about it, but for special workloads like Sparse computation the Sparse
Tensor input are
+ meaningful that we cannot use random input directly.
+ task_inputs_overwrite : bool = False
+ Whether overwrite the data if a name has already in the global table.
+ task_inputs_save_to_file : bool = False
+ Whether record this buffer to a local file. This can be reused to
continue the last
+ tuning process.
Review comment:
ditto
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -157,6 +164,149 @@ def __init__(
)
+# The map stores special registered buffer for measurement
+# This can be used for sparse workloads when we cannot use random tensors for
measurment.
Review comment:
```suggestion
# The map stores special registered buffer for measurement.
# This can be used for sparse workloads when we cannot use random tensors
for measurment.
```
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -157,6 +164,149 @@ def __init__(
)
+# The map stores special registered buffer for measurement
+# This can be used for sparse workloads when we cannot use random tensors for
measurment.
+# {
+# "workload_key_0": {
+# "task_input_0": Tensor(...),
+# "task_input_1": Tensor(...)
+# },
+# "workload_key_1": {
+# "task_input_2": Tensor(...),
+# "task_input_3": Tensor(...)
+# },
+# ...
+# }
+TASK_INPUT_BUFFER_TABLE = {}
+
+
+def _save_buffer_to_file(buffer_name, buffer_data):
+ """Save the current Tensor buffer to a numpy file.
+
+ File name will be: {buffer_name}.{buffer_shape}_{buffer_data_type}
Review comment:
Would it be better to have the file extension (i.e., `npy`), so it
becomes `{buffer_name}_{buffer_shape}_{buffer_data_type}.npy`?
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -157,6 +164,149 @@ def __init__(
)
+# The map stores special registered buffer for measurement
+# This can be used for sparse workloads when we cannot use random tensors for
measurment.
+# {
+# "workload_key_0": {
+# "task_input_0": Tensor(...),
+# "task_input_1": Tensor(...)
+# },
+# "workload_key_1": {
+# "task_input_2": Tensor(...),
+# "task_input_3": Tensor(...)
+# },
+# ...
+# }
+TASK_INPUT_BUFFER_TABLE = {}
+
+
+def _save_buffer_to_file(buffer_name, buffer_data):
+ """Save the current Tensor buffer to a numpy file.
+
+ File name will be: {buffer_name}.{buffer_shape}_{buffer_data_type}
+ """
+ np_data = buffer_data.asnumpy()
+
+ buffer_name += "."
+ for i in np_data.shape:
+ buffer_name += "%d_" % (i)
+ buffer_name += "%s" % (np_data.dtype)
+
+ np_data.tofile(buffer_name, " ")
+
+
+def _try_load_buffer_from_file(buffer_name):
+ """Try to load buffer from a numpy file, if not found, return None.
+
+ File name has a same format as `_save_buffer_to_file`.
+ """
+ filelist = os.listdir()
+
+ for file in filelist:
+ if file.startswith(buffer_name) and file.count("."):
+ meta_info = file.split(".")[-1].split("_")
+ shape = [int(i) for i in meta_info[:-1]]
+ dtype = meta_info[-1]
+ buffer_data = np.fromfile(file, dtype=dtype, sep=" ")
+ buffer_data = buffer_data.reshape(shape)
+ return ndarray.array(buffer_data)
+
+ return None
+
+
+def register_task_input_buffer(
+ workload_key,
+ input_name,
+ input_data,
+ overwrite=False,
+ save_to_file=False,
+):
+ """Register special buffer for measurement.
+
+ Parameters
+ ----------
+ workload_key : str
+ The workload key of the SearchTask.
+
+ input_name : str
+ The name of input buffer.
+
+ input_data : tvm.nd.NDArray
+ The input Tensor data.
+
+ overwrite : bool = False
+ Whether overwrite the data if a name has already in the global table.
+
+ save_to_file : bool = False
+ Whether record this buffer to a local file. This can be reused to
continue the last tuning
+ process.
Review comment:
```suggestion
Whether to save the data to a local file as well. This can be reused
to resume the last tuning
process.
```
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -157,6 +164,149 @@ def __init__(
)
+# The map stores special registered buffer for measurement
+# This can be used for sparse workloads when we cannot use random tensors for
measurment.
+# {
+# "workload_key_0": {
+# "task_input_0": Tensor(...),
+# "task_input_1": Tensor(...)
+# },
+# "workload_key_1": {
+# "task_input_2": Tensor(...),
+# "task_input_3": Tensor(...)
+# },
+# ...
+# }
+TASK_INPUT_BUFFER_TABLE = {}
+
+
+def _save_buffer_to_file(buffer_name, buffer_data):
+ """Save the current Tensor buffer to a numpy file.
+
+ File name will be: {buffer_name}.{buffer_shape}_{buffer_data_type}
+ """
+ np_data = buffer_data.asnumpy()
+
+ buffer_name += "."
+ for i in np_data.shape:
+ buffer_name += "%d_" % (i)
+ buffer_name += "%s" % (np_data.dtype)
+
+ np_data.tofile(buffer_name, " ")
+
+
+def _try_load_buffer_from_file(buffer_name):
+ """Try to load buffer from a numpy file, if not found, return None.
+
+ File name has a same format as `_save_buffer_to_file`.
+ """
+ filelist = os.listdir()
+
+ for file in filelist:
+ if file.startswith(buffer_name) and file.count("."):
+ meta_info = file.split(".")[-1].split("_")
+ shape = [int(i) for i in meta_info[:-1]]
+ dtype = meta_info[-1]
+ buffer_data = np.fromfile(file, dtype=dtype, sep=" ")
+ buffer_data = buffer_data.reshape(shape)
+ return ndarray.array(buffer_data)
+
+ return None
+
+
+def register_task_input_buffer(
+ workload_key,
+ input_name,
+ input_data,
+ overwrite=False,
+ save_to_file=False,
+):
+ """Register special buffer for measurement.
+
+ Parameters
+ ----------
+ workload_key : str
+ The workload key of the SearchTask.
+
+ input_name : str
+ The name of input buffer.
+
+ input_data : tvm.nd.NDArray
+ The input Tensor data.
+
+ overwrite : bool = False
+ Whether overwrite the data if a name has already in the global table.
+
+ save_to_file : bool = False
+ Whether record this buffer to a local file. This can be reused to
continue the last tuning
+ process.
+ """
+ global TASK_INPUT_BUFFER_TABLE
+
+ if workload_key not in TASK_INPUT_BUFFER_TABLE:
+ TASK_INPUT_BUFFER_TABLE[workload_key] = {}
+ input_table = TASK_INPUT_BUFFER_TABLE[workload_key]
+
+ if not overwrite:
+ if input_name not in input_table.keys():
+ # Try to load buffer data from local file
+ tensor_from_file = _try_load_buffer_from_file(input_name)
+ if tensor_from_file:
+ input_table[input_name] = tensor_from_file
+
+ if input_name in input_table.keys():
Review comment:
```suggestion
else:
```
btw, should we log a message saying which buffer was loaded? Otherwise, if
a user assumes a buffer has been loaded when it actually has not (e.g., the
user accidentally removed the file), then the tuning results may be useless.
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -185,6 +335,16 @@ class SearchTask(Object):
The NO_REWRITE and INSERT_TRANSFORM_STAGE are expected to be used when
tuning a standalone
op, and the REWRITE_FOR_PRE_TRANSFORMED is expected to be used when
tuning ops inside a
network.
+ task_inputs : Union[Dict[str, tvm.nd.NDArray], List[str]]
+ A dict maps the input names to input tensors or a list of input names.
+ Some special Tensor used as inputs in program measuring. Usually we do
not need to care
+ about it, but for special workloads like Sparse computation the Sparse
Tensor input are
+ meaningful that we cannot use random input directly.
+ task_inputs_overwrite : bool = False
+ Whether overwrite the data if a name has already in the global table.
Review comment:
```suggestion
Whether to overwrite the data if a name has already in the global
table.
```
##########
File path: python/tvm/auto_scheduler/search_task.py
##########
@@ -157,6 +164,149 @@ def __init__(
)
+# The map stores special registered buffer for measurement
+# This can be used for sparse workloads when we cannot use random tensors for
measurment.
+# {
+# "workload_key_0": {
+# "task_input_0": Tensor(...),
+# "task_input_1": Tensor(...)
+# },
+# "workload_key_1": {
+# "task_input_2": Tensor(...),
+# "task_input_3": Tensor(...)
+# },
+# ...
+# }
+TASK_INPUT_BUFFER_TABLE = {}
+
+
+def _save_buffer_to_file(buffer_name, buffer_data):
+ """Save the current Tensor buffer to a numpy file.
+
+ File name will be: {buffer_name}.{buffer_shape}_{buffer_data_type}
+ """
+ np_data = buffer_data.asnumpy()
+
+ buffer_name += "."
+ for i in np_data.shape:
+ buffer_name += "%d_" % (i)
+ buffer_name += "%s" % (np_data.dtype)
+
+ np_data.tofile(buffer_name, " ")
+
+
+def _try_load_buffer_from_file(buffer_name):
+ """Try to load buffer from a numpy file, if not found, return None.
+
+ File name has a same format as `_save_buffer_to_file`.
+ """
+ filelist = os.listdir()
+
+ for file in filelist:
+ if file.startswith(buffer_name) and file.count("."):
+ meta_info = file.split(".")[-1].split("_")
+ shape = [int(i) for i in meta_info[:-1]]
+ dtype = meta_info[-1]
+ buffer_data = np.fromfile(file, dtype=dtype, sep=" ")
+ buffer_data = buffer_data.reshape(shape)
+ return ndarray.array(buffer_data)
+
+ return None
+
+
+def register_task_input_buffer(
+ workload_key,
+ input_name,
+ input_data,
+ overwrite=False,
+ save_to_file=False,
+):
+ """Register special buffer for measurement.
+
+ Parameters
+ ----------
+ workload_key : str
+ The workload key of the SearchTask.
+
+ input_name : str
+ The name of input buffer.
+
+ input_data : tvm.nd.NDArray
+ The input Tensor data.
+
+ overwrite : bool = False
+ Whether overwrite the data if a name has already in the global table.
Review comment:
```suggestion
Whether to overwrite the data if a name has already registered.
```
##########
File path: python/tvm/auto_scheduler/measure.py
##########
@@ -758,11 +802,25 @@ def _timed_eval_func(
if error_no == 0:
try:
- args = [ndarray.empty(get_const_tuple(x.shape), x.dtype, ctx) for
x in build_res.args]
random_fill =
tvm.get_global_func("tvm.contrib.random.random_fill", True)
assert random_fill, "Please make sure USE_RANDOM is ON in the
config.cmake"
- for arg in args:
- random_fill(arg)
+
+ tensor_input_map = _prepare_input_map(build_res.args) if
task_inputs else {}
+ args = []
+ for arg in build_res.args:
+ if arg in tensor_input_map:
+ tensor_name = tensor_input_map[arg]
+ if tensor_name in task_inputs:
+
args.append(get_task_input_buffer(inp.task.workload_key, tensor_name))
+ else:
+ raise ValueError(
+ "%s not found in task_inputs, " % (tensor_name)
+ + "should provide with SearchTask.AddTaskInput()"
Review comment:
Didn't find `AddTaskInput`?
##########
File path: python/tvm/auto_scheduler/measure.py
##########
@@ -943,18 +1005,30 @@ def _timed_rpc_run(
if error_no == 0:
try:
- args = [ndarray.empty(get_const_tuple(x.shape), x.dtype, ctx) for
x in build_res.args]
- try:
- random_fill =
remote.get_function("tvm.contrib.random.random_fill")
- except AttributeError:
- raise AttributeError(
- "Please make sure USE_RANDOM is ON in the config.cmake "
"on the remote devices"
- )
- for arg in args:
- random_fill(arg)
+ random_fill = remote.get_function("tvm.contrib.random.random_fill")
+ assert (
+ random_fill
+ ), "Please make sure USE_RANDOM is ON in the config.cmake on the
remote devices"
+
+ tensor_input_map = _prepare_input_map(build_res.args) if
task_inputs else {}
+ args = []
+ for arg in build_res.args:
+ if arg in tensor_input_map:
+ tensor_name = tensor_input_map[arg]
+ if tensor_name in task_inputs:
+
args.append(get_task_input_buffer(inp.task.workload_key, tensor_name))
+ else:
+ raise ValueError(
+ "%s not found in task_inputs, " % (tensor_name)
+ + "should provide with SearchTask.AddTaskInput()"
Review comment:
ditto
##########
File path: python/tvm/auto_scheduler/measure.py
##########
@@ -719,6 +720,45 @@ def local_builder_build(inputs, timeout, n_parallel,
build_func="default", verbo
return results
+def _prepare_input_map(args):
+ """This function deals with special task inputs.
+
+ Parameters
+ ----------
+ args : List[Tensor]
+ Input/output Tensor of a TVM subgraph.
+
+ Returns
+ -------
+ A Dict[Tensor, str] that maps the input Tensor to a buffer name.
Review comment:
```suggestion
Dict[Tensor, str] :
Map from the input Tensor to its buffer name.
```
##########
File path: python/tvm/topi/nn/sparse.py
##########
@@ -356,3 +359,110 @@ def sparse_dense_alter_layout(_attrs, _inputs, _tinfos,
_out_type):
Unlike other TOPI functions, this function operates on both graph level
and operator level.
"""
return None
+
+
+def try_get_sparse_input(args):
+ """Analyze the input data from the given args.
+
+ Parameters
+ ----------
+ args : List[Tensor]
+ Input/output Tensor of a TVM subgraph.
+
+ Returns
+ -------
+ A Dict[Tensor, str] that maps the input Tensor to a buffer name.
+
+ Note
+ ----
+ The buffer name is specially designed, and these buffer should be provided
in
+ `SearchTask(..., task_inputs={...})`.
Review comment:
ditto
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]