tkonolige commented on a change in pull request #7334:
URL: https://github.com/apache/tvm/pull/7334#discussion_r564640703
##########
File path: python/tvm/topi/cuda/scan.py
##########
@@ -251,99 +269,103 @@ def scan_thrust(data, output_dtype, exclusive=True,
return_reduction=False):
Whether or not do exclusive or inclusive scan.
return_reduction: bool, optional
- Whether or not return a 1-D tensor storing the reduction of each row.
+ Whether or not return a (N-1)-D tensor storing the reduction of each
scan axis.
Reductions are computed as part of the upsweep pass, so there is no
extra cost.
- If False, reductions are ignored.
+ If False, reductions are ignored. It must be False when exclusive is
False.
+
+ binop: function, optional
+ A binary associative op to use for scan. Since we need to lookup the
corresponding
+ thrust function, arbitrary callables are not supported. Currently only
+ tvm.tir.generic.add can be passed in.
Returns
-------
output : tvm.te.Tensor
- 1-D tensor that is the exclusive scan of the input, or
- 2-D tensor storing the exclusive scan of each row.
+ An N-D tensor of the same rank N and shape as the input data.
reduction : tvm.te.Tensor, optional
- 1-D tensor storing the reduction of each row.
+ (N-1)-D tensor storing the reduction of each scan axis.
Returned if return_reduction is True.
"""
data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "data_buf",
data_alignment=8)
output_buf = tvm.tir.decl_buffer(data.shape, output_dtype, "output_buf",
data_alignment=8)
+
output = te.extern(
[data.shape],
[data],
lambda ins, outs: tvm.tir.call_packed(
- "tvm.contrib.thrust.sum_scan", ins[0], outs[0], exclusive
+ _get_thrust_func_name(binop), ins[0], outs[0], exclusive
),
dtype=[output_dtype],
in_buffers=[data_buf],
out_buffers=[output_buf],
- name="exclusive_sum_scan2d",
- tag="exclusive_sum_scan2d_gpu",
+ name="exclusive_scan_thrust",
+ tag="exclusive_scan_thrust_gpu",
)
if return_reduction:
assert exclusive, "return_reduction should be False for inclusive scan"
- reduction = get_reduction_from_exclusive_scan(data, output)
+ reduction = get_reduction_from_exclusive_scan(data, output, binop)
return output, reduction
return output
-def exclusive_scan(data, axis=-1, return_reduction=False, output_dtype=None):
- """Do exclusive scan on 1D input or along rows of 2D input.
+def exclusive_scan(
+ data, axis=-1, return_reduction=False, output_dtype=None,
binop=tvm.tir.generic.add
+):
+ """Do exclusive scan on 1D or multidimensional input.
Parameters
----------
data : tvm.te.Tensor
- Input data. 1-D tensor with shape [scan_axis_size], or
- 2-D tensor with shape [batch_size, scan_axis_size].
+ Input data of any shape.
axis: int, optional
- The axis to do scan on. For now, only the inner most axis is supported.
+ The axis to do scan on. By default, scan is done on the innermost axis.
return_reduction: bool, optional
- Whether or not return a 1-D tensor storing the reduction of each row.
+ Whether or not return a tensor storing the reduction over each scan
axis.
+ If the input rank is N, this tensor is of rank N - 1.
Reductions are computed as part of the upsweep pass, so there is no
extra cost.
If False, reductions are ignored.
output_dtype: string, optional
The dtype of the output scan tensor. If not provided, the dtype of the
input is used.
+ binop: function, optional
Review comment:
I think you should say that this defaults to add.
##########
File path: python/tvm/relay/op/transform.py
##########
@@ -1320,3 +1320,50 @@ def adv_index(inputs):
Output tensor.
"""
return _make.adv_index(Tuple(inputs))
+
+
+def cumsum(data, axis=None, dtype=None):
+ """Numpy style cumsum op. Return the cumulative inclusive sum of the
elements along
+ a given axis.
+
+ Parameters
+ ----------
+ data : relay.Expr
+ The input data to the operator.
+
+ axis : int, optional
+ Axis along which the cumulative sum is computed. The default (None) is
to compute
+ the cumsum over the flattened array.
+
+ dtype : string, optional
+ Type of the returned array and of the accumulator in which the
elements are summed.
+ If dtype is not specified, it defaults to the dtype of data.
+
+ Returns
+ -------
+ result : relay.Expr
+ The result has the same size as data, and the same shape as data if
axis is not None.
+ If axis is None, the result is a 1-d array.
+
+ Examples:
Review comment:
I think this formatting is necessary for rst?
```suggestion
Examples
---------
```
And if you want a code block, you need a `.. code-block:: python`. See
`python/tvm/tir/ir_builder.py` for an example.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]