Lunderberg commented on PR #13530:
URL: https://github.com/apache/tvm/pull/13530#issuecomment-1334018512

   The performance testing script and the results on the LLVM, CUDA, and Vulkan 
backends are included below.
   
   
![image](https://user-images.githubusercontent.com/3888575/205102288-57508cc6-7b84-44ef-91a5-9eda12cd52f9.png)
   
   <details>
   <summary>
   Click to expand test script
   </summary>
   
   First, apply the following diff to patch float-indexing back into 
`topi.image.resize`.
   
   ```
   diff --git a/python/tvm/topi/image/resize.py 
b/python/tvm/topi/image/resize.py
   index 0383dd7ae..25675b614 100644
   --- a/python/tvm/topi/image/resize.py
   +++ b/python/tvm/topi/image/resize.py
   @@ -661,6 +661,13 @@ def _resize_2d(
                roi[2],
            )
    
   +    import os
   +
   +    use_float_indexing = "float" in os.environ.get("RESIZE_INDEXING", "")
   +    if use_float_indexing:
   +        in_x = in_x.astype("float32")
   +        in_y = in_y.astype("float32")
   +
        if method == "nearest_neighbor":
            if rounding_method == "":
                if coordinate_transformation_mode == "align_corners":
   ```
   
   Then, run the following script.
   
   ```python
   #!/usr/bin/env python3
   
   import os
   import sys
   import time
   
   import numpy as np
   import pytest
   
   import tvm
   from tvm import topi, te
   
   
   # Plugins pytest must load when this file is run as a test module:
   # pytest-benchmark supplies the `benchmark` fixture, and TVM's testing
   # plugin supplies the `target`/`dev` fixtures used below.
   pytest_plugins = [
       "pytest-benchmark",
       "tvm.testing.plugin",
   ]
   
   
   def resize2d_dx_compute(inp, dy):
       """Build the resize2d_dx op.

       Bicubic (half_pixel) resize of *inp* to 64x32 in NCHW layout, then the
       gradient of that output w.r.t. *inp*, seeded with head *dy*.
       Returns the list of gradient tensors from ``tvm.te.gradient``.
       """
       resized = topi.image.resize2d(
           inp,
           (None, None, None, None),  # no ROI
           (64, 32),                  # output (H, W)
           "NCHW",                    # layout
           "cubic",                   # method
           "half_pixel",              # coordinate transformation mode
           "",                        # rounding method
           bicubic_alpha=-0.75,
           bicubic_exclude=0,
           out_dtype="float32",
       )
       # Differentiate the resized output with respect to the input.
       return tvm.te.gradient(resized, [inp], head=dy)
   
   
   # Parametrize each test over int- vs float-based resize indexing; the value
   # is forwarded through the RESIZE_INDEXING env var (read by the patched
   # topi.image.resize) in _run_benchmark.
   resize_indexing = tvm.testing.parameter("resize_int_indexing", 
"resize_float_indexing")
   # Parametrize over the REMOVE_NO_OP_CONSTRAINED_BOOLEANS env var setting
   # (dict key is the test-id label, value is what the env var is set to).
   constrained_booleans = tvm.testing.parameter(
       by_dict={"kApplyConstraintsToBooleanBranches": "true", "": ""}
   )
   
   
   def _run_benchmark(
       benchmark, stage, resize_indexing, constrained_booleans="", target=None, dev=None
   ):
       """Benchmark one *stage* of the resize2d_dx pipeline.

       stage is one of "topi" (compute definition), "tvm.lower", "tvm.build",
       or "execute".  Every stage preceding the benchmarked one is run once,
       un-timed, so that its result feeds the timed call.  An unknown stage
       falls through and benchmarks nothing.
       """
       # The patched topi.image.resize and the simplifier read these env vars;
       # they are intentionally left set for the duration of the process.
       os.environ["RESIZE_INDEXING"] = resize_indexing
       os.environ["REMOVE_NO_OP_CONSTRAINED_BOOLEANS"] = constrained_booleans

       inp_shape = (32, 3, 32, 32)
       dy_shape = (32, 3, 64, 32)

       inp = tvm.te.placeholder(inp_shape, name="inp")
       dy = tvm.te.placeholder(dy_shape, name="dy")

       if stage == "topi":
           benchmark(resize2d_dx_compute, inp, dy)
           return
       grad = resize2d_dx_compute(inp, dy)[0]

       if constrained_booleans:
           pytest.skip("Runs too slowly to effectively benchmark")

       target = tvm.target.Target(target)
       with target:
           # Pick the schedule appropriate for the target's device class.
           if "gpu" in target.keys:
               sch = topi.cuda.injective.schedule_injective(grad)
           else:
               sch = topi.x86.injective.schedule_injective(grad)

       if stage == "tvm.lower":
           benchmark(tvm.lower, sch, [inp, dy, grad], simple_mode=True)
           return
       tvm.lower(sch, [inp, dy, grad], simple_mode=True)

       if stage == "tvm.build":
           benchmark(tvm.build, sch, [inp, dy, grad], target=target)
           return
       func = tvm.build(sch, [inp, dy, grad], target=target)

       inp_np = np.random.uniform(size=inp_shape).astype(inp.dtype)
       # Fix: cast dy to its own dtype rather than inp's (identical here since
       # both placeholders default to float32, but correct if they diverge).
       dy_np = np.random.uniform(size=dy_shape).astype(dy.dtype)

       inp_tvm = tvm.nd.array(inp_np, dev)
       dy_tvm = tvm.nd.array(dy_np, dev)
       grad_tvm = tvm.nd.empty(grad.shape, grad.dtype, dev)

       def execute_read():
           func(inp_tvm, dy_tvm, grad_tvm)
           dev.sync()  # include device completion in the timed region

       if stage == "execute":
           # LLVM host runs are slow per-call; fewer iterations suffice.
           iterations = 10 if "llvm" == target.kind.name else 100
           benchmark.pedantic(
               execute_read, iterations=iterations, warmup_rounds=5, rounds=10
           )
           return
   
   
   def test_benchmark_topi(benchmark, resize_indexing):
       """Time only the construction of the TE compute definition."""
       _run_benchmark(benchmark, stage="topi", resize_indexing=resize_indexing)
   
   
   def test_benchmark_lowering(benchmark, resize_indexing, constrained_booleans, target):
       """Time tvm.lower on the scheduled gradient."""
       _run_benchmark(
           benchmark,
           "tvm.lower",
           resize_indexing,
           constrained_booleans=constrained_booleans,
           target=target,
       )
   
   
   def test_benchmark_build(benchmark, resize_indexing, constrained_booleans, target):
       """Time tvm.build on the scheduled gradient."""
       _run_benchmark(
           benchmark,
           "tvm.build",
           resize_indexing,
           constrained_booleans=constrained_booleans,
           target=target,
       )
   
   
   def test_benchmark_execute(benchmark, resize_indexing, constrained_booleans, target, dev):
       """Time end-to-end execution of the built gradient kernel."""
       _run_benchmark(
           benchmark,
           "execute",
           resize_indexing,
           constrained_booleans=constrained_booleans,
           target=target,
           dev=dev,
       )
   
   
   if __name__ == "__main__":
       # Forward the command line to pytest and propagate its exit status.
       exit_code = pytest.main(sys.argv)
       sys.exit(exit_code)
   ```
   </details>
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to