mbs-octoml commented on a change in pull request #9038: URL: https://github.com/apache/tvm/pull/9038#discussion_r717102835
########## File path: tests/python/relay/test_pass_plan_devices.py ########## @@ -0,0 +1,1405 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License + + +"""Unit tests for the PlanDevices pass. We check: + - The pass alone given the expected AST, though we need to manually run InferTypes. + - The pass is idempotent.""" + +# TODO(mbs): All the input/expected programs should be directly quoted using @script +# TODO(mbs): Not testing Match and Constructor since not supported by Python bindings? +# TODO(mbs): Add back reference implementation tests once VM is ready. 
import tvm
from tvm import relay
import tvm.testing
import numpy as np

# Most tests below build tensors of shape (N, M).
N = 5
M = 7
CPU = tvm.device("cpu")  # device_type=1
GPU = tvm.device("cuda")  # device_type=2
DEFAULT = GPU


def rewrite_and_assert(in_mod, expected_mod):
    """Run the PlanDevices pass on in_mod and check it is structurally equal to expected_mod.

    Both modules are type-inferred first so the comparison is over fully
    annotated ASTs.
    """
    actual_mod = relay.transform.InferType()(in_mod)
    actual_mod = relay.transform.PlanDevices(DEFAULT)(actual_mod)
    actual_mod = relay.transform.InferType()(actual_mod)
    expected_mod = relay.transform.InferType()(expected_mod)
    if tvm.ir.structural_equal(actual_mod, expected_mod):
        return
    # Dump all three modules in full so we can see what's going on when things fail.
    print("Input module:")
    print(in_mod)
    print("Expected module:")
    print(expected_mod)
    print("Actual module:")
    print(actual_mod)
    # Re-check with the asserting variant so the error message pinpoints the
    # disagreeing sub-expressions.
    tvm.ir.assert_structural_equal(actual_mod, expected_mod)


def rand(shape):
    """Return a random float32 ndarray of the given shape."""
    return np.random.rand(*shape).astype("float32")


def rands(shape, n):
    """Return a list of n random float32 ndarrays of the given shape."""
    return [rand(shape) for _ in range(n)]


def exercise(in_mod: tvm.IRModule, expected_mod: tvm.IRModule, reference_func, args):
    """Test in_mod against expected_mod and reference_func using args."""
    # Correctness of the rewrite itself.
    rewrite_and_assert(in_mod, expected_mod)
    # Idempotence: the pass must be a fixed point on its own output.
    rewrite_and_assert(expected_mod, expected_mod)
    # TODO(mbs): Add back compiling and comparing to reference implementation once VM is ready.
+ + +# +# Annotation shorthands +# + + +def on_cpu(expr: relay.Expr): + return relay.annotation.on_device(expr, CPU) + + +def on_gpu(expr: relay.Expr): + return relay.annotation.on_device(expr, GPU) + + +def cpu_to_gpu(expr: relay.Expr): + return relay.op.device_copy(expr, CPU, GPU) + + +def gpu_to_cpu(expr: relay.Expr): + return relay.op.device_copy(expr, GPU, CPU) + + +def fixed_cpu(expr: relay.Expr): + return relay.annotation.on_device(expr, CPU, True) + + +def fixed_gpu(expr: relay.Expr): + return relay.annotation.on_device(expr, GPU, True) + + +def test_plain(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + c = relay.var("c", shape=shape) + d = relay.var("d", shape=shape) + + # def @main(a, b, c, d) { subtract(add(a, b), add(c, d)) } + def input(): + return tvm.IRModule.from_expr( + relay.Function([a, b, c, d], relay.subtract(relay.add(a, b), relay.add(c, d))) + ) + + # def @main(a, b, c, d, on_device={param_device_types=[2,2,2,2], result_device_type=2}) { + # subtract(add(a, b), add(c, d)) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function([a, b, c, d], relay.subtract(relay.add(a, b), relay.add(c, d))), + [GPU, GPU, GPU, GPU], + GPU, + ) + ) + + def ref(a, b, c, d): + return np.subtract(np.add(a, b), np.add(c, d)) + + exercise(input(), expected(), ref, rands(shape, 4)) + + +def test_left_add_on_cpu(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + c = relay.var("c", shape=shape) + d = relay.var("d", shape=shape) + + # def @main(a, b, c, d) { subtract(on_cpu(add(a, b)), add(c, d)) } + def input(): + return tvm.IRModule.from_expr( + relay.Function([a, b, c, d], relay.subtract(on_cpu(relay.add(a, b)), relay.add(c, d))) + ) + + # def @main(a, b, c, d, on_device={param_device_types=[1,1,2,2], result_device_type=2}) { + # subtract(cpu_to_gpu(fixed_cpu(add(a, b)), add(c, d)) + def expected(): + return tvm.IRModule.from_expr( 
+ relay.annotation.function_on_device( + relay.Function( + [a, b, c, d], + relay.subtract(cpu_to_gpu(fixed_cpu(relay.add(a, b))), relay.add(c, d)), + ), + [CPU, CPU, GPU, GPU], + GPU, + ) + ) + + def ref(a, b, c, d): + return np.subtract(np.add(a, b), np.add(c, d)) + + exercise(input(), expected(), ref, rands(shape, 4)) + + +def test_left_add_on_cpu_via_copy(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + c = relay.var("c", shape=shape) + d = relay.var("d", shape=shape) + + # def @main(a, b, c, d) { subtract(cpu_to_gpu(add(a, b)), add(c, d)) } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [a, b, c, d], relay.subtract(cpu_to_gpu(relay.add(a, b)), relay.add(c, d)) + ) + ) + + # def @main(a, b, c, d, on_device={param_device_types=[1,1,2,2], result_device_type=2}) { + # subtract(cpu_to_gpu(fixed_cpu(add(a, b)), add(c, d)) + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [a, b, c, d], + relay.subtract(cpu_to_gpu(fixed_cpu(relay.add(a, b))), relay.add(c, d)), + ), + [CPU, CPU, GPU, GPU], + GPU, + ) + ) + + def ref(a, b, c, d): + return np.subtract(np.add(a, b), np.add(c, d)) + + exercise(input(), expected(), ref, rands(shape, 4)) + + +def test_both_adds_on_cpu(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + c = relay.var("c", shape=shape) + d = relay.var("d", shape=shape) + + # def @main(a, b, c, d) { subtract(on_cpu(add(a, b)), on_cpu(add(c, d))) } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [a, b, c, d], relay.subtract(on_cpu(relay.add(a, b)), on_cpu(relay.add(c, d))) + ) + ) + + # def @main(a, b, c, d, on_device={param_device_types=[1,1,1,1], result_device_type=2}) { + # subtract(cpu_to_gpu(fixed_cpu(add(a, b)), cpu_to_gpu(fixed_cpu(add(c, d)))) + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [a, b, c, d], + 
relay.subtract( + cpu_to_gpu(fixed_cpu(relay.add(a, b))), + cpu_to_gpu(fixed_cpu(relay.add(c, d))), + ), + ), + [CPU, CPU, CPU, CPU], + GPU, + ) + ) + + def ref(a, b, c, d): + return np.subtract(np.add(a, b), np.add(c, d)) + + exercise(input(), expected(), ref, rands(shape, 4)) + + +def test_sharing(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + + # def @main(a, b) { + # %0 = add(a, b) + # subtract(on_cpu(%0), %0) } + def input(): + add = relay.add(a, b) + return tvm.IRModule.from_expr( + relay.Function([a, b], relay.subtract(on_cpu(add), on_cpu(add))) + ) + + # def @main(a, b, on_device={param_device_types=[1,1], result_device_type=2}) { + # %0 = add(a, b) + # subtract(cpu_to_gpu(fixed_cpu(%0), cpu_to_gpu(fixed_cpu(%0))) + def expected(): + add = relay.add(a, b) + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [a, b], relay.subtract(cpu_to_gpu(fixed_cpu(add)), cpu_to_gpu(fixed_cpu(add))) + ), + [CPU, CPU], + GPU, + ) + ) + + def ref(a, b): + x = np.add(a, b) + return np.subtract(x, x) + + exercise(input(), expected(), ref, rands(shape, 2)) + + +def test_let_on_cpu(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + c = relay.var("c", shape=shape) + d = relay.var("d", shape=shape) + l = relay.Var("l") + r = relay.Var("r") + + # def @main(a, b, c, d) { + # let l = add(a, b); + # let r = add(c, d); + # subtract(on_cpu(l), r) + # } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [a, b, c, d], + relay.Let( + l, relay.add(a, b), relay.Let(r, relay.add(c, d), relay.subtract(on_cpu(l), r)) + ), + ) + ) + + # def @main(a, b, c, d, on_device={param_device_types=[1,1,2,2], result_device_type=2}) { + # let l = fixed_cpu(add(a, b)); + # let r = add(c, d); + # subtract(cpu_to_gpu(l), r) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [a, b, c, d], + relay.Let( + l, + 
fixed_cpu(relay.add(a, b)), + relay.Let(r, relay.add(c, d), relay.subtract(cpu_to_gpu(l), r)), + ), + ), + [CPU, CPU, GPU, GPU], + GPU, + ) + ) + + def ref(a, b, c, d): + return np.subtract(np.add(a, b), np.add(c, d)) + + exercise(input(), expected(), ref, rands(shape, 4)) + + +def test_func_param_on_cpu(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + c = relay.var("c", shape=shape) + d = relay.var("d", shape=shape) + f = relay.Var("f") + x = relay.Var("x") + y = relay.Var("y") + + # def @main(a, b, c, d) { + # let f = fn(x, y) { on_cpu(add(x, y)) } -- forces both body and result on CPU + # subtract(f(a, b), add(c, d)) + # } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [a, b, c, d], + relay.Let( + f, + relay.Function([x, y], on_cpu(relay.add(x, y))), + relay.subtract(relay.Call(f, [a, b]), relay.add(c, d)), + ), + ) + ) + + # def @main(a, b, c, d, on_device={param_device_types=[1,1,1,1], result_device_type=1}) { + # let f = fn(x, y, on_device={param_device_types[1,1], result_device_type=1}) { + # add(x, y) + # }; + # subtract(f(a, b), add(c, d)) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [a, b, c, d], + relay.Let( + f, + relay.annotation.function_on_device( + relay.Function([x, y], relay.add(x, y)), [CPU, CPU], CPU + ), + relay.subtract(relay.Call(f, [a, b]), relay.add(c, d)), + ), + ), + [CPU, CPU, CPU, CPU], + CPU, + ) + ) + + def ref(a, b, c, d): + return np.subtract(np.add(a, b), np.add(c, d)) + + exercise(input(), expected(), ref, rands(shape, 4)) + + +def test_func_result_on_cpu(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + c = relay.var("c", shape=shape) + d = relay.var("d", shape=shape) + f = relay.Var("f") + x = relay.Var("x") + y = relay.Var("y") + + # def @main(a, b, c, d) { + # let f = fn(x, y) { add(x, y) } + # subtract(on_cpu(f(a, b)), add(c, d)) + # } + def input(): + 
return tvm.IRModule.from_expr( + relay.Function( + [a, b, c, d], + relay.Let( + f, + relay.Function([x, y], relay.add(x, y)), + relay.subtract(on_cpu(relay.Call(f, [a, b])), relay.add(c, d)), + ), + ) + ) + + # def @main(a, b, c, d, on_device={param_device_types=[1,1,2,2], result_device_type=2}) { + # let f = fixed_cpu(fn(x, y, on_device={param_device_types=[1,1], result_device_type=1}) { + # add(x, y) + # }); + # subtract(cpu_to_gpu(fixed_cpu(f(a, b))), add(c, d)) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [a, b, c, d], + relay.Let( + f, + fixed_cpu( + relay.annotation.function_on_device( + relay.Function([x, y], relay.add(x, y)), [CPU, CPU], CPU + ) + ), + relay.subtract( + cpu_to_gpu(fixed_cpu(relay.Call(f, [a, b]))), relay.add(c, d) + ), + ), + ), + [CPU, CPU, GPU, GPU], + GPU, + ) + ) + + def ref(a, b, c, d): + return np.subtract(np.add(a, b), np.add(c, d)) + + exercise(input(), expected(), ref, rands(shape, 4)) + + +def test_higher_order(): + shape = (N, M) + x = relay.var("x", shape=shape) + y = relay.var("y", shape=shape) + f = relay.Var("f") + g = relay.Var("g") + a = relay.Var("a") + h = relay.Var("h") + b = relay.Var("b") + + # The constraint on a flows back to y via f and h + # def @main(x, y) { + # let f = fn(g) { fn(a) { add(g(on_cpu(a)), x) } } + # let h = fn(b) { relu(b) } + # subtract(x, f(h)(y)) + # } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [x, y], + relay.Let( + f, + relay.Function( + [g], relay.Function([a], relay.add(relay.Call(g, [on_cpu(a)]), x)) + ), + relay.Let( + h, + relay.Function([b], relay.negative(b)), + relay.subtract(x, relay.Call(relay.Call(f, [h]), [y])), + ), + ), + ) + ) + + # def @main(x, y, on_device={param_device_types=[GPU, CPU], result_device_type=GPU}) { + # let f = fn(g, on_device={param_device_types=[GPU], result_device_type=GPU}) { + # fn(a, on_device={param_device_types=[CPU], result_device_type=GPU}) { + # 
add(g(cpu_to_gpu(a)), x) + # } + # } + # let h = fn(b, on_device={param_device_types=[GPU], result_device_type=GPU}) { negative(b) } + # subtract(x, f(h)(y)) + # } + def expected(): + # Yeah, this is illegible. + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [x, y], + relay.Let( + f, + relay.annotation.function_on_device( + relay.Function( + [g], + relay.annotation.function_on_device( + relay.Function( + [a], relay.add(relay.Call(g, [cpu_to_gpu(a)]), x) + ), + [CPU], + GPU, + ), + ), + [GPU], + GPU, + ), + relay.Let( + h, + relay.annotation.function_on_device( + relay.Function([b], relay.negative(b)), [GPU], GPU + ), + relay.subtract(x, relay.Call(relay.Call(f, [h]), [y])), + ), + ), + ), + [GPU, CPU], + GPU, + ) + ) + + def ref(x, y): + def f(g): + return lambda a: np.add(g(a), x) + + def h(b): + return np.negative(b) + + return np.subtract(x, f(h)(y)) + + exercise(input(), expected(), ref, rands(shape, 2)) + + +def test_function_in_tuple(): + shape = (N, M) + x = relay.var("x", shape=shape) + y = relay.var("y", shape=shape) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + y = relay.var("y", shape=shape) + f = relay.Var("f") + t = relay.Var("t") + + # Since f end up in a tuple its argument and result is forced to be on the CPU + # def @main(x, y) { + # let f = fn(a, b) { add(a, on_cpu(b)) } + # let t = (f, x) + # t.0(t.1, y) + # } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [x, y], + relay.Let( + f, + relay.Function([a, b], relay.add(a, on_cpu(b))), + relay.Let( + t, + relay.Tuple([f, x]), + relay.Call(relay.TupleGetItem(t, 0), [relay.TupleGetItem(t, 1), y]), + ), + ), + ) + ) + + # def @main(x, y, on_device={param_device_types=[1,1], result_device_type=1}) { + # let f = fn(a, b, on_device={param_device_types=[1,1], result_device_type=1}) { add(a, b) } + # let t = (f, x) + # t.0(t.1, y) + # } + def expected(): + return tvm.IRModule.from_expr( + 
relay.annotation.function_on_device( + relay.Function( + [x, y], + relay.Let( + f, + relay.annotation.function_on_device( + relay.Function([a, b], relay.add(a, b)), [CPU, CPU], CPU + ), + relay.Let( + t, + relay.Tuple([f, x]), + relay.Call(relay.TupleGetItem(t, 0), [relay.TupleGetItem(t, 1), y]), + ), + ), + ), + [CPU, CPU], + CPU, + ) + ) + + def ref(x, y): + return np.add(x, y) + + exercise(input(), expected(), ref, rands(shape, 2)) + + +def test_device_copy(): + shape = (N, M) + x = relay.var("x", shape=shape) + const = relay.const(rand(shape)) + + # def @main(x) { add(cpu_to_gpu(x), const) } + def input(): + return tvm.IRModule.from_expr(relay.Function([x], relay.add(cpu_to_gpu(x), const))) + + # def @main(x, on_device={param_device_types=[1], result_device_type=2}) { + # add(cpu_to_gpu(x), constant) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function([x], relay.add(cpu_to_gpu(x), const)), [CPU], GPU + ) + ) + + def ref(x): + return np.add(x, const.data.numpy()) + + exercise(input(), expected(), ref, rands(shape, 1)) + + +def test_shape_func(): + p = relay.var("p") + data_shape = (relay.Any(),) + x = relay.var("x", shape=data_shape) + y = relay.var("y", shape=data_shape) + s = relay.var("s", shape=(1,), dtype="int64") + + # def @main(x, s) { + # let p = fixed_gpu(fn(y) { relu(y) }) -- simulates a primitive post FuseOps + # shape_func(p, + # (shape_of(fixed_gpu(x)),), -- shape of primitive input tensor + # (s,), -- space for output shape + # [False]) -- calling with input shapes not tensors + # } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [x, s], + relay.Let( + p, + fixed_gpu(relay.Function([y], relay.nn.relu(y))), + relay.op.vm.shape_func( + p, + relay.Tuple([relay.op.vm.shape_of(fixed_gpu(x))]), + relay.Tuple([s]), + [False], + ), + ), + ) + ) + + # def @main(x, s, on_device={param_device_types=[2,1], result_device_type=1}) { + # let p = fixed_gpu(fn(y, 
param_device_types=[2], result_device_type=2) { relu(y) }) + # shape_func(p, + # (shape_of(x),), + # (s,), + # [False]) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [x, s], + relay.Let( + p, + fixed_gpu( + relay.annotation.function_on_device( + relay.Function([y], relay.nn.relu(y)), [GPU], GPU + ) + ), + relay.op.vm.shape_func( + p, relay.Tuple([relay.op.vm.shape_of(x)]), relay.Tuple([s]), [False] + ), + ), + ), + [GPU, CPU], + CPU, + ) + ) + + # Don't try to execute, too fiddly to setup. + exercise(input(), expected(), None, None) + + +def test_shape_of(): + compiletime_shape = (relay.Any(), relay.Any()) + runtime_shape = (N, M) + x = relay.var("x", shape=compiletime_shape) + + # We need to use fixed_gpu since the result of on_gpu will default to the result device of @main which is cpu, + # which then forces a copy. + # TODO(mbs): Perhaps the defaulting heuristics are being too clever? + # def @main(x) { shape_of(fixed_gpu(x)) } + def input(): + return tvm.IRModule.from_expr(relay.Function([x], relay.op.vm.shape_of(fixed_gpu(x)))) + + # def @main(x, on_device={param_device_types=[2], result_dev_type=1}) { + # shape_of(x) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function([x], relay.op.vm.shape_of(x)), [GPU], CPU + ) + ) + + def ref(x): + return x.shape + + exercise(input(), expected(), ref, rands(runtime_shape, 1)) + + +def test_alloc_storage(): + size = relay.Var("size", relay.scalar_type("int64")) + alignment = relay.Var("alignment", relay.scalar_type("int64")) + main = relay.GlobalVar("main") + stdlib = tvm.IRModule() + stdlib.import_from_std("core.rly") + + # def @main(size, alignment) { alloc_storage(size, alignment, GPU) } + def input(): + mod = tvm.IRModule() + mod.update(stdlib) + mod[main] = relay.Function( + [size, alignment], relay.op.memory.alloc_storage(size, alignment, GPU) + ) + return mod + + # def @main(size, 
alignment, on_device={param_device_types=[1,1], result_device_type=2}) { + # alloc_storage(size, alignment, GPU) + # } + def expected(): + mod = tvm.IRModule() + mod.update(stdlib) + mod[main] = relay.annotation.function_on_device( + relay.Function([size, alignment], relay.op.memory.alloc_storage(size, alignment, GPU)), + [CPU, CPU], + GPU, + ) + return mod + + # Don't try to execute, too fiddly to setup. + exercise(input(), expected(), None, None) + + +def test_alloc_tensor(): + stdlib = tvm.IRModule() + stdlib.import_from_std("core.rly") + sto_type = relay.TypeCall(stdlib.get_global_type_var("Storage"), []) + sto = relay.Var("sto", sto_type) + main = relay.GlobalVar("main") + shape = relay.const(np.array([3, 2]), dtype="int64") + + # def @main(sto) { alloc_tensor(sto, 0, [3, 2]) } + def input(): + mod = tvm.IRModule() + mod.update(stdlib) + mod[main] = relay.Function( + [sto], relay.op.memory.alloc_tensor(sto, relay.const(0, dtype="int64"), shape) + ) + return mod + + # def @main(sto, on_device={param_device_types=[2], result_device_type=2}) { + # alloc_tensor(sto, fixed_cpu(0), fixed_cpu([3, 2])) + # } + def expected(): + mod = tvm.IRModule() + mod.update(stdlib) + mod[main] = relay.annotation.function_on_device( + relay.Function( + [sto], + relay.op.memory.alloc_tensor( + sto, fixed_cpu(relay.const(0, dtype="int64")), fixed_cpu(shape) + ), + ), + [GPU], + GPU, + ) + return mod + + # Don't try to execute, too fiddly to setup. 
+ exercise(input(), expected(), None, None) + + +def test_reshape_tensor(): + shape = (2, 8) + x = relay.var("x", shape=shape, dtype="float32") + newshape_expr = relay.const([2, 4, 2], dtype="int64") + newshape_prim = [2, 4, 2] + + # def @main(x) { reshape_tensor(x, shape, newshape=[2,4,2]) } + def input(): + return tvm.IRModule.from_expr( + relay.Function([x], relay.op.vm.reshape_tensor(x, newshape_expr, newshape_prim)) + ) + + # def @main(x, on_device={param_device_types=[2], result_device_type=2}) { + # reshape_tensor(x, fixed_cpu(shape), newshape=[2,4,2]) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [x], relay.op.vm.reshape_tensor(x, fixed_cpu(newshape_expr), newshape_prim) + ), + [GPU], + GPU, + ) + ) + + def ref(x): + return np.reshape(x, newshape_prim) + + exercise(input(), expected(), ref, rands(shape, 1)) + + +def test_dynamic_input(): + compiletime_shape = (relay.Any(), relay.Any()) + runtime_shape = (N, M) + x0 = relay.var("x0", shape=compiletime_shape) + x1 = relay.var("x1", shape=compiletime_shape) + + # def @main(x0, x1) { add(x0, x1) } + def input(): + return tvm.IRModule.from_expr(relay.Function([x0, x1], relay.add(x0, x1))) + + # def @main(x0, x1), on_device={param_device_types=[2,2], result_device_type=2}) { + # add(x0, x1) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function([x0, x1], relay.add(x0, x1)), [GPU, GPU], GPU + ) + ) + + def ref(x0, x1): + return np.add(x0, x1) + + exercise(input(), expected(), ref, rands(runtime_shape, 2)) + + +def test_redundant_annotation(): + shape = (N, M) + x = relay.var("x", shape=shape) + y = relay.var("y", shape=shape) + z = relay.var("z", shape=shape) + + # def @main(x, y, z) { + # %0 = add(x, y) + # add(subtract(on_cpu(%0), z), on_cpu(%0)) + # } + def input(): + a = relay.add(x, y) + return tvm.IRModule.from_expr( + relay.Function([x, y, z], relay.add(relay.subtract(on_cpu(a), 
z), on_cpu(a))) + ) + + # def @main(x, y, z, on_device={param_device_types=[1,1,2], result_device_type=2}) { + # %0 = add(x, y) + # add(subtract(cpu_to_gpu(fixed_cpu(%0)), z), cpu_to_gpu(fixed_cpu(%0))) + # } + def expected(): + a = relay.add(x, y) + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [x, y, z], + relay.add( + relay.subtract(cpu_to_gpu(fixed_cpu(a)), z), cpu_to_gpu(fixed_cpu(a)) + ), + ), + [CPU, CPU, GPU], + GPU, + ) + ) + + def ref(x, y, z): + a = np.add(x, y) + return np.add(np.subtract(a, z), a) + + exercise(input(), expected(), ref, rands(shape, 3)) + + +def test_annotate_expr(): + shape = (N, M) + x = relay.var("x", shape=shape) + y = relay.var("y", shape=shape) + z = relay.var("z", shape=shape) + + # def @main(x, y, z) { on_cpu(subtract(on_gpu(add(x, y)), z)) } -- forces function result also on cpu + def input(): + return tvm.IRModule.from_expr( + relay.Function([x, y, z], on_cpu(relay.subtract(on_gpu(relay.add(x, y)), z))) + ) + + # def @main(x, y, z, on_device={param_device_types=[2,2,1], result_device_type=1}) { + # subtract(gpu_to_cpu(fixed_gpu(add(x, y))), z) + # } + def expected(): + add = relay.add(x, y) + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [x, y, z], relay.subtract(gpu_to_cpu(fixed_gpu(relay.add(x, y))), z) + ), + [GPU, GPU, CPU], + CPU, + ) + ) + + def ref(x, y, z): + return np.subtract(np.add(x, y), z) + + exercise(input(), expected(), ref, rands(shape, 3)) + + +def test_annotate_all(): + shape = (N, M) + x = relay.var("x", shape=shape) + y = relay.var("y", shape=shape) + z = relay.var("z", shape=shape) + + # def @main(x, y, z) { on_cpu(subtract(on_cpu(add(x, y)), z) } -- top-level also forces result to be CPU + def input(): + return tvm.IRModule.from_expr( + relay.Function([x, y, z], on_cpu(relay.subtract(on_cpu(relay.add(x, y)), z))) + ) + + # def @main(x, y, z, on_device={param_device_types=[CPU, CPU, CPU], result_device_type=CPU}) { + # 
subtract(add(x, y), z) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function([x, y, z], relay.subtract(relay.add(x, y), z)), [CPU, CPU, CPU], CPU + ) + ) + + def ref(x, y, z): + return np.subtract(np.add(x, y), z) + + exercise(input(), expected(), ref, rands(shape, 3)) + + +def test_conv_network(): + r"""The network and devices are as follows: + data1 data2 <--- CPU + | | + conv2d conv2d <--- CPU + \ / + \ / + add <--- GPU + | + conv2d <--- CPU + | + <result> <--- CPU + """ + batch_size = 1 + dshape = (batch_size, 64, 56, 56) + wshape = (64, 64, 3, 3) + weight = relay.var("weight", shape=wshape) + data1 = relay.var("data1", shape=dshape) + data2 = relay.var("data2", shape=dshape) + + def input(): + conv2d_1 = relay.nn.conv2d(data1, weight, channels=64, kernel_size=(3, 3), padding=(1, 1)) + conv2d_2 = relay.nn.conv2d(data2, weight, channels=64, kernel_size=(3, 3), padding=(1, 1)) + add = relay.add(on_cpu(conv2d_1), on_cpu(conv2d_2)) + conv2d_3 = relay.nn.conv2d( + on_gpu(add), weight, channels=64, kernel_size=(3, 3), padding=(1, 1) + ) + return tvm.IRModule.from_expr(relay.Function([data1, data2, weight], on_cpu(conv2d_3))) + + def expected(): + conv2d_1 = relay.nn.conv2d(data1, weight, channels=64, kernel_size=(3, 3), padding=(1, 1)) + conv2d_2 = relay.nn.conv2d(data2, weight, channels=64, kernel_size=(3, 3), padding=(1, 1)) + add = relay.add(cpu_to_gpu(fixed_cpu(conv2d_1)), cpu_to_gpu(fixed_cpu(conv2d_2))) + conv2d_3 = relay.nn.conv2d( + gpu_to_cpu(fixed_gpu(add)), weight, channels=64, kernel_size=(3, 3), padding=(1, 1) + ) + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function([data1, data2, weight], conv2d_3), [CPU, CPU, CPU], CPU + ) + ) + + # Don't try to execute, we don't have a reference conv2d + exercise(input(), expected(), None, None) + + +def test_tuple_get_item(): + shape = (3, 3, 4) + x = relay.Var("x", relay.ty.TensorType(shape, "float32")) + t = 
relay.Var("t") + + # We'll device copy after projection, not before. + # def @main(x) { + # let t = split(x, 3); + # subtract(on_cpu(t).0, on_cpu(t).1) + # } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [x], + relay.Let( + t, + relay.op.split(x, 3).astuple(), + on_gpu( + relay.subtract( + relay.TupleGetItem(on_cpu(t), 0), relay.TupleGetItem(on_cpu(t), 1) + ) + ), + ), + ) + ) + + # def @main(x, on_device={param_device_type=[1], result_device_type=2}) { + # let t = fixed_cpu(split(x, 3)) + # subtract(cpu_to_gpu(fixed_cpu(t.0)), cpu_to_gpu(fixed_cpu(t.1))) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [x], + relay.Let( + t, + fixed_cpu(relay.op.split(x, 3).astuple()), + relay.subtract( + cpu_to_gpu(fixed_cpu(relay.TupleGetItem(t, 0))), + cpu_to_gpu(fixed_cpu(relay.TupleGetItem(t, 1))), + ), + ), + ), + [CPU], + GPU, + ) + ) + + def ref(x): + t = np.split(x, 3) + return np.subtract(t[0], t[1]) + + exercise(input(), expected(), ref, rands(shape, 1)) + + +def test_propogation(): + R""" The network and devices are as follows: + x <--- CPU + | + log <--- CPU + / \ + log2 log10 <--- GPU + \ / + add <--- GPU + | + tan <--- CPU + | + <result> <--- CPU + """ + shape = (N, M) + x = relay.var("x", shape=shape) + + def input(): + log = relay.log(x) + log2 = relay.log2(on_cpu(log)) + log10 = relay.log10(on_cpu(log)) + add = relay.add(on_gpu(log2), on_gpu(log10)) + tan = relay.tan(on_gpu(add)) + return tvm.IRModule.from_expr(relay.Function([x], on_cpu(tan))) + + def expected(): + log = relay.log(x) + log2 = relay.log2(cpu_to_gpu(fixed_cpu(log))) + log10 = relay.log10(cpu_to_gpu(fixed_cpu(log))) + add = relay.add(log2, log10) + tan = relay.tan(gpu_to_cpu(fixed_gpu(add))) + return tvm.IRModule.from_expr( + relay.annotation.function_on_device(relay.Function([x], tan), [CPU], CPU) + ) + + def ref(x): + y = np.log(x) + return np.tan(np.add(np.log2(y), np.log10(y))) + + exercise(input(), 
expected(), ref, rands(shape, 1)) + + +def test_fusible_network(): + R""" The network is as follows: + x y <--- GPU + \ / + add <--- GPU + / \ + negative \ <--- CPU + \ \ + \ negative <--- GPU + \ / + add <--- GPU + | + negative <--- CPU + | + <result> <--- CPU + """ + shape = (N, M) + x = relay.var("x", shape=shape) + y = relay.var("y", shape=shape) + + def input(): + add = relay.add(x, y) + sqrt = relay.negative(on_gpu(add)) + log = relay.negative(add) + subtract = relay.add(on_cpu(sqrt), log) + exp = relay.negative(on_gpu(subtract)) + return tvm.IRModule.from_expr(relay.Function([x, y], on_cpu(exp))) + + def expected(): + add = relay.add(x, y) + sqrt = relay.negative(gpu_to_cpu(fixed_gpu(add))) + log = relay.negative(add) + subtract = relay.add(cpu_to_gpu(fixed_cpu(sqrt)), log) + exp = relay.negative(gpu_to_cpu(fixed_gpu(subtract))) + return tvm.IRModule.from_expr( + relay.annotation.function_on_device(relay.Function([x, y], exp), [GPU, GPU], CPU) + ) + + def ref(x, y): + z = np.add(x, y) + return np.negative(np.add(np.negative(z), np.negative(z))) + + exercise(input(), expected(), ref, rands(shape, 2)) + + +def test_unpropagatable_graph(): + r"""The network is as follows: + a b <--- CPU + \ / + \ / c d <--- GPU + \ / \ / + add \ / <--- CPU + \ \ / + \ multiply <--- GPU + \ / + subtract <--- CPU + | + <result> <--- CPU + """ + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + c = relay.var("c", shape=shape) + d = relay.var("d", shape=shape) + + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [a, b, c, d], + on_cpu(relay.subtract(on_cpu(relay.add(a, b)), on_gpu(relay.multiply(c, d)))), + ) + ) + + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [a, b, c, d], + relay.subtract(relay.add(a, b), gpu_to_cpu(fixed_gpu(relay.multiply(c, d)))), + ), + [CPU, CPU, GPU, GPU], + CPU, + ) + ) + + def ref(a, b, c, d): + return np.subtract(np.add(a, b), 
np.multiply(c, d)) + + exercise(input(), expected(), ref, rands(shape, 4)) + + +def test_conditional(): + shape = (N, M) + x = relay.Var("x", relay.ty.scalar_type("bool")) + y = relay.var("y", shape=shape) + z = relay.var("z", shape=shape) + f = relay.Var("f") + g = relay.Var("g") + h = relay.Var("h") + a1 = relay.Var("a") + a2 = relay.Var("a") + + # def @main(x, y, z) { + # let f = fn(a) { add(a, fixed_cpu(y)) } + # let g = fn(a) { subtract(a, y) } + # let h = if (x) { + # f + # } else { + # g + # } + # h(z) + # } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [x, y, z], + relay.Let( + f, + relay.Function([a1], relay.add(a1, fixed_cpu(y))), + relay.Let( + g, + relay.Function([a2], relay.subtract(a2, y)), + relay.Let(h, relay.If(x, f, g), relay.Call(h, [z])), + ), + ), + ) + ) + + # def @main(x, y, z, on_device={param_device_types=[1,1,1], result_device_type=1}) { + # let f = fn(a, on_device={param_device_types=[1], result_device_type=1}) { add(a, y) } + # let g = fn + # (a, on_device={param_device_types=[1], result_device_type=1}) { subtract(a, y) } + # let h = if (x) { + # f + # } else { + # g + # } + # h(z) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [x, y, z], + relay.Let( + f, + relay.annotation.function_on_device( + relay.Function([a1], relay.add(a1, y)), [CPU], CPU + ), + relay.Let( + g, + relay.annotation.function_on_device( + relay.Function([a2], relay.subtract(a2, y)), [CPU], CPU + ), + relay.Let(h, relay.If(x, f, g), relay.Call(h, [z])), + ), + ), + ), + [CPU, CPU, CPU], + CPU, + ) + ) + + def ref(x, y, z): + def f(a): + return np.add(a, y) + + def g(a): + return np.subtract(a, y) + + h = f if x else g + return h(z) + + exercise(input(), expected(), ref, [True, rand(shape), rand(shape)]) + + +def test_global(): + shape = (N, M) + a = relay.var("a", shape=shape) + b = relay.var("b", shape=shape) + x = relay.var("x", shape=shape) + y = relay.var("y", shape=shape) + 
f = relay.GlobalVar("f") + main = relay.GlobalVar("main") + + # def @f(a, b) { add(a, on_cpu(b)) } + # def @main(x, y) { @f(y, x) } + def input(): + mod = tvm.IRModule() + mod[f] = relay.Function( + [a, b], relay.add(a, on_cpu(b)), relay.ty.TensorType(shape, "float32") + ) + mod[main] = relay.Function( + [x, y], relay.Call(f, [y, x]), relay.ty.TensorType(shape, "float32") + ) + return mod + + # def @f(a, b, on_device={param_device_types=[2,1], result_device_type=2}) { add(a, on_cpu(b)) } + # def @main(x, y, on_device={param_device_types=[1,2], result_device_type=2}) { @f(y, x) } + def expected(): + mod = tvm.IRModule() + mod[f] = relay.annotation.function_on_device( + relay.Function( + [a, b], relay.add(a, cpu_to_gpu(b)), relay.ty.TensorType(shape, "float32") + ), + [GPU, CPU], + GPU, + ) + mod[main] = relay.annotation.function_on_device( + relay.Function([x, y], relay.Call(f, [y, x]), relay.ty.TensorType(shape, "float32")), + [CPU, GPU], + GPU, + ) + return mod + + def ref(x, y): + def f(a, b): + return np.add(a, b) + + return f(x, y) + + exercise(input(), expected(), ref, rands(shape, 2)) + + +# Note that match and ADTs don't appear to be supported for direct AST +# construction. 
+ + +def test_ref(): + shape = (N, M) + x = relay.var("x", shape=shape) + y = relay.var("y", shape=shape) + r = relay.var("r") + dummy = relay.var("dummy") + + # def @main(x, y) { + # r = ref(x) + # ref_write(r, on_cpu(y)) + # add(x, ref_read(r)) + # } + def input(): + return tvm.IRModule.from_expr( + relay.Function( + [x, y], + relay.Let( + r, + relay.RefCreate(x), + relay.Let(dummy, relay.RefWrite(r, on_cpu(y)), relay.add(x, relay.RefRead(r))), + ), + ) + ) + + # def @main(x, y, on_device={param_device_types=[GPU, CPU], result_device_type=GPU}) { + # r = ref(x) + # ref_write(r, cpu_to_gpu(y)) + # add(x, ref_read(r)) + # } + def expected(): + return tvm.IRModule.from_expr( + relay.annotation.function_on_device( + relay.Function( + [x, y], + relay.Let( + r, + relay.RefCreate(x), + relay.Let( + dummy, relay.RefWrite(r, cpu_to_gpu(y)), relay.add(x, relay.RefRead(r)) + ), + ), + ), + [GPU, CPU], + GPU, + ) + ) + + def ref(x, y): + r = {"value": x} + r["value"] = y + return np.add(x, r["value"]) + + # Don't try to execute, no backend currently supports both cross-devices and references. + exercise(input(), expected(), None, None) + + +if __name__ == "__main__": + test_plain() Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
