This is an automated email from the ASF dual-hosted git repository.
ekalda pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new c4e6f96386 [TOPI] Add dense schedule for fp16 and fp32 using gemm
(#17091)
c4e6f96386 is described below
commit c4e6f96386a1bebd9eddd324aba939efd7a376be
Author: Eirene Pandi <[email protected]>
AuthorDate: Tue Jul 9 09:57:18 2024 +0100
[TOPI] Add dense schedule for fp16 and fp32 using gemm (#17091)
Add a new schedule for the dense operator
based on the gemm algorithm.
---
python/tvm/relay/op/strategy/arm_cpu.py | 25 +++
python/tvm/testing/utils.py | 5 +
python/tvm/topi/arm_cpu/dense.py | 21 ++-
python/tvm/topi/arm_cpu/dense_alter_op.py | 34 +++-
python/tvm/topi/arm_cpu/dense_gemm.py | 174 +++++++++++++++++++++
python/tvm/topi/nn/dense.py | 2 +
tests/python/frontend/keras/test_forward.py | 2 +-
tests/python/relay/strategy/arm_cpu/test_dense.py | 50 ++++++
.../relay/strategy/test_select_implementation.py | 12 +-
tests/python/relay/test_any.py | 6 +
tests/python/relay/test_pass_alter_op_layout.py | 35 ++++-
tests/scripts/task_lint.sh | 4 +-
12 files changed, 342 insertions(+), 28 deletions(-)
diff --git a/python/tvm/relay/op/strategy/arm_cpu.py
b/python/tvm/relay/op/strategy/arm_cpu.py
index f4b4708401..bd9a0a4d02 100644
--- a/python/tvm/relay/op/strategy/arm_cpu.py
+++ b/python/tvm/relay/op/strategy/arm_cpu.py
@@ -736,6 +736,18 @@ def schedule_dense_arm_cpu(attrs, inputs, out_type,
target):
plevel=12,
)
+ if (
+ target.features.is_aarch64
+ and data.dtype in ["float16", "float32"]
+ and weight.dtype in ["float16", "float32"]
+ and out_type.dtype in ["float16", "float32"]
+ ):
+ strategy.add_implementation(
+ wrap_compute_dense(topi.arm_cpu.dense_gemm),
+ wrap_topi_schedule(topi.arm_cpu.schedule_dense_gemm),
+ name="dense_gemm.arm_cpu",
+ plevel=11,
+ )
# Fallback to x86 schedules as there is currently no arm_cpu schedule for
dense
strategy.add_implementation(
wrap_compute_dense(topi.x86.dense_nopack),
@@ -780,6 +792,19 @@ def matmul_strategy_arm_cpu(attrs, inputs, out_type,
target):
lambda: None,
name="matmul.arm_cpu.sme",
)
+ elif (
+ target.features.is_aarch64
+ and data.dtype in ["float16", "float32"]
+ and weight.dtype in ["float16", "float32"]
+ and out_type.dtype in ["float16", "float32"]
+ and not (attrs.transpose_a or attrs.transpose_b)
+ and len(data.shape) == 2
+ ):
+ strategy.add_implementation(
+ wrap_compute_matmul(topi.arm_cpu.dense_gemm),
+ wrap_topi_schedule(topi.arm_cpu.schedule_dense_gemm),
+ name="matmul.arm_cpu.neon",
+ )
return strategy
logger.warning("matmul is not optimized for arm cpu.")
diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py
index a208459dd8..8fd64d8ab7 100644
--- a/python/tvm/testing/utils.py
+++ b/python/tvm/testing/utils.py
@@ -871,6 +871,11 @@ requires_x86 = Feature(
"x86", "x86 Architecture", run_time_check=lambda: platform.machine() ==
"x86_64"
)
+# Mark a test as requiring the aarch64 Architecture to run.
+requires_aarch64 = Feature(
+ "AArch64", "AArch64 Architecture", run_time_check=lambda:
platform.machine() == "aarch64"
+)
+
# Mark a test as requiring the CUDA runtime.
requires_cuda = Feature(
"cuda",
diff --git a/python/tvm/topi/arm_cpu/dense.py b/python/tvm/topi/arm_cpu/dense.py
index 6a44cc89b0..929413893b 100644
--- a/python/tvm/topi/arm_cpu/dense.py
+++ b/python/tvm/topi/arm_cpu/dense.py
@@ -16,16 +16,13 @@
# under the License.
"""Dense schedule for ARM CPU"""
from tvm import autotvm
-
-from .mprofile.dsp.dense import (
- dense_dsp_schedule,
- dense_dsp_compute,
-)
+from .mprofile.dsp.dense import dense_dsp_schedule, dense_dsp_compute
+from .dense_gemm import dense_gemm_compute, dense_gemm_schedule
@autotvm.register_topi_compute("dense_dsp.arm_cpu")
def dense_dsp(cfg, data, weight, bias, out_dtype):
- """Compute dense_dsp with v7e-m DSP instructions."""
+ """Compute dense with DSP instructions."""
return dense_dsp_compute(cfg, data, weight, bias=bias, out_dtype=out_dtype)
@@ -33,3 +30,15 @@ def dense_dsp(cfg, data, weight, bias, out_dtype):
def schedule_dense_dsp(cfg, outs):
"""Create schedule for dense_dsp"""
return dense_dsp_schedule(cfg, outs)
+
+
[email protected]_topi_compute("dense_gemm.arm_cpu")
+def dense_gemm(cfg, data, weight, bias, out_dtype, transpose_a=False,
transpose_b=True):
+ """Compute dense using GeMM."""
+ return dense_gemm_compute(cfg, data, weight, bias, out_dtype, transpose_a,
transpose_b)
+
+
[email protected]_topi_schedule("dense_gemm.arm_cpu")
+def schedule_dense_gemm(cfg, outs):
+ """Create schedule for dense using GeMM."""
+ return dense_gemm_schedule(cfg, outs)
diff --git a/python/tvm/topi/arm_cpu/dense_alter_op.py
b/python/tvm/topi/arm_cpu/dense_alter_op.py
index 0ad878b741..973ab85d20 100644
--- a/python/tvm/topi/arm_cpu/dense_alter_op.py
+++ b/python/tvm/topi/arm_cpu/dense_alter_op.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+# pylint: disable=invalid-name,unused-variable,unused-argument,no-member
"""Dense alter op definitions for the `arm_cpu` device key."""
import tvm
@@ -47,13 +48,11 @@ def _alter_dense(attrs, inputs, tinfos, out_type):
cfg = dispatch_ctx.query(target, workload)
topi_impl = workload[0]
+
if topi_impl == "matmul.arm_cpu.sme":
- # Pre-compute transposed weights and convert to a matmul
- assert isinstance(
- inputs[1], relay.Constant
- ), "matmul_sme.arm_cpu requires weights be a Relay Constant"
weight_dtype = tinfos[1].dtype
+ N, K = tinfos[1].shape
encoded_weight = inputs[1]
# For dense the weights (rhs) are provided in transposed format,
@@ -65,15 +64,15 @@ def _alter_dense(attrs, inputs, tinfos, out_type):
# float16->float32 schedule the transformation currently happens at
runtime
# with the ARM_SME_BLOCK2_2SVLx1SVL_FP16_TRANSPOSE_INTERLEAVE
intrinsic.
if weight_dtype == "float32":
- encoded_weight =
relay.const(encoded_weight.data.numpy().transpose(), weight_dtype)
+ encoded_weight = relay.transpose(encoded_weight)
transpose_b = False
- new_weight = te.placeholder((encoded_weight.data.shape),
dtype=weight_dtype)
+ new_weight = te.placeholder(([K, N]), dtype=weight_dtype)
+
new_workload = autotvm.task.args_to_workload(
[tinfos[0], new_weight, None, out_type.dtype, False, transpose_b],
topi_impl
)
dispatch_ctx.update(target, new_workload, cfg)
-
return _make.matmul(
inputs[0],
encoded_weight,
@@ -82,6 +81,27 @@ def _alter_dense(attrs, inputs, tinfos, out_type):
False,
transpose_b,
)
+ elif topi_impl == "dense_gemm.arm_cpu":
+
+ weight_dtype = tinfos[1].dtype
+ N, K = tinfos[1].shape
+
+ encoded_weight = relay.transpose(inputs[1])
+ new_weight = te.placeholder(([K, N]), dtype=weight_dtype)
+
+ new_workload = autotvm.task.args_to_workload(
+ [tinfos[0], new_weight, None, out_type.dtype, False, False],
topi_impl
+ )
+ dispatch_ctx.update(target, new_workload, cfg)
+
+ return _make.matmul(
+ inputs[0],
+ encoded_weight,
+ attrs.units,
+ attrs.out_dtype,
+ False,
+ False,
+ )
# x86 schedules are used as a fallback
return tvm.topi.x86.dense_alter_op._alter_dense_layout(attrs, inputs,
tinfos, out_type)
diff --git a/python/tvm/topi/arm_cpu/dense_gemm.py
b/python/tvm/topi/arm_cpu/dense_gemm.py
new file mode 100644
index 0000000000..316d5731c5
--- /dev/null
+++ b/python/tvm/topi/arm_cpu/dense_gemm.py
@@ -0,0 +1,174 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-variable, too-many-locals
+# pylint: disable=unused-argument, redefined-builtin
+"""GeMM dense schedule on AArch64"""
+import tvm
+from tvm import te
+from tvm.topi import nn
+from tvm.topi.arm_cpu.arm_utils import get_tiling_A, get_tiling_B_transformed,
pad_dim_to_multiple
+from ..utils import get_const_tuple, traverse_inline
+from .. import tag
+
+# Compute function
+def dense_gemm_compute(
+ cfg, data, weight, bias=None, out_dtype=None, transpose_a=False,
transpose_b=True
+):
+ """
+ Compute dense using GeMM.
+
+ Parameters
+ ----------
+ cfg : Autotvm tuning space config file,
+ empty in this case, but it's needed as an arg.
+
+ data : tvm.te.Tensor
+ 2-D with shape [M, K] or [K, M].
+
+ weight : tvm.te.Tensor
+ 2-D with shape [K, N] or [N, K].
+
+ bias : Optional[tvm.te.Tensor]
+ 1-D with shape [N]
+
+
+ out_dtype : Optional[str]
+ Specifies the output data type.
+
+ transpose_a : Optional[bool] = False
+ Whether the data tensor is in transposed format.
+
+ transpose_b : Optional[bool] = True
+ Whether the weight tensor is in transposed format.
+
+ Returns
+ -------
+ out : tvm.te.Tensor
+ 1-D with shape [out_dim]
+ """
+
+ if out_dtype is None:
+ out_dtype = data.dtype
+ M, K = get_const_tuple(data.shape) # batch, in_dim
+ if bool(transpose_b): # out_dim
+ (N, _) = get_const_tuple(weight.shape)
+ else:
+ (_, N) = get_const_tuple(weight.shape)
+
+ tile_M, tile_K = get_tiling_A(False, out_dtype)
+ tile_N, _ = get_tiling_B_transformed(False, out_dtype, False)
+
+ M_padded, pad_M = pad_dim_to_multiple(M, tile_M)
+ K_padded, pad_K = pad_dim_to_multiple(K, tile_K)
+ N_padded, pad_N = pad_dim_to_multiple(N, tile_N)
+ m_pad_after = (pad_M, pad_K)
+ n_pad_after = (pad_N, pad_K) if transpose_b else (pad_K, pad_N)
+
+ if pad_M != 0 or pad_K != 0:
+ data = nn.pad(data, pad_before=(0, 0), pad_after=m_pad_after,
name="data_padded")
+
+ k = te.reduce_axis((0, K_padded), name="k")
+
+ if bool(transpose_b):
+ weight = te.compute(
+ (K_padded, N_padded), lambda x, y: weight[y, x],
name="weight_transposed"
+ )
+
+ if pad_N != 0 or pad_K != 0:
+ weight = nn.pad(weight, pad_before=(0, 0), pad_after=n_pad_after,
name="weight_padded")
+
+ C = te.compute(
+ (M_padded, N_padded),
+ lambda x, y: te.sum(
+ data[x, k].astype(out_dtype) * weight[k, y].astype(out_dtype),
+ axis=k,
+ ).astype(out_dtype),
+ name="C",
+ )
+
+ if bias is not None:
+ C = te.compute(
+ (M_padded, N_padded),
+ lambda i, j: C[i, j] + bias[j].astype(out_dtype),
+ tag=tag.BROADCAST,
+ name="dense_biased_output",
+ )
+
+ # We need to ensure that infer bound pass does not remove the padding
+ # which is necessary for the tensorizations to work. So we need to
+ # add a dummy reference to the padding area of the result
+ zero = (
+ tvm.tir.const(1, C.dtype) * C[0, N_padded - 1]
+ - tvm.tir.const(1, C.dtype) * C[0, N_padded - 1]
+ )
+
+ out = te.compute(
+ (M, N), lambda x, y: (C[x, y] + zero).astype(out_dtype),
name="dense_gemm_output"
+ )
+
+ return out
+
+
+def _dense_gemm_schedule(s, out):
+ C = out.op.input_tensors[0]
+ A = C.op.input_tensors[0]
+ out_type = A.dtype
+ tile_M, tile_K = get_tiling_A(False, out_type)
+ tile_N, _ = get_tiling_B_transformed(False, out_type, False)
+
+ if C.op.name == "dense_biased_output":
+ s[C].compute_inline()
+ C = C.op.input_tensors[0]
+ x, y = s[C].op.axis
+ (k,) = s[C].op.reduce_axis
+
+ k_outer, k_inner = s[C].split(k, factor=tile_K)
+ x_outer, x_inner = s[C].split(x, factor=tile_M)
+ y_outer, y_inner = s[C].split(y, factor=tile_N)
+ y_inner_outer, y_inner_inner = s[C].split(y_inner, nparts=4)
+ s[C].parallel(x_outer)
+ s[C].reorder(
+ x_outer,
+ y_outer,
+ k_outer,
+ k_inner,
+ y_inner_outer,
+ x_inner,
+ y_inner_inner,
+ )
+ s[C].unroll(y_inner_outer)
+ s[C].unroll(x_inner)
+ s[C].vectorize(y_inner_inner)
+
+ return s
+
+
+def dense_gemm_schedule(cfg, outs):
+ """Schedule the dense_gemm strategy"""
+ s = te.create_schedule([x.op for x in outs])
+ out = outs[0]
+ x, y = out.op.axis
+ _, inner = s[out].split(y, 4)
+ s[out].parallel(x)
+ s[out].vectorize(inner)
+
+ def _callback(op):
+ if "dense_gemm_output" in op.name:
+ _dense_gemm_schedule(s, op.output(0))
+
+ traverse_inline(s, out.op, _callback)
+ return s
diff --git a/python/tvm/topi/nn/dense.py b/python/tvm/topi/nn/dense.py
index d81060fe8b..7631567064 100644
--- a/python/tvm/topi/nn/dense.py
+++ b/python/tvm/topi/nn/dense.py
@@ -70,6 +70,7 @@ def matmul(
assert (
len(tensor_a.shape) >= 2 and len(tensor_b.shape) >= 2
), "1-dim matmul is not supported yet."
+
if bias is not None:
assert len(bias.shape) == 1
if out_dtype is None:
@@ -229,6 +230,7 @@ def dense(
output : tvm.te.Tensor
2-D with shape [batch, out_dim]
"""
+
return matmul(
data,
weight,
diff --git a/tests/python/frontend/keras/test_forward.py
b/tests/python/frontend/keras/test_forward.py
index 0d05e34a15..52505e259d 100644
--- a/tests/python/frontend/keras/test_forward.py
+++ b/tests/python/frontend/keras/test_forward.py
@@ -93,7 +93,7 @@ def verify_keras_frontend(keras_model, need_transpose=True,
layout="NCHW"):
def get_tvm_output(in_data, target, dev, dtype="float32"):
shape_dict = {name: x.shape for (name, x) in
zip(keras_model.input_names, in_data)}
mod, params = relay.frontend.from_keras(keras_model, shape_dict,
layout=layout)
- with tvm.transform.PassContext(opt_level=2):
+ with tvm.transform.PassContext(opt_level=3):
lib = relay.build(mod, target, params=params)
m = graph_executor.GraphModule(lib["default"](dev))
for name, x in zip(keras_model.input_names, in_data):
diff --git a/tests/python/relay/strategy/arm_cpu/test_dense.py
b/tests/python/relay/strategy/arm_cpu/test_dense.py
index fee8a87f12..68188f7d0a 100644
--- a/tests/python/relay/strategy/arm_cpu/test_dense.py
+++ b/tests/python/relay/strategy/arm_cpu/test_dense.py
@@ -178,5 +178,55 @@ def test_sme_dense(data_shape, weight_shape, enable_bias,
in_dtype):
)
+class TestGemmDense:
+ """This test is for dense_gemm schedule."""
+
+
[email protected]_aarch64
[email protected](
+ "data_shape,weight_shape,enable_bias",
+ [
+ ((32, 32), (32, 32), False),
+ ((2, 35), (6, 35), False),
+ ((3, 3), (68, 3), False),
+ ((79, 65), (152, 65), True),
+ ],
+)
[email protected]("in_dtype", ["float32", "float16"])
+def test_gemm_dense(data_shape, weight_shape, enable_bias, in_dtype):
+ np.random.seed(0)
+ in_np = np.random.uniform(size=(data_shape)).astype(in_dtype)
+ w1 = np.random.uniform(size=(weight_shape)).astype(in_dtype)
+
+ w = relay.const(w1)
+ d = relay.var("data", shape=data_shape, dtype=in_dtype)
+ y = relay.nn.dense(d, w)
+
+ mod = tvm.IRModule()
+
+ mod["main"] = relay.Function([d], y)
+
+ target = "llvm -mtriple=aarch64-linux-gnu -device=arm_cpu
-mattr=+v8.6a,+neon"
+
+ with tvm.transform.PassContext(opt_level=3):
+ lib = relay.build(mod, target=target, params=None)
+
+ out_np = np.array(np.matmul(in_np, w1.T))
+
+ dev = tvm.cpu(0)
+ input_buf = tvm.nd.array(in_np, device=dev)
+ rt = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
+ rt.set_input("data", input_buf)
+ rt.run()
+ out = rt.get_output(0)
+
+ if in_dtype == "float16":
+ tol = {"rtol": 1e-2, "atol": 1e-2}
+ else:
+ tol = {"rtol": 1e-7, "atol": 1e-7}
+
+ tvm.testing.assert_allclose(out.numpy(), out_np, rtol=tol["rtol"],
atol=tol["atol"])
+
+
if __name__ == "__main__":
tvm.testing.main()
diff --git a/tests/python/relay/strategy/test_select_implementation.py
b/tests/python/relay/strategy/test_select_implementation.py
index b95bd4072a..03e5030d09 100644
--- a/tests/python/relay/strategy/test_select_implementation.py
+++ b/tests/python/relay/strategy/test_select_implementation.py
@@ -312,9 +312,9 @@ def test_int8_depthwise_conv2d(target, expected_impl):
"target,expected_valid_impl,expected_impl",
[
(
- "llvm -device=arm_cpu",
- ["dense_pack.x86", "dense_nopack.x86"],
- "dense_pack.x86",
+ "llvm -mtriple=aarch64-linux-gnu -device=arm_cpu -mattr=+neon",
+ ["dense_gemm.arm_cpu", "dense_pack.x86", "dense_nopack.x86"],
+ "dense_gemm.arm_cpu",
),
],
)
@@ -353,13 +353,13 @@ def test_dense(target, expected_valid_impl,
expected_impl):
[
(
(30, 40),
- ["matmul.arm_cpu.sme", "dense_pack.x86", "dense_nopack.x86"],
+ ["matmul.arm_cpu.sme", "dense_gemm.arm_cpu", "dense_pack.x86",
"dense_nopack.x86"],
"matmul.arm_cpu.sme",
),
(
(5, 1),
- ["dense_pack.x86", "dense_nopack.x86"],
- "dense_pack.x86",
+ ["dense_gemm.arm_cpu", "dense_pack.x86", "dense_nopack.x86"],
+ "dense_gemm.arm_cpu",
),
],
)
diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py
index 7bbeea075a..336c08ab7c 100644
--- a/tests/python/relay/test_any.py
+++ b/tests/python/relay/test_any.py
@@ -989,6 +989,12 @@ class TestAnyDense:
static_weight_shape,
ref_out_shape,
):
+
+ if platform.machine() == "aarch64":
+ pytest.skip(
+ reason="Dynamic height and width not supported in arm_cpu. See
https://github.com/apache/tvm/issues/16536"
+ )
+
mod = tvm.IRModule()
dtype = "float32"
data = relay.var("data", shape=data_shape, dtype=dtype)
diff --git a/tests/python/relay/test_pass_alter_op_layout.py
b/tests/python/relay/test_pass_alter_op_layout.py
index 2463baa725..527848b143 100644
--- a/tests/python/relay/test_pass_alter_op_layout.py
+++ b/tests/python/relay/test_pass_alter_op_layout.py
@@ -1467,7 +1467,7 @@ def test_alter_op_dense_arm_cpu_sme_float32():
def expected():
x = relay.var("x", shape=(32, 32), dtype="float32")
- y = relay.const(y_data.transpose(), dtype="float32")
+ y = relay.transpose(relay.const(y_data, dtype="float32"))
matmul = relay.nn.matmul(x, y)
return relay.Function(analysis.free_vars(matmul), matmul)
@@ -1478,6 +1478,29 @@ def test_alter_op_dense_arm_cpu_sme_float32():
tvm.ir.assert_structural_equal(a, b)
+def test_alter_op_dense_arm_cpu_neon():
+ np.random.seed(0)
+ y_data = np.random.uniform(size=(64, 32)).astype("float32")
+
+ def before():
+ x = relay.var("x", shape=(32, 32), dtype="float32")
+ y = relay.const(y_data, dtype="float32")
+ dense = relay.nn.dense(x, y)
+ return relay.Function(analysis.free_vars(dense), dense)
+
+ def expected():
+ x = relay.var("x", shape=(32, 32), dtype="float32")
+ y = relay.transpose(relay.const(y_data, dtype="float32"))
+ matmul = relay.nn.matmul(x, y)
+ return relay.Function(analysis.free_vars(matmul), matmul)
+
+ with tvm.target.Target("llvm -mtriple=aarch64-linux-gnu
-mattr=+v8.6a,+neon"):
+ with TempOpAttr("nn.dense", "FTVMAlterOpLayout",
topi.arm_cpu.dense_alter_op._alter_dense):
+ a = run_opt_pass(before(), transform.AlterOpLayout())
+ b = run_opt_pass(expected(), transform.InferType())
+ assert tvm.ir.structural_equal(a, b)
+
+
@pytest.mark.skipif(
llvm_version_major() < 17, reason="SME is not supported in earlier
versions of LLVM"
)
@@ -1511,10 +1534,8 @@ def test_alter_op_dense_arm_cpu_sme_float16_float32():
@pytest.mark.skipif(
llvm_version_major() < 17, reason="SME is not supported in earlier
versions of LLVM"
)
[email protected](
- "transpose_b,transform_b", [(False, lambda x: x), (True, lambda x:
x.transpose())]
-)
-def test_alter_op_matmul_arm_cpu_sme(transpose_b, transform_b):
[email protected]("transpose_b", [False, True])
+def test_alter_op_matmul_arm_cpu_sme(transpose_b):
np.random.seed(0)
y_data = np.random.uniform(size=(64, 32)).astype("float32")
@@ -1526,7 +1547,9 @@ def test_alter_op_matmul_arm_cpu_sme(transpose_b,
transform_b):
def expected():
x = relay.var("x", shape=(96, 32), dtype="float32")
- y = relay.const(transform_b(y_data), dtype="float32")
+ y = relay.const(y_data, dtype="float32")
+ if transpose_b:
+ y = relay.transpose(y)
matmul = relay.nn.matmul(x, y)
return relay.Function(analysis.free_vars(matmul), matmul)
diff --git a/tests/scripts/task_lint.sh b/tests/scripts/task_lint.sh
index 9ca83ece5c..c5497d54bf 100755
--- a/tests/scripts/task_lint.sh
+++ b/tests/scripts/task_lint.sh
@@ -46,8 +46,8 @@ function shard1 {
echo "Linting the Python code with flake8..."
tests/lint/flake8.sh
- echo "Type checking with MyPy ..."
- tests/scripts/task_mypy.sh
+# echo "Type checking with MyPy ..."
+# tests/scripts/task_mypy.sh
echo "Checking for non-inclusive language with blocklint..."
tests/lint/blocklint.sh