This is an automated email from the ASF dual-hosted git repository.
masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new ab1aef962a [Target][CI] Add LLVM functions for current system info (#15903)
ab1aef962a is described below
commit ab1aef962a37c7f32639b06f53bc754eae3fa0c8
Author: Balint Cristian <[email protected]>
AuthorDate: Thu Oct 12 09:25:48 2023 +0300
[Target][CI] Add LLVM functions for current system info (#15903)
---
python/tvm/target/codegen.py | 42 ++++++++++
python/tvm/testing/utils.py | 89 +++++++---------------
src/target/llvm/llvm_module.cc | 25 ++++++
tests/python/contrib/test_amx.py | 13 +++-
tests/python/contrib/test_gemm_acc32_vnni.py | 4 +-
tests/python/integration/test_auto_tensorize.py | 12 +--
tests/python/relay/test_op_level1.py | 4 +-
tests/python/relay/test_op_level10.py | 4 +-
tests/python/relay/test_op_level2.py | 4 +-
tests/python/target/test_llvm_features_info.py | 6 ++
.../unittest/test_meta_schedule_cpu_dot_product.py | 8 +-
11 files changed, 128 insertions(+), 83 deletions(-)
diff --git a/python/tvm/target/codegen.py b/python/tvm/target/codegen.py
index 1a2efd4efa..b2a92c2ca2 100644
--- a/python/tvm/target/codegen.py
+++ b/python/tvm/target/codegen.py
@@ -96,6 +96,48 @@ def llvm_get_intrinsic_name(intrin_id: int) -> str:
return _ffi_api.llvm_get_intrinsic_name(intrin_id)
+def llvm_get_system_x86_vendor():
+ """Get system x86 vendor info.
+
+ Parameters
+ ----------
+
+ Returns
+ -------
+ vendor : str
+ The current system's cpu vendor.
+ """
+ return _ffi_api.llvm_get_system_x86_vendor()
+
+
+def llvm_get_system_triple():
+ """Get system host triple.
+
+ Parameters
+ ----------
+
+ Returns
+ -------
+ triple : str
+ The current system's triple.
+ """
+ return _ffi_api.llvm_get_system_triple()
+
+
+def llvm_get_system_cpu():
+ """Get system host cpu name.
+
+ Parameters
+ ----------
+
+ Returns
+ -------
+ cpu_name : str
+ The current system's cpu name.
+ """
+ return _ffi_api.llvm_get_system_cpu()
+
+
def llvm_get_targets():
"""Get LLVM target list.
diff --git a/python/tvm/testing/utils.py b/python/tvm/testing/utils.py
index 7817ddcb01..7c8b0e883c 100644
--- a/python/tvm/testing/utils.py
+++ b/python/tvm/testing/utils.py
@@ -77,7 +77,6 @@ import sys
import textwrap
import time
import shutil
-import subprocess
from pathlib import Path
from typing import Optional, Callable, Union, List, Tuple
@@ -91,6 +90,7 @@ import tvm.tir
import tvm.te
import tvm._ffi
+from tvm.target import codegen
from tvm.contrib import nvcc, cudnn, rocm
import tvm.contrib.hexagon._ci_env_check as hexagon
from tvm.driver.tvmc.frontends import load_model
@@ -1002,76 +1002,43 @@ requires_corstone300 = Feature(
requires_vitis_ai = Feature("vitis_ai", "Vitis AI", cmake_flag="USE_VITIS_AI")
-def _arm_dot_supported():
- arch = platform.machine()
+# check cpu features
+def _has_cpu_feat(features):
+ cpu = codegen.llvm_get_system_cpu()
+ triple = codegen.llvm_get_system_triple()
+ target = "llvm -mtriple=%s -mcpu=%s" % (triple, cpu)
+ has_feat = codegen.target_has_features(features, tvm.target.Target(target))
- if arch not in ["arm64", "aarch64"]:
- return False
+ return has_feat
- if sys.platform.startswith("darwin"):
- cpu_info = subprocess.check_output("sysctl -a", shell=True).strip().decode()
- for line in cpu_info.split("\n"):
- if line.startswith("hw.optional.arm.FEAT_DotProd"):
- return bool(int(line.split(":", 1)[1]))
- elif sys.platform.startswith("linux"):
- return True
- return False
-
-
-def _is_intel():
- # Only linux is supported for now.
- if sys.platform.startswith("linux"):
- with open("/proc/cpuinfo", "r") as content:
- return "Intel" in content.read()
-
- return False
-
-
-def _has_vnni():
- arch = platform.machine()
- # Only linux is supported for now.
- if arch == "x86_64" and sys.platform.startswith("linux"):
- with open("/proc/cpuinfo", "r") as content:
- return "avx512_vnni" in content.read()
-
- return False
-
-
-# check avx512 intrinsic groups for SkyLake X
-def _has_slavx512():
- # Check LLVM support
- llvm_version = tvm.target.codegen.llvm_version_major()
- is_llvm_support = llvm_version >= 8
- arch = platform.machine()
- # Only linux is supported for now.
- if arch == "x86_64" and sys.platform.startswith("linux"):
- with open("/proc/cpuinfo", "r") as content:
- ctx = content.read()
- check = (
- "avx512f" in ctx
- and "avx512cd" in ctx
- and "avx512bw" in ctx
- and "avx512dq" in ctx
- and "avx512vl" in ctx
- )
- return check and is_llvm_support
-
- return False
+requires_arm_dot = Feature(
+ "arm_dot",
+ "ARM dot product",
+ run_time_check=lambda: _has_cpu_feat("dotprod"),
+)
-requires_arm_dot = Feature("arm_dot", "ARM dot product", run_time_check=_arm_dot_supported)
+requires_x86_vnni = Feature(
+ "x86_vnni",
+ "x86 VNNI Extensions",
+ run_time_check=lambda: (_has_cpu_feat("avx512vnni") or _has_cpu_feat("avxvnni")),
+)
-requires_cascadelake = Feature(
- "cascadelake", "x86 CascadeLake", run_time_check=lambda: _has_vnni() and _is_intel()
+requires_x86_avx512 = Feature(
+ "x86_avx512",
+ "x86 AVX512 Extensions",
+ run_time_check=lambda: _has_cpu_feat(
+ ["avx512bw", "avx512cd", "avx512dq", "avx512vl", "avx512f"]
+ ),
)
-requires_skylake_avx512 = Feature(
- "skylake_avx512",
- "x86 SkyLake AVX512",
- run_time_check=lambda: _has_slavx512() and _is_intel(),
+requires_x86_amx = Feature(
+ "x86_amx",
+ "x86 AMX Extensions",
+ run_time_check=lambda: _has_cpu_feat("amx-int8"),
)
diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc
index 05a7df230f..7878121410 100644
--- a/src/target/llvm/llvm_module.cc
+++ b/src/target/llvm/llvm_module.cc
@@ -41,6 +41,7 @@
#include <llvm/IR/Module.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/Support/FileSystem.h>
+#include <llvm/Support/Host.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>
@@ -485,6 +486,30 @@ TVM_REGISTER_GLOBAL("target.llvm_get_intrinsic_name").set_body_typed([](int64_t
#endif
});
+TVM_REGISTER_GLOBAL("target.llvm_get_system_x86_vendor").set_body_typed([]() -> String {
+#if TVM_LLVM_VERSION >= 120
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+ using namespace llvm::sys::detail::x86;
+ const auto x86_sign = getVendorSignature();
+ if (x86_sign == VendorSignatures::GENUINE_INTEL)
+ return "intel";
+ else if (x86_sign == VendorSignatures::AUTHENTIC_AMD)
+ return "amd";
+ else if (x86_sign == VendorSignatures::UNKNOWN)
+ return "unknown";
+#endif
+#endif
+ return "unimplemented";
+});
+
+TVM_REGISTER_GLOBAL("target.llvm_get_system_triple").set_body_typed([]() -> String {
+ return llvm::sys::getDefaultTargetTriple();
+});
+
+TVM_REGISTER_GLOBAL("target.llvm_get_system_cpu").set_body_typed([]() -> String {
+ return llvm::sys::getHostCPUName().str();
+});
+
TVM_REGISTER_GLOBAL("target.llvm_get_targets").set_body_typed([]() -> Array<String> {
auto llvm_instance = std::make_unique<LLVMInstance>();
LLVMTargetInfo llvm_backend(*llvm_instance, "llvm");
diff --git a/tests/python/contrib/test_amx.py b/tests/python/contrib/test_amx.py
index 30da7e56fb..cd4f62cd62 100644
--- a/tests/python/contrib/test_amx.py
+++ b/tests/python/contrib/test_amx.py
@@ -27,8 +27,13 @@ import numpy as np
import pytest
-@tvm.testing.requires_llvm
-@pytest.mark.skip("skip due to AMX feature not avaliable yet")
+has_amx_runtime = pytest.mark.skipif(
+ not tvm.get_global_func("runtime.amx_init", True), reason="AMX runtime not available"
+)
+
+
+@has_amx_runtime
+@tvm.testing.requires_x86_amx
def test_amx_u8s8s32_matmul_tensorize():
m = 1024
k = 1024
@@ -113,8 +118,8 @@ def test_amx_u8s8s32_matmul_tensorize():
tvm.testing.assert_allclose(y.numpy(), np.dot(a.astype("int32"), b.T.astype("int32")), rtol=0)
-@tvm.testing.requires_llvm
-@pytest.mark.skip("skip due to AMX feature not avaliable yet")
+@has_amx_runtime
+@tvm.testing.requires_x86_amx
def test_amx_check_support():
amx_init = tvm.get_global_func("runtime.amx_init")
amx_tileconfig = tvm.get_global_func("runtime.amx_tileconfig")
diff --git a/tests/python/contrib/test_gemm_acc32_vnni.py b/tests/python/contrib/test_gemm_acc32_vnni.py
index c01f7758cb..2e15d38612 100644
--- a/tests/python/contrib/test_gemm_acc32_vnni.py
+++ b/tests/python/contrib/test_gemm_acc32_vnni.py
@@ -97,7 +97,7 @@ def verify_fc_int8_acc32(m=1024, n=1024, k=1024, target="llvm -mcpu=cascadelake"
# t_func.export_library("tensorize_acc32.o")
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
def test_fc_int8_acc32_vnni():
# For LLVM < 8.0, it shows "'cascadelake' is not a recognized processor for this target
# (ignoring processor)" error with the following setting. After LLVM 8.0 is enabled in the
@@ -105,7 +105,7 @@ def test_fc_int8_acc32_vnni():
verify_fc_int8_acc32()
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
def test_fc_int8_acc32_avx512():
verify_fc_int8_acc32(target="llvm -mcpu=skylake-avx512")
diff --git a/tests/python/integration/test_auto_tensorize.py b/tests/python/integration/test_auto_tensorize.py
index 8900a1ff6c..7831e5c8d7 100644
--- a/tests/python/integration/test_auto_tensorize.py
+++ b/tests/python/integration/test_auto_tensorize.py
@@ -287,12 +287,12 @@ def _test_bert_int8(relay_mod, params, input_info, target, sch_rules, postprocs)
print(runtime.benchmark(dev, number=1, repeat=50).mean)
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
def test_vnni_dense():
_test_dense("uint8", SCH_RULES_FOR_VNNI, POSTPROCS_FOR_VNNI, CASCADELAKE_VNNI_TARGET)
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
def test_avx512_dense():
_test_dense("uint8", SCH_RULES_FOR_AVX512, POSTPROCS_FOR_VNNI, SKYLAKE_AVX512_TARGET)
@@ -310,12 +310,12 @@ def test_dp4a_dense():
# )
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
def test_vnni_conv2d():
_test_conv2d("uint8", SCH_RULES_FOR_VNNI, POSTPROCS_FOR_VNNI, CASCADELAKE_VNNI_TARGET)
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
def test_avx512_conv2d():
_test_conv2d("uint8", SCH_RULES_FOR_AVX512, POSTPROCS_FOR_VNNI, SKYLAKE_AVX512_TARGET)
@@ -333,7 +333,7 @@ def test_dp4a_conv2d():
# )
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
@pytest.mark.skipif(tvm.testing.IS_IN_CI, reason="Slow on CI")
def test_vnni_bert_int8():
pytest.importorskip("onnx")
@@ -348,7 +348,7 @@ def test_vnni_bert_int8():
)
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
@pytest.mark.skip("Due to quantized BERT download issue")
def test_avx512_bert_int8():
relay_mod, params, input_info = load_quantized_bert_base()
diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py
index e7def01923..ca8ffda9ba 100644
--- a/tests/python/relay/test_op_level1.py
+++ b/tests/python/relay/test_op_level1.py
@@ -846,13 +846,13 @@ def test_dense_amx_int8():
np.testing.assert_equal(out, ref)
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
@pytest.mark.parametrize("m,n,k", [(32, 128, 96), (32, 128, 97)])
def test_dense_vnni(m, n, k):
dense_x86_test(m, n, k)
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
@pytest.mark.parametrize("m,n,k", [(32, 128, 96), (32, 128, 97)])
def test_dense_skylake_avx512(m, n, k):
dense_x86_test(m, n, k, "llvm -mcpu=skylake-avx512", ["pmaddubs", "pmaddw", "vpaddd"])
diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py
index 9db1bcf78b..6036f70712 100644
--- a/tests/python/relay/test_op_level10.py
+++ b/tests/python/relay/test_op_level10.py
@@ -568,7 +568,7 @@ def test_batch_matmul_amx(b, m, n, k):
np.testing.assert_equal(out, ref)
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
@pytest.mark.parametrize(
"b,m,n,k",
[
@@ -581,7 +581,7 @@ def test_batch_matmul_vnni(b, m, n, k):
batch_matmul_x86_test(b, m, n, k)
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
@pytest.mark.parametrize(
"b,m,n,k",
[
diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py
index bd984d32e6..cb78502178 100644
--- a/tests/python/relay/test_op_level2.py
+++ b/tests/python/relay/test_op_level2.py
@@ -2237,12 +2237,12 @@ def test_conv2d_int8_alter_dtype_arm():
)
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
def test_conv2d_int8_alter_dtype_vnni():
_test_conv2d_int8_alter_dtype("int8", "llvm -mcpu=cascadelake", ["vpdpbusd"])
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
def test_conv2d_int8_alter_dtype_avx512():
_test_conv2d_int8_alter_dtype(
"int8", "llvm -mcpu=skylake-avx512", ["pmaddubs", "pmaddw", "vpaddd"]
diff --git a/tests/python/target/test_llvm_features_info.py b/tests/python/target/test_llvm_features_info.py
index 1be71331dd..edcbc891c9 100644
--- a/tests/python/target/test_llvm_features_info.py
+++ b/tests/python/target/test_llvm_features_info.py
@@ -30,7 +30,13 @@ def test_llvm_targets():
# check blank results
assert len(codegen.llvm_get_targets())
+ assert len(codegen.llvm_get_system_cpu())
+ assert len(codegen.llvm_get_system_triple())
+ assert len(codegen.llvm_get_system_x86_vendor())
# check ffi vs python
+ assert codegen.llvm_get_system_cpu() == _ffi_api.llvm_get_system_cpu()
+ assert codegen.llvm_get_system_triple() == _ffi_api.llvm_get_system_triple()
+ assert codegen.llvm_get_system_x86_vendor() == _ffi_api.llvm_get_system_x86_vendor()
assert str(codegen.llvm_get_targets()) == str(_ffi_api.llvm_get_targets())
# check LLVM target -mcpu legality
diff --git a/tests/python/unittest/test_meta_schedule_cpu_dot_product.py b/tests/python/unittest/test_meta_schedule_cpu_dot_product.py
index 6dc72d6933..592c772a04 100644
--- a/tests/python/unittest/test_meta_schedule_cpu_dot_product.py
+++ b/tests/python/unittest/test_meta_schedule_cpu_dot_product.py
@@ -165,13 +165,13 @@ def schedule_16x4_dense_fn_database(target, intrin, m=1024, n=1024, k=1024):
f_check(lib, dev)
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
def test_vnni_schedule_fn_database():
target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=cascadelake -num-cores=4")
schedule_16x4_dense_fn_database(target, VNNI_INTRIN)
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
def test_avx512_schedule_fn_database():
target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=skylake-avx512 -num-cores=4")
schedule_16x4_dense_fn_database(target, AVX512_INTRIN, 16, 16, 16)
@@ -255,13 +255,13 @@ def schedule_16x4_dense_fn_tune(target, intrin, m=1024, n=1024, k=1024):
f_check(lib, dev)
-@tvm.testing.requires_cascadelake
+@tvm.testing.requires_x86_vnni
def test_vnni_schedule_fn_tune():
target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=cascadelake -num-cores=4")
schedule_16x4_dense_fn_tune(target, VNNI_INTRIN)
-@tvm.testing.requires_skylake_avx512
+@tvm.testing.requires_x86_avx512
def test_avx512_schedule_fn_tune():
target = tvm.target.Target("llvm -keys=x86,cpu -mcpu=skylake-avx512 -num-cores=4")
schedule_16x4_dense_fn_tune(target, AVX512_INTRIN, 16, 16, 16)