zxybazh commented on PR #13203:
URL: https://github.com/apache/tvm/pull/13203#issuecomment-1293861231
My validation script is 75% done with the database scanning; it located an
output issue as follows:
```python
Python Environment
TVM version = 0.11.dev0
Python version = 3.8.13 (default, Mar 28 2022, 11:38:47) [GCC 7.5.0] (64
bit)
os.uname() = Linux 5.18.10-76051810-generic
#202207071639~1657252310~21.10~7d5e891 SMP PREEMPT_DYNAMIC Fri J x86_64
CMake Options:
{
"BACKTRACE_ON_SEGFAULT": "OFF",
"BUILD_STATIC_RUNTIME": "OFF",
"COMPILER_RT_PATH": "3rdparty/compiler-rt",
"CUDA_VERSION": "NOT-FOUND",
"DLPACK_PATH": "3rdparty/dlpack/include",
"DMLC_PATH": "3rdparty/dmlc-core/include",
"GIT_COMMIT_HASH": "de13c2b8f13e12540d6eabdbef5e38c66c2d43f9",
"GIT_COMMIT_TIME": "2022-10-11 16:50:24 -0700",
"HIDE_PRIVATE_SYMBOLS": "ON",
"INDEX_DEFAULT_I64": "ON",
"INSTALL_DEV": "OFF",
"LLVM_VERSION": "12.0.1",
"PICOJSON_PATH": "3rdparty/picojson",
"RANG_PATH": "3rdparty/rang/include",
"ROCM_PATH": "/opt/rocm",
"SUMMARIZE": "OFF",
"TVM_CXX_COMPILER_PATH": "/usr/bin/c++",
"USE_ALTERNATIVE_LINKER": "AUTO",
"USE_AOT_EXECUTOR": "ON",
"USE_ARM_COMPUTE_LIB": "OFF",
"USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR": "OFF",
"USE_BLAS": "none",
"USE_BNNS": "OFF",
"USE_BYODT_POSIT": "OFF",
"USE_CCACHE": "AUTO",
"USE_CLML": "OFF",
"USE_CLML_GRAPH_EXECUTOR": "OFF",
"USE_CMSISNN": "OFF",
"USE_COREML": "OFF",
"USE_CPP_RPC": "OFF",
"USE_CUBLAS": "OFF",
"USE_CUDA": "/usr/local/cuda-11.8/",
"USE_CUDNN": "ON",
"USE_CURAND": "ON",
"USE_CUSTOM_LOGGING": "OFF",
"USE_CUTLASS": "OFF",
"USE_DNNL": "OFF",
"USE_ETHOSN": "OFF",
"USE_FALLBACK_STL_MAP": "OFF",
"USE_GRAPH_EXECUTOR": "ON",
"USE_GRAPH_EXECUTOR_CUDA_GRAPH": "OFF",
"USE_GTEST": "AUTO",
"USE_HEXAGON": "OFF",
"USE_HEXAGON_GTEST": "/path/to/hexagon/gtest",
"USE_HEXAGON_RPC": "OFF",
"USE_HEXAGON_SDK": "/path/to/sdk",
"USE_IOS_RPC": "OFF",
"USE_KHRONOS_SPIRV": "OFF",
"USE_LIBBACKTRACE": "ON",
"USE_LIBTORCH": "OFF",
"USE_LLVM": "llvm-config-12 --link-static",
"USE_METAL": "OFF",
"USE_MICRO": "OFF",
"USE_MICRO_STANDALONE_RUNTIME": "OFF",
"USE_MIOPEN": "OFF",
"USE_MKL": "OFF",
"USE_MSVC_MT": "OFF",
"USE_NNPACK": "OFF",
"USE_OPENCL": "OFF",
"USE_OPENCL_GTEST": "/path/to/opencl/gtest",
"USE_OPENMP": "none",
"USE_PAPI": "OFF",
"USE_PROFILER": "ON",
"USE_PT_TVMDSOOP": "OFF",
"USE_RANDOM": "ON",
"USE_RELAY_DEBUG": "OFF",
"USE_ROCBLAS": "OFF",
"USE_ROCM": "OFF",
"USE_RPC": "ON",
"USE_RTTI": "ON",
"USE_RUST_EXT": "OFF",
"USE_SORT": "ON",
"USE_SPIRV_KHR_INTEGER_DOT_PRODUCT": "OFF",
"USE_STACKVM_RUNTIME": "OFF",
"USE_TARGET_ONNX": "OFF",
"USE_TENSORFLOW_PATH": "none",
"USE_TENSORRT_CODEGEN": "OFF",
"USE_TENSORRT_RUNTIME": "OFF",
"USE_TFLITE": "OFF",
"USE_TF_TVMDSOOP": "OFF",
"USE_THREADS": "ON",
"USE_THRUST": "OFF",
"USE_UMA": "OFF",
"USE_VITIS_AI": "OFF",
"USE_VULKAN": "OFF"
}
Validation failed!
Original Result:
------------------------------
[array([[[[5.72744668e-01, 7.18730569e-01, 3.86069208e-01, ...,
6.36127651e-01, 3.42450917e-01, 6.26292646e-01],
[5.07221401e-01, 6.67694628e-01, 6.95775867e-01, ...,
1.69036478e-01, 3.30536723e-01, 8.47398460e-01],
[6.99420631e-01, 4.91819829e-01, 4.23442200e-02, ...,
3.29874530e-02, 9.62956905e-01, 7.91969299e-02],
...,
[1.04226880e-01, 1.03568032e-01, 9.26714301e-01, ...,
4.48888540e-01, 8.36172879e-01, 3.13173562e-01],
[7.50210702e-01, 2.54800111e-01, 1.09177884e-02, ...,
1.56607479e-01, 3.70823532e-01, 3.37622017e-01],
[7.69818544e-01, 8.03145647e-01, 8.53710115e-01, ...,
8.41390789e-01, 7.99091876e-01, 4.94071782e-01]],
[[2.96491496e-02, 7.26268515e-02, 5.70003271e-01, ...,
9.57559466e-01, 5.24076641e-01, 4.91699517e-01],
[3.92443568e-01, 5.57050645e-01, 8.80203545e-01, ...,
5.83786547e-01, 3.47498745e-01, 8.52630854e-01],
[4.95827943e-01, 8.03838596e-02, 5.51400764e-04, ...,
8.94184530e-01, 3.35244745e-01, 3.38703752e-01],
...,
[6.56917810e-01, 1.26269087e-01, 8.86820674e-01, ...,
6.85630202e-01, 9.85439941e-02, 4.55999106e-01],
[3.23541015e-01, 1.84623320e-02, 8.43342781e-01, ...,
3.28674354e-02, 5.02927899e-01, 5.63053966e-01],
[4.82641309e-01, 2.05485225e-01, 8.46390903e-01, ...,
2.58696824e-01, 3.78653497e-01, 9.69791532e-01]],
[[7.06747532e-01, 5.55829778e-02, 5.45281311e-03, ...,
3.90011668e-01, 6.52581513e-01, 8.07841599e-01],
[9.54571843e-01, 5.93737781e-01, 6.66372925e-02, ...,
2.76699543e-01, 4.51010227e-01, 6.19255245e-01],
[9.23920989e-01, 5.14647663e-01, 5.07072866e-01, ...,
4.65018183e-01, 6.15873396e-01, 1.42470049e-02],
...,
[2.12811157e-02, 8.15052330e-01, 1.74715400e-01, ...,
1.05450161e-01, 4.68538821e-01, 4.58943486e-01],
[4.07276303e-01, 5.28039783e-02, 8.38160872e-01, ...,
6.23646200e-01, 6.84982300e-01, 1.00896932e-01],
[4.61004347e-01, 1.36735559e-01, 9.95546460e-01, ...,
6.11530840e-01, 7.54720032e-01, 9.53860104e-01]],
...,
[[1.43743232e-01, 2.51940161e-01, 6.18409634e-01, ...,
5.12220979e-01, 3.78941447e-01, 5.40351331e-01],
[9.49022174e-01, 2.41517842e-01, 8.94808948e-01, ...,
5.62952697e-01, 1.95407435e-01, 8.04692864e-01],
[9.91206408e-01, 8.40037465e-01, 1.82156593e-01, ...,
6.58586383e-01, 8.50752592e-01, 6.06926322e-01],
...,
[4.82302070e-01, 3.43226075e-01, 7.40178943e-01, ...,
7.15291500e-01, 3.06362569e-01, 8.86868775e-01],
[2.14953542e-01, 6.36601508e-01, 9.79079902e-01, ...,
7.39796996e-01, 6.09936953e-01, 9.28644717e-01],
[6.24535918e-01, 8.58304977e-01, 4.45496649e-01, ...,
5.76976717e-01, 9.26244795e-01, 7.05472887e-01]],
[[6.18884146e-01, 7.97822356e-01, 1.13503449e-01, ...,
5.87406337e-01, 6.84295893e-01, 8.60077977e-01],
[7.70183623e-01, 8.85573864e-01, 1.15071729e-01, ...,
6.88610494e-01, 2.41290748e-01, 5.79306781e-01],
[6.62524700e-01, 6.61418855e-01, 2.52471745e-01, ...,
8.70325685e-01, 9.78237271e-01, 1.96542338e-01],
...,
[1.54784843e-01, 5.60696065e-01, 6.62471175e-01, ...,
7.74010867e-02, 1.57814428e-01, 7.70231962e-01],
[9.37767804e-01, 8.79584551e-01, 3.82422209e-02, ...,
1.49532810e-01, 3.32031935e-01, 3.79795879e-01],
[2.86293894e-01, 9.98479843e-01, 2.83505768e-01, ...,
5.33205092e-01, 1.32817820e-01, 7.94363499e-01]],
[[4.13194709e-02, 9.04073894e-01, 6.47442758e-01, ...,
6.11195028e-01, 8.23427618e-01, 6.89852118e-01],
[3.63598973e-01, 4.36859369e-01, 8.90244484e-01, ...,
1.55881215e-02, 2.25598961e-01, 4.75840479e-01],
[4.32084113e-01, 5.32979429e-01, 6.87524796e-01, ...,
8.47668409e-01, 7.01093972e-01, 6.78106546e-01],
...,
[4.83861417e-02, 7.38014996e-01, 4.82638367e-03, ...,
4.27871734e-01, 9.44737613e-01, 8.88450384e-01],
[7.25017190e-01, 4.12823766e-01, 8.73411059e-01, ...,
9.05240178e-01, 4.87419277e-01, 1.37596279e-01],
[9.97225583e-01, 3.72083306e-01, 2.36607585e-02, ...,
1.12125196e-01, 7.89207041e-01, 9.78419363e-01]]]],
dtype=float32), array([[[[0.5403995 , 0.47514713, 0.5087983 , ...,
0.4753055 ,
0.54209375, 0.5675313 ]]]], dtype=float32)]
Scheduled Result:
------------------------------
[array([[[[5.72744668e-01, 7.18730569e-01, 3.86069208e-01, ...,
6.36127651e-01, 3.42450917e-01, 6.26292646e-01],
[5.07221401e-01, 6.67694628e-01, 6.95775867e-01, ...,
1.69036478e-01, 3.30536723e-01, 8.47398460e-01],
[6.99420631e-01, 4.91819829e-01, 4.23442200e-02, ...,
3.29874530e-02, 9.62956905e-01, 7.91969299e-02],
...,
[1.04226880e-01, 1.03568032e-01, 9.26714301e-01, ...,
4.48888540e-01, 8.36172879e-01, 3.13173562e-01],
[7.50210702e-01, 2.54800111e-01, 1.09177884e-02, ...,
1.56607479e-01, 3.70823532e-01, 3.37622017e-01],
[7.69818544e-01, 8.03145647e-01, 8.53710115e-01, ...,
8.41390789e-01, 7.99091876e-01, 4.94071782e-01]],
[[2.96491496e-02, 7.26268515e-02, 5.70003271e-01, ...,
9.57559466e-01, 5.24076641e-01, 4.91699517e-01],
[3.92443568e-01, 5.57050645e-01, 8.80203545e-01, ...,
5.83786547e-01, 3.47498745e-01, 8.52630854e-01],
[4.95827943e-01, 8.03838596e-02, 5.51400764e-04, ...,
8.94184530e-01, 3.35244745e-01, 3.38703752e-01],
...,
[6.56917810e-01, 1.26269087e-01, 8.86820674e-01, ...,
6.85630202e-01, 9.85439941e-02, 4.55999106e-01],
[3.23541015e-01, 1.84623320e-02, 8.43342781e-01, ...,
3.28674354e-02, 5.02927899e-01, 5.63053966e-01],
[4.82641309e-01, 2.05485225e-01, 8.46390903e-01, ...,
2.58696824e-01, 3.78653497e-01, 9.69791532e-01]],
[[7.06747532e-01, 5.55829778e-02, 5.45281311e-03, ...,
3.90011668e-01, 6.52581513e-01, 8.07841599e-01],
[9.54571843e-01, 5.93737781e-01, 6.66372925e-02, ...,
2.76699543e-01, 4.51010227e-01, 6.19255245e-01],
[9.23920989e-01, 5.14647663e-01, 5.07072866e-01, ...,
4.65018183e-01, 6.15873396e-01, 1.42470049e-02],
...,
[2.12811157e-02, 8.15052330e-01, 1.74715400e-01, ...,
1.05450161e-01, 4.68538821e-01, 4.58943486e-01],
[4.07276303e-01, 5.28039783e-02, 8.38160872e-01, ...,
6.23646200e-01, 6.84982300e-01, 1.00896932e-01],
[4.61004347e-01, 1.36735559e-01, 9.95546460e-01, ...,
6.11530840e-01, 7.54720032e-01, 9.53860104e-01]],
...,
[[1.43743232e-01, 2.51940161e-01, 6.18409634e-01, ...,
5.12220979e-01, 3.78941447e-01, 5.40351331e-01],
[9.49022174e-01, 2.41517842e-01, 8.94808948e-01, ...,
5.62952697e-01, 1.95407435e-01, 8.04692864e-01],
[9.91206408e-01, 8.40037465e-01, 1.82156593e-01, ...,
6.58586383e-01, 8.50752592e-01, 6.06926322e-01],
...,
[4.82302070e-01, 3.43226075e-01, 7.40178943e-01, ...,
7.15291500e-01, 3.06362569e-01, 8.86868775e-01],
[2.14953542e-01, 6.36601508e-01, 9.79079902e-01, ...,
7.39796996e-01, 6.09936953e-01, 9.28644717e-01],
[6.24535918e-01, 8.58304977e-01, 4.45496649e-01, ...,
5.76976717e-01, 9.26244795e-01, 7.05472887e-01]],
[[6.18884146e-01, 7.97822356e-01, 1.13503449e-01, ...,
5.87406337e-01, 6.84295893e-01, 8.60077977e-01],
[7.70183623e-01, 8.85573864e-01, 1.15071729e-01, ...,
6.88610494e-01, 2.41290748e-01, 5.79306781e-01],
[6.62524700e-01, 6.61418855e-01, 2.52471745e-01, ...,
8.70325685e-01, 9.78237271e-01, 1.96542338e-01],
...,
[1.54784843e-01, 5.60696065e-01, 6.62471175e-01, ...,
7.74010867e-02, 1.57814428e-01, 7.70231962e-01],
[9.37767804e-01, 8.79584551e-01, 3.82422209e-02, ...,
1.49532810e-01, 3.32031935e-01, 3.79795879e-01],
[2.86293894e-01, 9.98479843e-01, 2.83505768e-01, ...,
5.33205092e-01, 1.32817820e-01, 7.94363499e-01]],
[[4.13194709e-02, 9.04073894e-01, 6.47442758e-01, ...,
6.11195028e-01, 8.23427618e-01, 6.89852118e-01],
[3.63598973e-01, 4.36859369e-01, 8.90244484e-01, ...,
1.55881215e-02, 2.25598961e-01, 4.75840479e-01],
[4.32084113e-01, 5.32979429e-01, 6.87524796e-01, ...,
8.47668409e-01, 7.01093972e-01, 6.78106546e-01],
...,
[4.83861417e-02, 7.38014996e-01, 4.82638367e-03, ...,
4.27871734e-01, 9.44737613e-01, 8.88450384e-01],
[7.25017190e-01, 4.12823766e-01, 8.73411059e-01, ...,
9.05240178e-01, 4.87419277e-01, 1.37596279e-01],
[9.97225583e-01, 3.72083306e-01, 2.36607585e-02, ...,
1.12125196e-01, 7.89207041e-01, 9.78419363e-01]]]],
dtype=float32), array([[[[0.0098429 , 0.00700461, 0.00787896, ...,
0.01298219,
0.00698879, 0.01278148]]]], dtype=float32)]
Input:
------------------------------
[array([[[[5.72744668e-01, 7.18730569e-01, 3.86069208e-01, ...,
6.36127651e-01, 3.42450917e-01, 6.26292646e-01],
[5.07221401e-01, 6.67694628e-01, 6.95775867e-01, ...,
1.69036478e-01, 3.30536723e-01, 8.47398460e-01],
[6.99420631e-01, 4.91819829e-01, 4.23442200e-02, ...,
3.29874530e-02, 9.62956905e-01, 7.91969299e-02],
...,
[1.04226880e-01, 1.03568032e-01, 9.26714301e-01, ...,
4.48888540e-01, 8.36172879e-01, 3.13173562e-01],
[7.50210702e-01, 2.54800111e-01, 1.09177884e-02, ...,
1.56607479e-01, 3.70823532e-01, 3.37622017e-01],
[7.69818544e-01, 8.03145647e-01, 8.53710115e-01, ...,
8.41390789e-01, 7.99091876e-01, 4.94071782e-01]],
[[2.96491496e-02, 7.26268515e-02, 5.70003271e-01, ...,
9.57559466e-01, 5.24076641e-01, 4.91699517e-01],
[3.92443568e-01, 5.57050645e-01, 8.80203545e-01, ...,
5.83786547e-01, 3.47498745e-01, 8.52630854e-01],
[4.95827943e-01, 8.03838596e-02, 5.51400764e-04, ...,
8.94184530e-01, 3.35244745e-01, 3.38703752e-01],
...,
[6.56917810e-01, 1.26269087e-01, 8.86820674e-01, ...,
6.85630202e-01, 9.85439941e-02, 4.55999106e-01],
[3.23541015e-01, 1.84623320e-02, 8.43342781e-01, ...,
3.28674354e-02, 5.02927899e-01, 5.63053966e-01],
[4.82641309e-01, 2.05485225e-01, 8.46390903e-01, ...,
2.58696824e-01, 3.78653497e-01, 9.69791532e-01]],
[[7.06747532e-01, 5.55829778e-02, 5.45281311e-03, ...,
3.90011668e-01, 6.52581513e-01, 8.07841599e-01],
[9.54571843e-01, 5.93737781e-01, 6.66372925e-02, ...,
2.76699543e-01, 4.51010227e-01, 6.19255245e-01],
[9.23920989e-01, 5.14647663e-01, 5.07072866e-01, ...,
4.65018183e-01, 6.15873396e-01, 1.42470049e-02],
...,
[2.12811157e-02, 8.15052330e-01, 1.74715400e-01, ...,
1.05450161e-01, 4.68538821e-01, 4.58943486e-01],
[4.07276303e-01, 5.28039783e-02, 8.38160872e-01, ...,
6.23646200e-01, 6.84982300e-01, 1.00896932e-01],
[4.61004347e-01, 1.36735559e-01, 9.95546460e-01, ...,
6.11530840e-01, 7.54720032e-01, 9.53860104e-01]],
...,
[[1.43743232e-01, 2.51940161e-01, 6.18409634e-01, ...,
5.12220979e-01, 3.78941447e-01, 5.40351331e-01],
[9.49022174e-01, 2.41517842e-01, 8.94808948e-01, ...,
5.62952697e-01, 1.95407435e-01, 8.04692864e-01],
[9.91206408e-01, 8.40037465e-01, 1.82156593e-01, ...,
6.58586383e-01, 8.50752592e-01, 6.06926322e-01],
...,
[4.82302070e-01, 3.43226075e-01, 7.40178943e-01, ...,
7.15291500e-01, 3.06362569e-01, 8.86868775e-01],
[2.14953542e-01, 6.36601508e-01, 9.79079902e-01, ...,
7.39796996e-01, 6.09936953e-01, 9.28644717e-01],
[6.24535918e-01, 8.58304977e-01, 4.45496649e-01, ...,
5.76976717e-01, 9.26244795e-01, 7.05472887e-01]],
[[6.18884146e-01, 7.97822356e-01, 1.13503449e-01, ...,
5.87406337e-01, 6.84295893e-01, 8.60077977e-01],
[7.70183623e-01, 8.85573864e-01, 1.15071729e-01, ...,
6.88610494e-01, 2.41290748e-01, 5.79306781e-01],
[6.62524700e-01, 6.61418855e-01, 2.52471745e-01, ...,
8.70325685e-01, 9.78237271e-01, 1.96542338e-01],
...,
[1.54784843e-01, 5.60696065e-01, 6.62471175e-01, ...,
7.74010867e-02, 1.57814428e-01, 7.70231962e-01],
[9.37767804e-01, 8.79584551e-01, 3.82422209e-02, ...,
1.49532810e-01, 3.32031935e-01, 3.79795879e-01],
[2.86293894e-01, 9.98479843e-01, 2.83505768e-01, ...,
5.33205092e-01, 1.32817820e-01, 7.94363499e-01]],
[[4.13194709e-02, 9.04073894e-01, 6.47442758e-01, ...,
6.11195028e-01, 8.23427618e-01, 6.89852118e-01],
[3.63598973e-01, 4.36859369e-01, 8.90244484e-01, ...,
1.55881215e-02, 2.25598961e-01, 4.75840479e-01],
[4.32084113e-01, 5.32979429e-01, 6.87524796e-01, ...,
8.47668409e-01, 7.01093972e-01, 6.78106546e-01],
...,
[4.83861417e-02, 7.38014996e-01, 4.82638367e-03, ...,
4.27871734e-01, 9.44737613e-01, 8.88450384e-01],
[7.25017190e-01, 4.12823766e-01, 8.73411059e-01, ...,
9.05240178e-01, 4.87419277e-01, 1.37596279e-01],
[9.97225583e-01, 3.72083306e-01, 2.36607585e-02, ...,
1.12125196e-01, 7.89207041e-01, 9.78419363e-01]]]],
dtype=float32), array([[[[0.09280316, 0.12370128, 0.09142544, ...,
0.7100446 ,
0.5562939 , 0.7813125 ]]]], dtype=float32)]
Original IRModule:
------------------------------
# from tvm.script import tir as T
@tvm.script.ir_module
class Module:
@T.prim_func
def main(p0: T.Buffer[(1, 7, 7, 2048), "float32"], tensor: T.Buffer[(1,
1, 1, 2048), "float32"]) -> None:
# function attr dict
T.func_attr({"tir.noalias": True, "global_symbol": "main"})
# body
# with T.block("root")
tensor_1 = T.alloc_buffer([1, 1, 1, 2048], dtype="float32")
for i0, i1, i2, i3, i4, i5 in T.grid(1, 1, 1, 2048, 7, 7):
with T.block("tensor"):
ax0, ax1, ax2, ax3, rv0, rv1 = T.axis.remap("SSSSRR", [i0,
i1, i2, i3, i4, i5])
T.reads(p0[ax0, ax1 * 7 + rv0, ax2 * 7 + rv1, ax3])
T.writes(tensor_1[ax0, ax1, ax2, ax3])
with T.init():
tensor_1[ax0, ax1, ax2, ax3] = T.float32(0)
tensor_1[ax0, ax1, ax2, ax3] = tensor_1[ax0, ax1, ax2, ax3]
+ p0[ax0, ax1 * 7 + rv0, ax2 * 7 + rv1, ax3]
for i0, i1, i2, i3 in T.grid(1, 1, 1, 2048):
with T.block("tensor_1"):
ax0, ax1, ax2, ax3 = T.axis.remap("SSSS", [i0, i1, i2, i3])
T.reads(tensor_1[ax0, ax1, ax2, ax3])
T.writes(tensor[ax0, ax1, ax2, ax3])
tensor[ax0, ax1, ax2, ax3] = tensor_1[ax0, ax1, ax2, ax3] *
T.float32(0.020408163265306121)
Scheduled IRModule:
------------------------------
# from tvm.script import tir as T
@tvm.script.ir_module
class Module:
@T.prim_func
def main(p0: T.Buffer[(1, 7, 7, 2048), "float32"], tensor: T.Buffer[(1,
1, 1, 2048), "float32"]) -> None:
# function attr dict
T.func_attr({"tir.noalias": True, "global_symbol": "main"})
# body
# with T.block("root")
tensor_shared = T.alloc_buffer([1, 1, 1, 2048], dtype="float32",
scope="shared")
for i0_i1_i2_i3_0_fused in T.thread_binding(32, thread="blockIdx.x",
annotations={"pragma_auto_unroll_max_step":T.int64(16),
"pragma_unroll_explicit":T.int64(1)}):
for ax0, ax1, ax2, ax3, ax4_ax5_fused_0 in T.grid(1, 1, 1, 64,
1):
for ax4_ax5_fused_1 in T.thread_binding(64,
thread="threadIdx.x"):
with T.block("tensor"):
T.where(ax4_ax5_fused_0 * 64 + ax4_ax5_fused_1 < 49)
ax0_1, ax1_1, ax2_1 = T.axis.remap("SSS", [ax0, ax1,
ax2])
ax3_1 = T.axis.spatial(2048, i0_i1_i2_i3_0_fused *
64 + ax3)
rv0 = T.axis.reduce(7, (ax4_ax5_fused_0 * 64 +
ax4_ax5_fused_1) // 7)
rv1 = T.axis.reduce(7, (ax4_ax5_fused_0 * 64 +
ax4_ax5_fused_1) % 7)
T.reads(p0[ax0_1, ax1_1 * 7 + rv0, ax2_1 * 7 + rv1,
ax3_1])
T.writes(tensor_shared[ax0_1, ax1_1, ax2_1, ax3_1])
with T.init():
tensor_shared[ax0_1, ax1_1, ax2_1, ax3_1] =
T.float32(0)
tensor_shared[ax0_1, ax1_1, ax2_1, ax3_1] =
tensor_shared[ax0_1, ax1_1, ax2_1, ax3_1] + p0[ax0_1, ax1_1 * 7 + rv0, ax2_1 *
7 + rv1, ax3_1]
for i3_1 in T.thread_binding(64, thread="threadIdx.x"):
with T.block("tensor_1"):
ax0 = T.axis.spatial(1, 0)
ax1 = T.axis.spatial(1, 0)
ax2 = T.axis.spatial(1, 0)
ax3 = T.axis.spatial(2048, i0_i1_i2_i3_0_fused * 64 +
i3_1)
T.reads(tensor_shared[ax0, ax1, ax2, ax3])
T.writes(tensor[ax0, ax1, ax2, ax3])
tensor[ax0, ax1, ax2, ax3] = tensor_shared[ax0, ax1,
ax2, ax3] * T.float32(0.020408163265306121)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]