Cookiee235 opened a new issue, #17311:
URL: https://github.com/apache/tvm/issues/17311
### Actual behavior
```
Segmentation fault (core dumped)
```
### Steps to reproduce
```
import tvm
from tvm import relax
import numpy as np
from tvm.script import ir as I
from tvm.script import tir as T
from tvm.script import relax as R
@I.ir_module
class Module:
    # TIR kernel: two accumulators over the trailing two axes of A, followed by
    # an elementwise combination with gamma and beta.
    @T.prim_func(private=True)
    def layer_norm(
        A: T.Buffer((T.int64(1), T.int64(512), T.int64(64), T.int64(64)), "float32"),
        gamma: T.Buffer((T.int64(64), T.int64(64)), "float32"),
        beta: T.Buffer((T.int64(64), T.int64(64)), "float32"),
        T_layer_norm: T.Buffer((T.int64(1), T.int64(512), T.int64(64), T.int64(64)), "float32"),
    ):
        T.func_attr({"op_pattern": 4})
        # with T.block("root"):
        # NOTE(review): both temporaries are allocated with shape (64, 64) but
        # are indexed below as [ax0, ax1], where ax1 follows i1 over an extent
        # of 512. Indices past 63 fall outside the allocation; this is
        # presumably what triggers the reported segfault -- confirm.
        rxplaceholder_red_temp_v0 = T.alloc_buffer((T.int64(64), T.int64(64)))
        rxplaceholder_red_temp_v1 = T.alloc_buffer((T.int64(64), T.int64(64)))

        # Reduction over (k2, k3): v0 accumulates A, v1 accumulates A * A.
        for i0, i1, i2, i3 in T.grid(T.int64(1), T.int64(512), T.int64(64), T.int64(64)):
            with T.block("rxplaceholder_red_temp"):
                ax0, ax1, k2, k3 = T.axis.remap("SSRR", [i0, i1, i2, i3])
                T.reads(A[ax0, ax1, k2, k3])
                T.writes(
                    rxplaceholder_red_temp_v0[ax0, ax1],
                    rxplaceholder_red_temp_v1[ax0, ax1],
                )
                with T.init():
                    rxplaceholder_red_temp_v0[ax0, ax1] = T.float32(0)
                    rxplaceholder_red_temp_v1[ax0, ax1] = T.float32(0)
                v_rxplaceholder_red_temp_v0: T.float32 = (
                    rxplaceholder_red_temp_v0[ax0, ax1] + A[ax0, ax1, k2, k3]
                )
                v_rxplaceholder_red_temp_v1: T.float32 = (
                    rxplaceholder_red_temp_v1[ax0, ax1]
                    + A[ax0, ax1, k2, k3] * A[ax0, ax1, k2, k3]
                )
                rxplaceholder_red_temp_v0[ax0, ax1] = v_rxplaceholder_red_temp_v0
                rxplaceholder_red_temp_v1[ax0, ax1] = v_rxplaceholder_red_temp_v1

        # Elementwise stage: (A - v0 * c) * rsqrt(v1 * c - (v0 * c)^2 + eps),
        # scaled by gamma and shifted by beta, with c = 0.05 and eps = 1e-05.
        for i0, i1, i2, i3 in T.grid(T.int64(1), T.int64(512), T.int64(64), T.int64(64)):
            with T.block("T_layer_norm"):
                ax0, ax1, ax2, ax3 = T.axis.remap("SSSS", [i0, i1, i2, i3])
                T.reads(
                    A[ax0, ax1, ax2, ax3],
                    rxplaceholder_red_temp_v0[ax0, ax1],
                    rxplaceholder_red_temp_v1[ax0, ax1],
                    gamma[ax2, ax3],
                    beta[ax2, ax3],
                )
                T.writes(T_layer_norm[ax0, ax1, ax2, ax3])
                T_layer_norm[ax0, ax1, ax2, ax3] = (
                    (
                        A[ax0, ax1, ax2, ax3]
                        - rxplaceholder_red_temp_v0[ax0, ax1] * T.float32(0.050000000000000003)
                    )
                    * T.rsqrt(
                        rxplaceholder_red_temp_v1[ax0, ax1] * T.float32(0.050000000000000003)
                        - rxplaceholder_red_temp_v0[ax0, ax1]
                        * T.float32(0.050000000000000003)
                        * (rxplaceholder_red_temp_v0[ax0, ax1] * T.float32(0.050000000000000003))
                        + T.float32(1.0000000000000001e-05)
                    )
                    * gamma[ax2, ax3]
                    + beta[ax2, ax3]
                )

    # TIR kernel: elementwise max(A, 0) written into B.
    @T.prim_func(private=True)
    def relu(
        A: T.Buffer((T.int64(1), T.int64(512), T.int64(64), T.int64(64)), "float32"),
        B: T.Buffer((T.int64(1), T.int64(512), T.int64(64), T.int64(64)), "float32"),
    ):
        T.func_attr({"op_pattern": 0})
        # with T.block("root"):
        for i0, i1, i2, i3 in T.grid(T.int64(1), T.int64(512), T.int64(64), T.int64(64)):
            with T.block("relu"):
                v_i0, v_i1, v_i2, v_i3 = T.axis.remap("SSSS", [i0, i1, i2, i3])
                T.reads(A[v_i0, v_i1, v_i2, v_i3])
                T.writes(B[v_i0, v_i1, v_i2, v_i3])
                B[v_i0, v_i1, v_i2, v_i3] = T.max(A[v_i0, v_i1, v_i2, v_i3], T.float32(0))

    # Relax function tagged {"Primitive": 1} that chains layer_norm into relu
    # through call_tir.
    @R.function(private=True)
    def fused_layer_norm_relu(
        x: R.Tensor((1, 512, 64, 64), dtype="float32"),
        mean: R.Tensor((64, 64), dtype="float32"),
        var: R.Tensor((64, 64), dtype="float32"),
    ) -> R.Tensor((1, 512, 64, 64), dtype="float32"):
        R.func_attr({"Primitive": 1})
        cls = Module
        with R.dataflow():
            # NOTE(review): this out_sinfo carries no dtype, unlike the relu
            # call below which states "float32" -- kept as reported.
            gv0 = R.call_tir(
                cls.layer_norm,
                (x, mean, var),
                out_sinfo=R.Tensor((1, 512, 64, 64)),
            )
            gv = R.call_tir(
                cls.relu,
                (gv0,),
                out_sinfo=R.Tensor((1, 512, 64, 64), dtype="float32"),
            )
            R.output(gv)
        return gv

    # Public entry point: forwards its three arguments to the fused function.
    @R.function
    def main(
        x: R.Tensor((1, 512, 64, 64), dtype="float32"),
        mean: R.Tensor((64, 64), dtype="float32"),
        var: R.Tensor((64, 64), dtype="float32"),
    ) -> R.Tensor((1, 512, 64, 64), dtype="float32"):
        cls = Module
        with R.dataflow():
            gv: R.Tensor((1, 512, 64, 64), dtype="float32") = cls.fused_layer_norm_relu(
                x, mean, var
            )
            R.output(gv)
        return gv
# Apply the FuseTIR pass to the module before it is built.
mod = relax.transform.FuseTIR()(Module)
def compile_mod(mod, func_name, target, *inputs):
    """Build ``mod`` with Relax, run one of its functions and return the result.

    Parameters
    ----------
    mod
        The IRModule to compile.
    func_name
        Name of the function inside ``mod`` to invoke on the virtual machine.
    target
        Build target handed to ``relax.build``. The virtual machine is always
        created on ``tvm.cpu()``, so this should be a CPU target such as
        ``"llvm"``.
    *inputs
        Positional arguments forwarded to the invoked function.

    Returns
    -------
    Whatever the invoked VM function returns.
    """
    # Honour the caller's target instead of a hard-coded 'llvm'.
    ex = relax.build(mod, target=target)
    vm = relax.VirtualMachine(ex, tvm.cpu())
    # The segfault described in this report occurs during this call.
    return vm[func_name](*inputs)
# Random float32 inputs, scaled by 10, matching the shapes that `main` expects.
input_0 = tvm.nd.array(10 * np.random.random([1, 512, 64, 64]).astype("float32"))
input_1 = tvm.nd.array(10 * np.random.random([64, 64]).astype("float32"))
input_2 = tvm.nd.array(10 * np.random.random([64, 64]).astype("float32"))

# Build for llvm and invoke `main`; this is the call that crashes.
compile_mod(mod, "main", "llvm", input_0, input_1, input_2)
```
CC @Lunderberg @vinx13
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]