masahi commented on pull request #7425:
URL: https://github.com/apache/tvm/pull/7425#issuecomment-779798238
@tqchen @vinx13 @junrushao1994 Does the behavior of the `While` node with
respect to `StorageRewrite`, shown below, look reasonable?
In the following IR, the "A" and "B" buffers, which are allocated inside `For`
loops, are coalesced into one buffer, but the "C" buffer, which is allocated
inside the `While` loop, is not:
```
def test_parallel_alloc():
    """Build a parallel loop whose body allocates buffers in For and While loops.

    Inside the parallel loop over ``i``, buffers "A" and "B" (scope "global")
    are each allocated in their own ``for`` loop over ``j``, and buffer "C"
    (scope "local") is allocated inside a ``while`` loop driven by a
    one-element "local" counter buffer.
    """
    ib = tvm.tir.ir_builder.create()
    n = te.var("n")
    with ib.for_range(0, n, name="i", kind="parallel") as i:
        # Two sibling loops, each allocating its own n-element buffer.
        with ib.for_range(0, 10, name="j") as j:
            A = ib.allocate("float32", n, name="A", scope="global")
            A[j] = A[j] + 2
        with ib.for_range(0, 10, name="j") as j:
            B = ib.allocate("float32", n, name="B", scope="global")
            B[j] = B[j] + 2
        # NOTE: this rebinds `i`; from here on it is the one-element counter
        # buffer named "i", not the parallel loop variable.
        i = ib.allocate("int32", (1,), name="i", scope="local")
        i[0] = 1
        with ib.while_loop(i[0] < 10):
            # "C" is allocated inside the while body.
            C = ib.allocate("float32", n, name="C", scope="local")
            C[i[0]] = C[i[0]] + 2
            i[0] += 1
```
```
parallel (i, 0, n) {
// attr [A] storage_scope = "global"
allocate A[float32 * n]
// attr [i] storage_scope = "local"
allocate i[int32 * 1]
// attr [C] storage_scope = "local"
allocate C[float32 * n]
for (j, 0, 10) {
A[j] = (A[j] + 2f)
}
for (j, 0, 10) {
A[j] = (A[j] + 2f)
}
i[0] = 1
while((i[0] < 10)){
C[i[0]] = (C[i[0]] + 2f)
i[0] = (i[0] + 1)
}
}
```
In the following IR, the "A", "B", and "C" buffers, including the one
allocated inside the `While` loop, are all coalesced:
```
def test_alloc_seq():
    """Build a serial loop whose body allocates same-scope buffers in For and While loops.

    Buffers "A", "B" and "C" are all 200-element float32 buffers in the custom
    scope "local.L0A", which is registered through ``register_mem`` before the
    IR is built. "A" and "B" are each allocated in their own ``for`` loop over
    ``j``; "C" is allocated inside a ``while`` loop driven by a one-element
    "local" counter buffer.
    """
    scope_tb = "local.L0A"
    max_bits = 1024 * 1024 * 1024
    register_mem(scope_tb, max_bits)
    ib = tvm.tir.ir_builder.create()
    n = te.var("n")
    with ib.for_range(0, n, name="i") as i:
        # Two sibling loops, each allocating its own buffer in scope_tb.
        with ib.for_range(0, 10, name="j") as j:
            A = ib.allocate("float32", 200, name="A", scope=scope_tb)
            A[j] = 1.2
        with ib.for_range(0, 10, name="j") as j:
            B = ib.allocate("float32", 200, name="B", scope=scope_tb)
            B[j] = 1.3
        # NOTE: this rebinds `i`; from here on it is the one-element counter
        # buffer named "i", not the outer loop variable.
        i = ib.allocate("int32", (1,), name="i", scope="local")
        i[0] = 1
        with ib.while_loop(i[0] < 10):
            # "C" is allocated inside the while body, in the same scope as A and B.
            C = ib.allocate("float32", 200, name="C", scope=scope_tb)
            C[i[0]] = 1.4
            i[0] += 1
    body = ib.get()
```
```
// attr [A] storage_scope = "local.L0A"
allocate A[float32 * 200]
// attr [i] storage_scope = "local"
allocate i[int32 * 1]
for (i, 0, n) {
for (j, 0, 10) {
A[j] = 1.2f
}
for (j, 0, 10) {
A[j] = 1.3f
}
i[0] = 1
while((i[0] < 10)){
A[i[0]] = 1.4f
i[0] = (i[0] + 1)
}
}
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]