[tvm] branch unity-staging updated: [MERGE-FIX] Update the code to fix merge issues

tqchen Tue, 01 Aug 2023 06:57:07 -0700

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch unity-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git



The following commit(s) were added to refs/heads/unity-staging by this push:
     new 53fd712bf7 [MERGE-FIX] Update the code to fix merge issues
53fd712bf7 is described below

commit 53fd712bf75b46477b8b0005aa98b7befcd73166
Author: Ruihang Lai <[email protected]>
AuthorDate: Tue Aug 1 02:40:29 2023 -0400

    [MERGE-FIX] Update the code to fix merge issues
    
    Fix FuseOps to adapt to apache/tvm#15137
    Fix TIR TVMScript to adapt to apache/tvm#15214
---
 src/relax/transform/fuse_ops.cc                    |   2 +-
 .../test_analysis_suggest_layout_transforms.py     |  70 +++++++-------
 .../relax/test_backend_transform_shape_lower.py    |   2 +-
 tests/python/relax/test_blockbuilder_emit_te.py    |   2 +-
 tests/python/relax/test_frontend_nn_op.py          |   2 +-
 .../relax/test_meta_schedule_relax_integration.py  |   6 +-
 tests/python/relax/test_transform_alter_op_impl.py |  34 +++----
 tests/python/relax/test_transform_fuse_ops.py      |  40 ++++----
 tests/python/relax/test_transform_fuse_tir.py      |  20 ++--
 .../relax/test_transform_gradient_te_register.py   |  16 ++--
 tests/python/relax/test_transform_legalize_ops.py  |  14 +--
 .../relax/test_transform_legalize_ops_binary.py    |  88 ++++++++---------
 .../test_transform_legalize_ops_create_datatype.py |  46 ++++-----
 .../relax/test_transform_legalize_ops_grad.py      |  14 +--
 .../relax/test_transform_legalize_ops_image.py     |   4 +-
 ..._transform_legalize_ops_index_linear_algebra.py |  34 +++----
 .../test_transform_legalize_ops_manipulate.py      |  78 +++++++--------
 .../python/relax/test_transform_legalize_ops_nn.py | 106 ++++++++++-----------
 ...st_transform_legalize_ops_search_statistical.py |  46 ++++-----
 tests/python/relax/test_tvmscript_parser.py        |   6 +-
 20 files changed, 315 insertions(+), 315 deletions(-)

diff --git a/src/relax/transform/fuse_ops.cc b/src/relax/transform/fuse_ops.cc
index 463772f1f2..f4e7e00f30 100644
--- a/src/relax/transform/fuse_ops.cc
+++ b/src/relax/transform/fuse_ops.cc
@@ -998,7 +998,7 @@ IRModule FuseOps(IRModule mod, int opt_level, size_t max_fuse_depth) {
 
   // Step 2. Partition the graph by applying the fusion algorithm.
   std::vector<GraphPartitioner::Group*> groups =
-      GraphPartitioner(&arena, opt_level, max_fuse_depth).Partition(graph);
+      GraphPartitioner(&arena, opt_level, max_fuse_depth, /*max_function_args=*/0).Partition(graph);
 
   // Step 3. Transform the IRModule by fusing the operators in accordance with the graph partition
   // results.
diff --git a/tests/python/relax/test_analysis_suggest_layout_transforms.py b/tests/python/relax/test_analysis_suggest_layout_transforms.py
index 7f517a6f75..6e47c1d681 100644
--- a/tests/python/relax/test_analysis_suggest_layout_transforms.py
+++ b/tests/python/relax/test_analysis_suggest_layout_transforms.py
@@ -42,7 +42,7 @@ def apply_transformations(func, suggested_transfoms, print_transformation=False)
 
 
 def test_nested_blocks():
-    @T.prim_func
+    @T.prim_func(private=True)
     def nested_block(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         relu: T.Buffer((32, 64, 224, 224), "float32"),
@@ -67,7 +67,7 @@ def test_nested_blocks():
 
 
 def test_mismatch_transformations_and_num_params():
-    @T.prim_func
+    @T.prim_func(private=True)
     def elemwise(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         relu: T.Buffer((32, 64, 224, 224), "float32"),
@@ -91,7 +91,7 @@ def test_mismatch_transformations_and_num_params():
 
 
 def test_empty_write_transformations():
-    @T.prim_func
+    @T.prim_func(private=True)
     def elemwise(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         relu: T.Buffer((32, 64, 224, 224), "float32"),
@@ -110,7 +110,7 @@ def test_empty_write_transformations():
 
 
 def test_non_bijective_block_transform():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64), "float32"),
         output: T.Buffer((32, 64), "float32"),
@@ -129,7 +129,7 @@ def test_non_bijective_block_transform():
 
 
 def test_non_affine_access():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64), "float32"),
         output: T.Buffer((32 * 64, 10), "float32"),
@@ -148,7 +148,7 @@ def test_non_affine_access():
 
 
 def test_unsupported_write_spatial_layout():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((4, 4), "float32"),
         output: T.Buffer((16), "float32"),
@@ -167,7 +167,7 @@ def test_unsupported_write_spatial_layout():
 
 
 def test_unpacked_iter_used_in_read_access():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((8, 4), "float32"),
         output: T.Buffer((4, 8), "float32"),
@@ -179,7 +179,7 @@ def test_unpacked_iter_used_in_read_access():
                 T.writes(output[v_ax0, v_ax1])
                 output[v_ax0, v_ax1] = arg[v_ax1, v_ax2]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((8, 4), "float32"),
         output: T.Buffer((32), "float32"),
@@ -199,7 +199,7 @@ def test_unpacked_iter_used_in_read_access():
 
 
 def test_invalid_index_map():
-    @T.prim_func
+    @T.prim_func(private=True)
     def elemwise(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         relu: T.Buffer((32, 64, 224, 224), "float32"),
@@ -220,7 +220,7 @@ def test_invalid_index_map():
 
 
 def test_SRSR_block():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 224, 64, 224), "float32"),
         sum: T.Buffer((32, 64), "float32"),
@@ -234,7 +234,7 @@ def test_SRSR_block():
                     sum[v_ax0, v_ax1] = T.float32(0)
                 sum[v_ax0, v_ax1] = sum[v_ax0, v_ax1] + arg[v_ax0, v_k2, 
v_ax1, v_k3]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 224, 16, 224, 4), "float32"),
         sum: T.Buffer((32, 16, 4), "float32"),
@@ -256,7 +256,7 @@ def test_SRSR_block():
 
 
 def test_op_elemwise_symbolic():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(arg: T.handle, relu: T.handle):
         N = T.int64()
         C = T.int64()
@@ -271,7 +271,7 @@ def test_op_elemwise_symbolic():
                 T.writes(Relu[v_i0, v_i1, v_i2, v_i3])
                 Relu[v_i0, v_i1, v_i2, v_i3] = T.max(Arg[v_i0, v_i1, v_i2, 
v_i3], T.float32(0))
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(arg: T.handle, relu: T.handle):
         N = T.int64()
         C = T.int64()
@@ -295,7 +295,7 @@ def test_op_elemwise_symbolic():
 
 
 def test_op_elemwise():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         relu: T.Buffer((32, 64, 224, 224), "float32"),
@@ -307,7 +307,7 @@ def test_op_elemwise():
                 T.writes(relu[v_i0, v_i1, v_i2, v_i3])
                 relu[v_i0, v_i1, v_i2, v_i3] = T.max(arg[v_i0, v_i1, v_i2, 
v_i3], T.float32(0))
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 224, 224, 64), "float32"),
         relu: T.Buffer((32, 224, 224, 64), "float32"),
@@ -327,7 +327,7 @@ def test_op_elemwise():
 
 
 def test_op_pool_nchw_nhwc():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         pool_max: T.Buffer((32, 64, 111, 223), "float32"),
@@ -359,7 +359,7 @@ def test_op_pool_nchw_nhwc():
                     ],
                 )
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 224, 224, 64), "float32"),
         pool_max: T.Buffer((32, 111, 223, 64), "float32"),
@@ -387,7 +387,7 @@ def test_op_pool_nchw_nhwc():
 
 
 def test_op_pool_nchw16c_nhwc():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer(
             (32, 4, 224, 224, 16),
@@ -413,7 +413,7 @@ def test_op_pool_nchw16c_nhwc():
                     arg[v_ax0, v_ax1, v_ax2 * 2 + v_rv0, v_ax3 + v_rv1, v_ax4],
                 )
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 224, 224, 64), "float32"),
         pool_max: T.Buffer((32, 110, 220, 64), "float32"),
@@ -440,7 +440,7 @@ def test_op_pool_nchw16c_nhwc():
 
 
 def test_op_reduce():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         sum: T.Buffer((32, 64), "float32"),
@@ -454,7 +454,7 @@ def test_op_reduce():
                     sum[v_ax0, v_ax1] = T.float32(0)
                 sum[v_ax0, v_ax1] = sum[v_ax0, v_ax1] + arg[v_ax0, v_ax1, 
v_k2, v_k3]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 4, 224, 224, 16), "float32"),
         sum: T.Buffer((32, 4, 16), "float32"),
@@ -477,7 +477,7 @@ def test_op_reduce():
 
 def test_op_upsampling():
     # relay materializes the layout if H, W or D dimensions are moved or tiled.
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         resize: T.Buffer((32, 64, 202, 246), "float32"),
@@ -518,7 +518,7 @@ def test_op_upsampling():
                     ),
                 ]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         resize: T.Buffer((32, 202, 246, 64), "float32"),
@@ -568,7 +568,7 @@ def test_op_upsampling():
 
 
 def test_op_strided_slice():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         T_strided_slice_with_axes: T.Buffer((32, 64, 10, 8), "float32"),
@@ -592,7 +592,7 @@ def test_op_strided_slice():
                     v_ax3 * 7 + 4,
                 ]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 224, 224, 16, 4), "float32"),
         T_strided_slice_with_axes: T.Buffer((32, 10, 8, 16, 4), "float32"),
@@ -615,7 +615,7 @@ def test_op_strided_slice():
 
 
 def test_op_binary_broadcast():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg0: T.Buffer((32, 64, 224, 224), "float32"),
         arg1: T.Buffer((64, 224, 224), "float32"),
@@ -635,7 +635,7 @@ def test_op_binary_broadcast():
                     arg0[v_ax0, v_ax1, v_ax2, v_ax3] + arg1[v_ax1, v_ax2, 
v_ax3]
                 )
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg0: T.Buffer((32, 224, 224, 16, 4), "float32"),
         arg1: T.Buffer((224, 224, 16, 4), "float32"),
@@ -658,7 +658,7 @@ def test_op_binary_broadcast():
 
 
 def test_op_transpose():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         T_transpose: T.Buffer((32, 224, 224, 64), "float32"),
@@ -670,7 +670,7 @@ def test_op_transpose():
                 T.writes(T_transpose[v_ax0, v_ax1, v_ax2, v_ax3])
                 T_transpose[v_ax0, v_ax1, v_ax2, v_ax3] = arg[v_ax0, v_ax3, 
v_ax1, v_ax2]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         T_transpose: T.Buffer((32, 224, 64, 224), "float32"),
@@ -690,7 +690,7 @@ def test_op_transpose():
 
 
 def test_op_pad():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         PadInput: T.Buffer((32, 64, 230, 230), "float32"),
@@ -706,7 +706,7 @@ def test_op_pad():
                     T.float32(2),
                 )
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 224, 224, 16, 4), "float32"),
         PadInput: T.Buffer((32, 230, 230, 16, 4), "float32"),
@@ -730,7 +730,7 @@ def test_op_pad():
 
 
 def test_op_split():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         split0: T.Buffer((32, 32, 224, 224), "float32"),
@@ -749,7 +749,7 @@ def test_op_split():
                 T.writes(split1[v_ax0, v_ax1, v_ax2, v_ax3])
                 split1[v_ax0, v_ax1, v_ax2, v_ax3] = arg[v_ax0, v_ax1 + 32, 
v_ax2, v_ax3]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 224, 224, 64), "float32"),
         split0: T.Buffer((32, 224, 224, 32), "float32"),
@@ -778,7 +778,7 @@ def test_op_split():
 
 @pytest.mark.skip("temp disable, due to minor arith regression")
 def test_op_split_tiling_split_dim():
-    @T.prim_func
+    @T.prim_func(private=True)
     def before(
         arg: T.Buffer((32, 64, 224, 224), "float32"),
         split0: T.Buffer((32, 32, 224, 224), "float32"),
@@ -797,7 +797,7 @@ def test_op_split_tiling_split_dim():
                 T.writes(split1[v_ax0, v_ax1, v_ax2, v_ax3])
                 split1[v_ax0, v_ax1, v_ax2, v_ax3] = arg[v_ax0, v_ax1 + 32, 
v_ax2, v_ax3]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def expected(
         arg: T.Buffer((32, 224, 224, 16, 4), "float32"),
         split0: T.Buffer((32, 224, 224, 8, 4), "float32"),
diff --git a/tests/python/relax/test_backend_transform_shape_lower.py 
b/tests/python/relax/test_backend_transform_shape_lower.py
index 50b69a3c35..859df1c9ea 100644
--- a/tests/python/relax/test_backend_transform_shape_lower.py
+++ b/tests/python/relax/test_backend_transform_shape_lower.py
@@ -189,7 +189,7 @@ def test_symbolic_compute():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def shape_func(H: T.Buffer(T.int64(4), "int64")):
             # generated compute function
             T.func_attr({"tir.is_host_func": 1})
diff --git a/tests/python/relax/test_blockbuilder_emit_te.py 
b/tests/python/relax/test_blockbuilder_emit_te.py
index 7a519d1022..3724c1a4b8 100644
--- a/tests/python/relax/test_blockbuilder_emit_te.py
+++ b/tests/python/relax/test_blockbuilder_emit_te.py
@@ -41,7 +41,7 @@ def test_emit_te_with_symbolic_arg():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def te_func(
             A: T.Buffer((T.int64(10),), "float32"),
             B: T.Buffer((T.int64(10),), "float32"),
diff --git a/tests/python/relax/test_frontend_nn_op.py 
b/tests/python/relax/test_frontend_nn_op.py
index d2501cb0ef..0346a6f871 100644
--- a/tests/python/relax/test_frontend_nn_op.py
+++ b/tests/python/relax/test_frontend_nn_op.py
@@ -218,7 +218,7 @@ def test_tensor_expr_op():
     # fmt: off
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def add_one(A: T.Buffer((T.int64(10), T.int64(10)), "float32"), T_add: 
T.Buffer((T.int64(10), T.int64(10)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
diff --git a/tests/python/relax/test_meta_schedule_relax_integration.py 
b/tests/python/relax/test_meta_schedule_relax_integration.py
index a66a29405b..00a342c460 100644
--- a/tests/python/relax/test_meta_schedule_relax_integration.py
+++ b/tests/python/relax/test_meta_schedule_relax_integration.py
@@ -54,7 +54,7 @@ class Module0:
 # fmt: off
 @I.ir_module
 class Module:
-    @T.prim_func
+    @T.prim_func(private=True)
     def conv2d(rxplaceholder: T.Buffer((T.int64(1), T.int64(8), T.int64(8), 
T.int64(4)), "int32"), DepthwiseConv2d: T.Buffer((T.int64(1), T.int64(8), 
T.int64(8), T.int64(4)), "int32")):
         T.func_attr({"op_pattern": 4, "tir.noalias": True})
         # with T.block("root"):
@@ -76,7 +76,7 @@ class Module:
                     DepthwiseConv2d[v_b, v_i, v_j, v_c] = 0
                 DepthwiseConv2d[v_b, v_i, v_j, v_c] = DepthwiseConv2d[v_b, 
v_i, v_j, v_c] + PaddedInput[v_b, v_i + v_di, v_j + v_dj, v_c] * 
fused_constant_1[v_di, v_dj, v_c, T.int64(0)]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def conv2d0(rxplaceholder0: T.Buffer((T.int64(1), T.int64(8), T.int64(8), 
T.int64(4)), "int32"), DepthwiseConv2d0: T.Buffer((T.int64(1), T.int64(8), 
T.int64(8), T.int64(4)), "int32")):
         T.func_attr({"op_pattern": 4, "tir.noalias": True})
         # with T.block("root"):
@@ -98,7 +98,7 @@ class Module:
                     DepthwiseConv2d0[v_b, v_i, v_j, v_c] = 0
                 DepthwiseConv2d0[v_b, v_i, v_j, v_c] = DepthwiseConv2d0[v_b, 
v_i, v_j, v_c] + PaddedInput0[v_b, v_i + v_di, v_j + v_dj, v_c] * 
fused_constant0_1[v_di, v_dj, v_c, T.int64(0)]
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def fused_conv2d_add(data: T.Buffer((T.int64(1), T.int64(8), T.int64(8), 
T.int64(4)), "int32"), T_add: T.Buffer((T.int64(1), T.int64(8), T.int64(8), 
T.int64(4)), "int32")):
         T.func_attr({"tir.noalias": True})
         # with T.block("root"):
diff --git a/tests/python/relax/test_transform_alter_op_impl.py 
b/tests/python/relax/test_transform_alter_op_impl.py
index aa35067d58..81bc480785 100644
--- a/tests/python/relax/test_transform_alter_op_impl.py
+++ b/tests/python/relax/test_transform_alter_op_impl.py
@@ -41,7 +41,7 @@ def test_single_output():
     # fmt: off
     @I.ir_module
     class Before:
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(arg0: T.Buffer((16,), "float32"), arg1: T.Buffer((16,), 
"float32"), output: T.Buffer((16,), "float32")):
             T.func_attr({"operator_name": "relax.add"})
             for ax0 in range(16):
@@ -60,7 +60,7 @@ def test_single_output():
             return gv
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def relax_add_replacement(arg0: T.Buffer((4, 4), "float32"), arg1: 
T.Buffer((4, 4), "float32"), output: T.Buffer((4, 4), "float32")):
             T.func_attr({"operator_name": "relax.add"})
             for ax0, ax1 in T.grid(4, 4):
@@ -81,7 +81,7 @@ def test_single_output():
                 R.output(gv)
             return gv
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def add_2d(arg0: T.Buffer((4, 4), "float32"), arg1: T.Buffer((4, 4), 
"float32"), output: T.Buffer((4, 4), "float32")):
         for ax0, ax1 in T.grid(4, 4):
             with T.block("T_add"):
@@ -104,7 +104,7 @@ def test_empty_layout_changes():
     # fmt: off
     @I.ir_module
     class Before:
-        @T.prim_func
+        @T.prim_func(private=True)
         def mul_by_2(arg0: T.Buffer((16,), "float32"), output: T.Buffer((16,), 
"float32")):
             T.func_attr({"operator_name": "relax.mul_by_2"})
             for ax0 in range(16):
@@ -123,7 +123,7 @@ def test_empty_layout_changes():
             return gv
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def relax_mul_by_2_replacement(arg0: T.Buffer((16,), "float32"), 
output: T.Buffer((16,), "float32")):
             T.func_attr({"operator_name": "relax.mul_by_2"})
             for ax0 in range(16):
@@ -141,7 +141,7 @@ def test_empty_layout_changes():
                 R.output(gv)
             return gv
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def add_x_x(arg0: T.Buffer((16,), "float32"), output: T.Buffer((16,), 
"float32")):
         T.func_attr({"operator_name": "relax.mul_by_2"})
         for ax0 in range(16):
@@ -164,7 +164,7 @@ def test_multiple_outputs():
     # fmt: off
     @I.ir_module
     class Before:
-        @T.prim_func
+        @T.prim_func(private=True)
         def some_op(arg0: T.Buffer((16,), "float32"), arg1: T.Buffer((16,), 
"float32"), output0: T.Buffer((16,), "float32"), output1: T.Buffer((16,), 
"float32")):
             T.func_attr({"operator_name": "relax.some_op"})
             for ax0 in range(16):
@@ -184,7 +184,7 @@ def test_multiple_outputs():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def relax_some_op_replacement(arg0: T.Buffer((4, 4), "float32"), arg1: 
T.Buffer((4, 4), "float32"), output0: T.Buffer((4, 4), "float32"), output1: 
T.Buffer((4, 4), "float32")):
             T.func_attr({"operator_name": "relax.some_op"})
             for ax0, ax1 in T.grid(4, 4):
@@ -209,7 +209,7 @@ def test_multiple_outputs():
                 R.output(gv)
             return gv
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def some_op_2d(arg0: T.Buffer((4, 4), "float32"), arg1: T.Buffer((4, 4), 
"float32"), output0: T.Buffer((4, 4), "float32"), output1: T.Buffer((4, 4), 
"float32")):
         for ax0, ax1 in T.grid(4, 4):
             with T.block("T_add"):
@@ -234,7 +234,7 @@ def test_multiple_outputs_with_axis_sep():
     # fmt: off
     @I.ir_module
     class Before:
-        @T.prim_func
+        @T.prim_func(private=True)
         def some_op(arg0: T.Buffer((16,), "float32"), arg1: T.Buffer((16,), 
"float32"), output0: T.Buffer((16,), "float32"), output1: T.Buffer((16,), 
"float32")):
             T.func_attr({"operator_name": "relax.some_op"})
             for ax0 in range(16):
@@ -254,7 +254,7 @@ def test_multiple_outputs_with_axis_sep():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def relax_some_op_replacement(arg0: T.Buffer((4, 4), "float32"), arg1: 
T.Buffer((4, 4), "float32"), output0: T.Buffer((4, 4), "float32"), output1: 
T.Buffer((4, 4), "float32")):
             T.func_attr({"operator_name": "relax.some_op"})
             for ax0, ax1 in T.grid(4, 4):
@@ -279,7 +279,7 @@ def test_multiple_outputs_with_axis_sep():
                 R.output(gv)
             return gv
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def some_op_2d(arg0: T.Buffer((4, 4), "float32"), arg1: T.Buffer((4, 4), 
"float32"), output0: T.Buffer((4, 4), "float32"), output1: T.Buffer((4, 4), 
"float32")):
         for ax0, ax1 in T.grid(4, 4):
             with T.block("T_add"):
@@ -314,7 +314,7 @@ def test_unsupported_implicit_padding():
                 R.output(gv)
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def relu(arg0: T.Buffer((14,), "float32"), output: T.Buffer((14,), 
"float32")):
             T.func_attr({"operator_name": "relax.relu"})
             for ax0 in T.grid(14):
@@ -326,7 +326,7 @@ def test_unsupported_implicit_padding():
 
     before = InputModule
 
-    @T.prim_func
+    @T.prim_func(private=True)
     def relu_pad(arg0: T.Buffer((16,), "float32"), output: T.Buffer((16,), 
"float32")):
         for ax0 in T.grid(16):
             with T.block("T_add"):
@@ -354,7 +354,7 @@ def test_multiple_call_sites():
     # fmt: off
     @I.ir_module
     class Before:
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(arg0: T.Buffer((16,), "float32"), arg1: T.Buffer((16,), 
"float32"), output: T.Buffer((16,), "float32")):
             T.func_attr({"operator_name": "relax.add"})
             for ax0 in range(16):
@@ -375,7 +375,7 @@ def test_multiple_call_sites():
             return gv
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def relax_add_replacement(arg0: T.Buffer((4, 4), "float32"), arg1: 
T.Buffer((4, 4), "float32"), output: T.Buffer((4, 4), "float32")):
             T.func_attr({"operator_name": "relax.add"})
             # with T.block("root"):
@@ -401,7 +401,7 @@ def test_multiple_call_sites():
                 gv: R.Tensor((16,), dtype="float32") = lv2_1
                 R.output(gv)
             return gv
-    @T.prim_func
+    @T.prim_func(private=True)
     def add_2d(arg0: T.Buffer((4, 4), "float32"), arg1: T.Buffer((4, 4), 
"float32"), output: T.Buffer((4, 4), "float32")):
         for ax0, ax1 in T.grid(4, 4):
             with T.block("T_add"):
diff --git a/tests/python/relax/test_transform_fuse_ops.py 
b/tests/python/relax/test_transform_fuse_ops.py
index c98bdd2c8c..f16cf73d61 100644
--- a/tests/python/relax/test_transform_fuse_ops.py
+++ b/tests/python/relax/test_transform_fuse_ops.py
@@ -859,7 +859,7 @@ def test_edge_with_call_dps_packed():
                 R.output(b, c)
             return R.tuple(b, c)
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def exp(A: T.Buffer((2, 3), "float32"), B: T.Buffer((2, 3), 
"float32")):
             T.evaluate(0)
 
@@ -880,7 +880,7 @@ def test_layer_norm_silu():
                 R.output(gv1)
             return gv1
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def layer_norm(A: T.Buffer((T.int64(1), T.int64(512), T.int64(64), 
T.int64(64)), "float32"), gamma: T.Buffer((T.int64(64), T.int64(64)), 
"float32"), beta: T.Buffer((T.int64(64), T.int64(64)), "float32"), 
T_layer_norm: T.Buffer((T.int64(1), T.int64(512), T.int64(64), T.int64(64)), 
"float32")):
             rxplaceholder_red_temp_v0 = T.alloc_buffer([T.int64(64), 
T.int64(64)], dtype="float32")
             rxplaceholder_red_temp_v1 = T.alloc_buffer([T.int64(64), 
T.int64(64)], dtype="float32")
@@ -903,7 +903,7 @@ def test_layer_norm_silu():
                     T.writes(T_layer_norm[ax0, ax1, ax2, ax3])
                     T_layer_norm[ax0, ax1, ax2, ax3] = (A[ax0, ax1, ax2, ax3] 
- rxplaceholder_red_temp_v0[ax0, ax1] * T.float32(0.05)) * 
T.rsqrt(rxplaceholder_red_temp_v1[ax0, ax1] * T.float32(0.05) - 
rxplaceholder_red_temp_v0[ax0, ax1] * T.float32(0.05) * 
(rxplaceholder_red_temp_v0[ax0, ax1] * T.float32(0.05)) + T.float32(1e-05), 
dtype="float32") * gamma[ax2, ax3] + beta[ax2, ax3]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def relu(A: T.Buffer((T.int64(1), T.int64(512), T.int64(64), 
T.int64(64)), "float32"), B: T.Buffer((T.int64(1), T.int64(512), T.int64(64), 
T.int64(64)), "float32")):
             for i0, i1, i2, i3 in T.grid(T.int64(1), T.int64(512), 
T.int64(64), T.int64(64)):
                 with T.block("relu"):
@@ -914,7 +914,7 @@ def test_layer_norm_silu():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def layer_norm(A: T.Buffer((T.int64(1), T.int64(512), T.int64(64), 
T.int64(64)), "float32"), gamma: T.Buffer((T.int64(64), T.int64(64)), 
"float32"), beta: T.Buffer((T.int64(64), T.int64(64)), "float32"), 
T_layer_norm: T.Buffer((T.int64(1), T.int64(512), T.int64(64), T.int64(64)), 
"float32")):
             T.func_attr({"op_pattern": 4})
             # with T.block("root"):
@@ -939,7 +939,7 @@ def test_layer_norm_silu():
                     T.writes(T_layer_norm[ax0, ax1, ax2, ax3])
                     T_layer_norm[ax0, ax1, ax2, ax3] = (A[ax0, ax1, ax2, ax3] 
- rxplaceholder_red_temp_v0[ax0, ax1] * T.float32(0.050000000000000003)) * 
T.rsqrt(rxplaceholder_red_temp_v1[ax0, ax1] * T.float32(0.050000000000000003) - 
rxplaceholder_red_temp_v0[ax0, ax1] * T.float32(0.050000000000000003) * 
(rxplaceholder_red_temp_v0[ax0, ax1] * T.float32(0.050000000000000003)) + 
T.float32(1.0000000000000001e-05)) * gamma[ax2, ax3] + beta[ax2, ax3]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def relu(A: T.Buffer((T.int64(1), T.int64(512), T.int64(64), 
T.int64(64)), "float32"), B: T.Buffer((T.int64(1), T.int64(512), T.int64(64), 
T.int64(64)), "float32")):
             T.func_attr({"op_pattern": 0})
             # with T.block("root"):
@@ -1001,7 +1001,7 @@ def test_multiple_paths():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(rxplaceholder: T.Buffer((T.int64(2), T.int64(320), 
T.int64(64), T.int64(64)), "float32"), rxplaceholder_1: T.Buffer((T.int64(1), 
T.int64(320), T.int64(1), T.int64(1)), "float32"), T_add: T.Buffer((T.int64(2), 
T.int64(320), T.int64(64), T.int64(64)), "float32")):
             T.func_attr({"op_pattern": 0, "tir.noalias": True})
             for ax0, ax1, ax2, ax3 in T.grid(T.int64(2), T.int64(320), 
T.int64(64), T.int64(64)):
@@ -1011,7 +1011,7 @@ def test_multiple_paths():
                     T.writes(T_add[v_ax0, v_ax1, v_ax2, v_ax3])
                     T_add[v_ax0, v_ax1, v_ax2, v_ax3] = rxplaceholder[v_ax0, 
v_ax1, v_ax2, v_ax3] + rxplaceholder_1[T.int64(0), v_ax1, T.int64(0), 
T.int64(0)]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def add1(rxplaceholder: T.Buffer((T.int64(2), T.int64(320)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(320),), "float32"), T_add: 
T.Buffer((T.int64(2), T.int64(320)), "float32")):
             T.func_attr({"op_pattern": 0, "tir.noalias": True})
             for ax0, ax1 in T.grid(T.int64(2), T.int64(320)):
@@ -1021,7 +1021,7 @@ def test_multiple_paths():
                     T.writes(T_add[v_ax0, v_ax1])
                     T_add[v_ax0, v_ax1] = rxplaceholder[v_ax0, v_ax1] + 
rxplaceholder_1[v_ax1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def add2(rxplaceholder: T.Buffer((T.int64(2), T.int64(320), 
T.int64(64), T.int64(64)), "float32"), rxplaceholder_1: T.Buffer((T.int64(2), 
T.int64(320), T.int64(1), T.int64(1)), "float32"), T_add: T.Buffer((T.int64(2), 
T.int64(320), T.int64(64), T.int64(64)), "float32")):
             T.func_attr({"op_pattern": 0, "tir.noalias": True})
             for ax0, ax1, ax2, ax3 in T.grid(T.int64(2), T.int64(320), 
T.int64(64), T.int64(64)):
@@ -1031,7 +1031,7 @@ def test_multiple_paths():
                     T.writes(T_add[v_ax0, v_ax1, v_ax2, v_ax3])
                     T_add[v_ax0, v_ax1, v_ax2, v_ax3] = rxplaceholder[v_ax0, 
v_ax1, v_ax2, v_ax3] + rxplaceholder_1[v_ax0, v_ax1, T.int64(0), T.int64(0)]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv2d(rxplaceholder: T.Buffer((T.int64(2), T.int64(320), 
T.int64(64), T.int64(64)), "float32"), rxplaceholder_1: T.Buffer((T.int64(320), 
T.int64(320), T.int64(3), T.int64(3)), "float32"), conv2d_nchw: 
T.Buffer((T.int64(2), T.int64(320), T.int64(64), T.int64(64)), "float32")):
             T.func_attr({"op_pattern": 4, "tir.noalias": True})
             pad_temp = T.alloc_buffer((T.int64(2), T.int64(320), T.int64(66), 
T.int64(66)))
@@ -1050,7 +1050,7 @@ def test_multiple_paths():
                         conv2d_nchw[v_nn, v_ff, v_yy, v_xx] = T.float32(0)
                     conv2d_nchw[v_nn, v_ff, v_yy, v_xx] = conv2d_nchw[v_nn, 
v_ff, v_yy, v_xx] + pad_temp[v_nn, v_rc, v_yy + v_ry, v_xx + v_rx] * 
rxplaceholder_1[v_ff, v_rc, v_ry, v_rx]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul(rxplaceholder: T.Buffer((T.int64(2), T.int64(1280)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(1280), T.int64(320)), 
"float32"), matmul: T.Buffer((T.int64(2), T.int64(320)), "float32")):
             T.func_attr({"op_pattern": 4, "tir.noalias": True})
             for i0, i1, k in T.grid(T.int64(2), T.int64(320), T.int64(1280)):
@@ -1062,7 +1062,7 @@ def test_multiple_paths():
                         matmul[v_i0, v_i1] = T.float32(0)
                     matmul[v_i0, v_i1] = matmul[v_i0, v_i1] + 
rxplaceholder[v_i0, v_k] * rxplaceholder_1[v_k, v_i1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(rxplaceholder: T.Buffer((T.int64(320),), "float32"), 
T_reshape: T.Buffer((T.int64(1), T.int64(320), T.int64(1), T.int64(1)), 
"float32")):
             T.func_attr({"op_pattern": 2, "tir.noalias": True})
             for ax0, ax1, ax2, ax3 in T.grid(T.int64(1), T.int64(320), 
T.int64(1), T.int64(1)):
@@ -1072,7 +1072,7 @@ def test_multiple_paths():
                     T.writes(T_reshape[v_ax0, v_ax1, v_ax2, v_ax3])
                     T_reshape[v_ax0, v_ax1, v_ax2, v_ax3] = 
rxplaceholder[(v_ax1 + v_ax2 + v_ax3) % T.int64(320)]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape1(rxplaceholder: T.Buffer((T.int64(2), T.int64(320)), 
"float32"), T_reshape: T.Buffer((T.int64(2), T.int64(320), T.int64(1), 
T.int64(1)), "float32")):
             T.func_attr({"op_pattern": 2, "tir.noalias": True})
             for ax0, ax1, ax2, ax3 in T.grid(T.int64(2), T.int64(320), 
T.int64(1), T.int64(1)):
@@ -1082,7 +1082,7 @@ def test_multiple_paths():
                     T.writes(T_reshape[v_ax0, v_ax1, v_ax2, v_ax3])
                     T_reshape[v_ax0, v_ax1, v_ax2, v_ax3] = 
rxplaceholder[((v_ax1 + v_ax2 + v_ax3) // T.int64(320) + v_ax0) % T.int64(2), 
(v_ax1 + v_ax2 + v_ax3) % T.int64(320)]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def transpose(rxplaceholder: T.Buffer((T.int64(320), T.int64(1280)), 
"float32"), T_transpose: T.Buffer((T.int64(1280), T.int64(320)), "float32")):
             T.func_attr({"op_pattern": 2, "tir.noalias": True})
             for ax0, ax1 in T.grid(T.int64(1280), T.int64(320)):
@@ -1156,7 +1156,7 @@ def test_dead_group():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(rxplaceholder: T.Buffer((T.int64(1), T.int64(128)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(128),), "float32"), T_add: 
T.Buffer((T.int64(1), T.int64(128)), "float32")):
             T.func_attr({"op_pattern": 0, "tir.noalias": True})
             # with T.block("root"):
@@ -1167,7 +1167,7 @@ def test_dead_group():
                     T.writes(T_add[v_ax0, v_ax1])
                     T_add[v_ax0, v_ax1] = rxplaceholder[v_ax0, v_ax1] + 
rxplaceholder_1[v_ax1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def add1(rxplaceholder: T.Buffer((T.int64(1), T.int64(10)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(10),), "float32"), T_add: 
T.Buffer((T.int64(1), T.int64(10)), "float32")):
             T.func_attr({"op_pattern": 0, "tir.noalias": True})
             # with T.block("root"):
@@ -1178,7 +1178,7 @@ def test_dead_group():
                     T.writes(T_add[v_ax0, v_ax1])
                     T_add[v_ax0, v_ax1] = rxplaceholder[v_ax0, v_ax1] + 
rxplaceholder_1[v_ax1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul(rxplaceholder: T.Buffer((T.int64(1), T.int64(784)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(784), T.int64(128)), "float32"), 
matmul_1: T.Buffer((T.int64(1), T.int64(128)), "float32")):
             T.func_attr({"op_pattern": 4, "tir.noalias": True})
             # with T.block("root"):
@@ -1191,7 +1191,7 @@ def test_dead_group():
                         matmul_1[v_i0, v_i1] = T.float32(0)
                     matmul_1[v_i0, v_i1] = matmul_1[v_i0, v_i1] + 
rxplaceholder[v_i0, v_k] * rxplaceholder_1[v_k, v_i1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul1(rxplaceholder: T.Buffer((T.int64(1), T.int64(128)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(128), T.int64(10)), "float32"), 
matmul: T.Buffer((T.int64(1), T.int64(10)), "float32")):
             T.func_attr({"op_pattern": 4, "tir.noalias": True})
             # with T.block("root"):
@@ -1204,7 +1204,7 @@ def test_dead_group():
                         matmul[v_i0, v_i1] = T.float32(0)
                     matmul[v_i0, v_i1] = matmul[v_i0, v_i1] + 
rxplaceholder[v_i0, v_k] * rxplaceholder_1[v_k, v_i1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def relu(rxplaceholder: T.Buffer((T.int64(1), T.int64(128)), 
"float32"), compute: T.Buffer((T.int64(1), T.int64(128)), "float32")):
             T.func_attr({"op_pattern": 0, "tir.noalias": True})
             # with T.block("root"):
@@ -1215,7 +1215,7 @@ def test_dead_group():
                     T.writes(compute[v_i0, v_i1])
                     compute[v_i0, v_i1] = T.max(rxplaceholder[v_i0, v_i1], 
T.float32(0))
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def transpose(rxplaceholder: T.Buffer((T.int64(128), T.int64(784)), 
"float32"), T_transpose: T.Buffer((T.int64(784), T.int64(128)), "float32")):
             T.func_attr({"op_pattern": 2, "tir.noalias": True})
             # with T.block("root"):
@@ -1226,7 +1226,7 @@ def test_dead_group():
                     T.writes(T_transpose[v_ax0, v_ax1])
                     T_transpose[v_ax0, v_ax1] = rxplaceholder[v_ax1, v_ax0]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def transpose1(rxplaceholder: T.Buffer((T.int64(10), T.int64(128)), 
"float32"), T_transpose: T.Buffer((T.int64(128), T.int64(10)), "float32")):
             T.func_attr({"op_pattern": 2, "tir.noalias": True})
             # with T.block("root"):
diff --git a/tests/python/relax/test_transform_fuse_tir.py 
b/tests/python/relax/test_transform_fuse_tir.py
index f59e3f2e9e..9f0ddde08b 100644
--- a/tests/python/relax/test_transform_fuse_tir.py
+++ b/tests/python/relax/test_transform_fuse_tir.py
@@ -625,7 +625,7 @@ def test_multiple_relax_functions():
                 R.output(gv3)
             return gv3
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def fused_add1_exp1_squeeze1(
             x: T.Buffer((T.int64(20), T.int64(10)), "float32"),
             p0: T.Buffer((), "float32"),
@@ -653,7 +653,7 @@ def test_multiple_relax_functions():
                     T.writes(T_squeeze[v_ax0, v_ax1])
                     T_squeeze[v_ax0, v_ax1] = compute[v_ax0, v_ax1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def fused_add_exp_squeeze(
             x: T.Buffer((T.int64(10), T.int64(20)), "float32"),
             p0: T.Buffer((), "float32"),
@@ -796,7 +796,7 @@ def test_symbolic_shape_aware_fuse_with_allocation():
 def test_symbolic_var_in_call_tir_args():
     @I.ir_module
     class Before:
-        @T.prim_func
+        @T.prim_func(private=True)
         def foo(
             X: T.Buffer((T.int64(1), T.int64(1), T.int64(32), T.int64(128)), 
"float32"),
             Y: T.Buffer((T.int64(2048), T.int64(128)), "float32"),
@@ -842,7 +842,7 @@ def test_symbolic_var_in_call_tir_args():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def fused(
             X: T.Buffer((T.int64(1), T.int64(1), T.int64(32), T.int64(128)), 
"float32"),
             Y: T.Buffer((T.int64(2048), T.int64(128)), "float32"),
@@ -886,7 +886,7 @@ def test_symbolic_var_in_call_tir_args():
 def test_same_buffer_multiple_read():
     @I.ir_module
     class Module:
-        @T.prim_func
+        @T.prim_func(private=True)
         def concatenate(
             rxplaceholder: T.Buffer((T.int64(1), T.int64(4), T.int64(64), 
T.int64(64)), "float32"),
             rxplaceholder_1: T.Buffer(
@@ -909,7 +909,7 @@ def test_same_buffer_multiple_read():
                         rxplaceholder[v_ax0, v_ax1, v_ax2, v_ax3],
                     )
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def transpose2(
             rxplaceholder: T.Buffer((T.int64(2), T.int64(4), T.int64(64), 
T.int64(64)), "float32"),
             T_transpose: T.Buffer((T.int64(2), T.int64(64), T.int64(64), 
T.int64(4)), "float32"),
@@ -955,7 +955,7 @@ def test_same_buffer_multiple_read():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def fused_concatenate_transpose2(
             inp_0: T.Buffer((T.int64(1), T.int64(4), T.int64(64), 
T.int64(64)), "float32"),
             T_transpose_handle_intermediate: T.Buffer(
@@ -1033,7 +1033,7 @@ def test_tir_expression_in_shape():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def fused_transpose_matmul(
             x: T.Buffer((T.int64(3), T.int64(4)), "float32"),
             p_y: T.handle,
@@ -1082,7 +1082,7 @@ def test_tir_expression_in_shape():
 def test_tuple_input_unused_field():
     @I.ir_module
     class Module:
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(
             A: T.Buffer((T.int64(4), T.int64(8), T.int64(2048)), "float32"),
             T_reshape: T.Buffer((T.int64(4), T.int64(8), T.int64(32), 
T.int64(64)), "float32"),
@@ -1145,7 +1145,7 @@ def test_tuple_input_unused_field():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def fused_reshape(
             lv_0: T.Buffer((T.int64(4), T.int64(8), T.int64(2048)), "float32"),
             T_reshape_handle_intermediate: T.Buffer(
diff --git a/tests/python/relax/test_transform_gradient_te_register.py 
b/tests/python/relax/test_transform_gradient_te_register.py
index b6b785fe3c..f5da9bed68 100644
--- a/tests/python/relax/test_transform_gradient_te_register.py
+++ b/tests/python/relax/test_transform_gradient_te_register.py
@@ -59,7 +59,7 @@ def get_expected_1():
     # fmt: off
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def f_mul(A: T.Buffer((T.int64(5), T.int64(5)), "float32"), B: 
T.Buffer((T.int64(5), T.int64(5)), "float32"), f_mul_1: T.Buffer((T.int64(5), 
T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -70,7 +70,7 @@ def get_expected_1():
                     T.writes(f_mul_1[v_i0, v_i1])
                     f_mul_1[v_i0, v_i1] = A[v_i0, v_i1] * B[v_i0, v_i1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def f_mul_grad(A: T.Buffer((T.int64(5), T.int64(5)), "float32"), B: 
T.Buffer((T.int64(5), T.int64(5)), "float32"), C: T.Buffer((T.int64(5), 
T.int64(5)), "float32"), f_mul_grad_1: T.Buffer((T.int64(5), T.int64(5)), 
"float32"), f_mul_grad_2: T.Buffer((T.int64(5), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -146,7 +146,7 @@ def test_call_tir(register_te_grads):
     # fmt: off
     @I.ir_module
     class Before:
-        @T.prim_func
+        @T.prim_func(private=True)
         def f_mul(A: T.Buffer((T.int64(5), T.int64(5)), "float32"), B: 
T.Buffer((T.int64(5), T.int64(5)), "float32"), f_mul_1: T.Buffer((T.int64(5), 
T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -175,7 +175,7 @@ def get_expected_2():
     # fmt: off
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def f_mul(A: T.Buffer((T.int64(5), T.int64(5)), "float32"), f_mul2: 
T.Buffer((T.int64(5), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -186,7 +186,7 @@ def get_expected_2():
                     T.writes(f_mul2[v_i0, v_i1])
                     f_mul2[v_i0, v_i1] = A[v_i0, v_i1] * T.float32(2)
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def f_mulk_grad(A: T.Buffer((T.int64(5), T.int64(5)), "float32"), B: 
T.Buffer((T.int64(5), T.int64(5)), "float32"), f_mulk_grad_1: 
T.Buffer((T.int64(5), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -255,7 +255,7 @@ def test_call_tir_kwargs(register_te_grads):
     # fmt: off
     @I.ir_module
     class Before:
-        @T.prim_func
+        @T.prim_func(private=True)
         def f_mul(A: T.Buffer((T.int64(5), T.int64(5)), "float32"), f_mul2: 
T.Buffer((T.int64(5), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -284,7 +284,7 @@ def get_expected_3():
     # fmt: off
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def f_mul(var_A: T.handle, var_B: T.handle, var_f_mul: T.handle):
             T.func_attr({"tir.noalias": T.bool(True)})
             n = T.int64()
@@ -299,7 +299,7 @@ def get_expected_3():
                     T.writes(f_mul_1[v_i0, v_i1])
                     f_mul_1[v_i0, v_i1] = A[v_i0, v_i1] * B[v_i0, v_i1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def f_mul_grad(var_A: T.handle, var_B: T.handle, var_C: T.handle, 
var_f_mul_grad_1: T.handle, var_f_mul_grad_2: T.handle):
             T.func_attr({"tir.noalias": T.bool(True)})
             n = T.int64()
diff --git a/tests/python/relax/test_transform_legalize_ops.py 
b/tests/python/relax/test_transform_legalize_ops.py
index 73c5770c5d..146e2e0cea 100644
--- a/tests/python/relax/test_transform_legalize_ops.py
+++ b/tests/python/relax/test_transform_legalize_ops.py
@@ -41,7 +41,7 @@ def test_customize_legalize():
             gv = R.call_tir(cls.add, (y, x), R.Tensor((4, 3, 2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), T.int64(2), 
T.int64(1)), "float32"), rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), T_add: T.Buffer((T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -70,7 +70,7 @@ def test_legalize_multiple_types_of_call():
             gv = R.multiply(x, R.const(2.0, "float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def identity(rxplaceholder: T.Buffer((T.int64(3), T.int64(3)), 
"float32"), T_id: T.Buffer((T.int64(3), T.int64(3)), "float32")):
             for ax0, ax1 in T.grid(T.int64(3), T.int64(3)):
                 with T.block("T_add"):
@@ -95,7 +95,7 @@ def test_legalize_multiple_types_of_call():
             gv = R.call_tir(cls.multiply, (x,), R.Tensor((3, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def identity(rxplaceholder: T.Buffer((T.int64(3), T.int64(3)), 
"float32"), T_id: T.Buffer((T.int64(3), T.int64(3)), "float32")):
             for ax0, ax1 in T.grid(T.int64(3), T.int64(3)):
                 with T.block("T_add"):
@@ -104,7 +104,7 @@ def test_legalize_multiple_types_of_call():
                     T.writes(T_id[v_ax0, v_ax1])
                     T_id[v_ax0, v_ax1] = rxplaceholder[v_ax0, v_ax1]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def multiply(rxplaceholder: T.Buffer((T.int64(3), T.int64(3)), 
"float32"), T_multiply: T.Buffer((T.int64(3), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for ax0, ax1 in T.grid(T.int64(3), T.int64(3)):
@@ -185,7 +185,7 @@ def test_legalize_scalar_data_type_preserve():
 
     @tvm.script.ir_module
     class Expected0:
-        @T.prim_func
+        @T.prim_func(private=True)
         def multiply(
             rxplaceholder: T.Buffer((T.int64(3), T.int64(3)), "float16"),
             T_multiply: T.Buffer((T.int64(3), T.int64(3)), "float16"),
@@ -209,7 +209,7 @@ def test_legalize_scalar_data_type_preserve():
 
     @tvm.script.ir_module
     class Expected1:
-        @T.prim_func
+        @T.prim_func(private=True)
         def multiply(
             rxplaceholder: T.Buffer((T.int64(3), T.int64(3)), "uint8"),
             T_multiply: T.Buffer((T.int64(3), T.int64(3)), "uint8"),
@@ -231,7 +231,7 @@ def test_legalize_scalar_data_type_preserve():
 
     @tvm.script.ir_module
     class Expected2:
-        @T.prim_func
+        @T.prim_func(private=True)
         def equal(
             rxplaceholder: T.Buffer((T.int64(3), T.int64(3)), "bool"),
             T_equal: T.Buffer((T.int64(3), T.int64(3)), "bool"),
diff --git a/tests/python/relax/test_transform_legalize_ops_binary.py 
b/tests/python/relax/test_transform_legalize_ops_binary.py
index dc14a0c3fd..d71a248b25 100644
--- a/tests/python/relax/test_transform_legalize_ops_binary.py
+++ b/tests/python/relax/test_transform_legalize_ops_binary.py
@@ -41,7 +41,7 @@ def test_add():
             gv = R.call_tir(Expected.add, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), T.int64(3)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), T.int64(2), 
T.int64(1)), "float32"), T_add: T.Buffer((T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -72,7 +72,7 @@ def test_add_with_arg0_constant_scalar():
             gv = R.call_tir(Expected.add, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), "float32"), 
T_add: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -103,7 +103,7 @@ def test_add_with_arg1_constant_scalar():
             gv = R.call_tir(Expected.add, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), "float32"), 
T_add: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -142,7 +142,7 @@ def test_add_symbolic():
             gv = R.call_tir(Expected.add, (x, y), R.Tensor((a, b, c, d), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def add(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_T_add: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -180,7 +180,7 @@ def test_divide():
             gv = R.call_tir(Expected.divide, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def divide(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_divide: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -211,7 +211,7 @@ def test_divide_with_arg0_constant_scalar():
             gv = R.call_tir(Expected.divide, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def divide(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_divide: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -242,7 +242,7 @@ def test_divide_with_arg1_constant_scalar():
             gv = R.call_tir(Expected.divide, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def divide(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_divide: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -281,7 +281,7 @@ def test_divide_symbolic():
             gv = R.call_tir(Expected.divide, (x, y), R.Tensor((a, b, c, d), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def divide(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_T_divide: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -319,7 +319,7 @@ def test_floor_divide():
             gv = R.call_tir(Expected.floor_divide, (x, y), R.Tensor((4, 3, 2, 
3), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def floor_divide(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_floor_divide: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -350,7 +350,7 @@ def test_floor_divide_with_arg0_constant_scalar():
             gv = R.call_tir(Expected.floor_divide, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def floor_divide(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_floor_divide: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -381,7 +381,7 @@ def test_floor_divide_with_arg1_constant_scalar():
             gv = R.call_tir(Expected.floor_divide, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def floor_divide(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_floor_divide: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -420,7 +420,7 @@ def test_floor_divide_symbolic():
             gv = R.call_tir(Expected.floor_divide, (x, y), R.Tensor((a, b, c, 
d), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def floor_divide(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_floor_divide: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -458,7 +458,7 @@ def test_multiply():
             gv = R.call_tir(Expected.multiply, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def multiply(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_multiply: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -497,7 +497,7 @@ def test_multiply_symbolic():
             gv = R.call_tir(Expected.multiply, (x, y), R.Tensor((a, b, c, d), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def multiply(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_multiply: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -530,7 +530,7 @@ def test_power():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def power(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_power: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -567,7 +567,7 @@ def test_power_symbolic():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def power(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_T_power: T.handle):
             T.func_attr({"tir.noalias": True})
             c = T.int64()
@@ -615,7 +615,7 @@ def test_subtract():
             gv = R.call_tir(Expected.subtract, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def subtract(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_subtract: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -654,7 +654,7 @@ def test_subtract_symbolic():
             gv = R.call_tir(Expected.subtract, (x, y), R.Tensor((a, b, c, d), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def subtract(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_subtract: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -695,7 +695,7 @@ def test_equal():
             gv = R.call_tir(Expected.equal, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def equal(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_equal: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -726,7 +726,7 @@ def test_equal_with_arg0_constant_scalar():
             gv = R.call_tir(Expected.equal, (x,), R.Tensor((2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def equal(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_equal: T.Buffer((T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -757,7 +757,7 @@ def test_equal_with_arg1_constant_scalar():
             gv = R.call_tir(Expected.equal, (x,), R.Tensor((2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def equal(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_equal: T.Buffer((T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -796,7 +796,7 @@ def test_equal_symbolic():
             gv = R.call_tir(Expected.equal, (x, y), R.Tensor((a, b, c, d), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def equal(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_T_equal: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -834,7 +834,7 @@ def test_greater():
             gv = R.call_tir(Expected.greater, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def greater(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_greater: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -865,7 +865,7 @@ def test_greater_with_arg0_constant_scalar():
             gv = R.call_tir(Expected.greater, (x,), R.Tensor((2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def greater(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_greater: T.Buffer((T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -896,7 +896,7 @@ def test_greater_with_arg1_constant_scalar():
             gv = R.call_tir(Expected.greater, (x,), R.Tensor((2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def greater(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_greater: T.Buffer((T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -935,7 +935,7 @@ def test_greater_symbolic():
             gv = R.call_tir(Expected.greater, (x, y), R.Tensor((a, b, c, d), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def greater(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_greater: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -973,7 +973,7 @@ def test_greater_equal():
             gv = R.call_tir(Expected.greater_equal, (x, y), R.Tensor((4, 3, 2, 
3), dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def greater_equal(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_greater_equal: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -1012,7 +1012,7 @@ def test_greater_equal_symbolic():
             gv = R.call_tir(Expected.greater_equal, (x, y), R.Tensor((a, b, c, 
d), dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def greater_equal(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_greater_equal: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1050,7 +1050,7 @@ def test_less():
             gv = R.call_tir(Expected.less, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def less(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), T.int64(3)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), T.int64(2), 
T.int64(1)), "float32"), T_less: T.Buffer((T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -1089,7 +1089,7 @@ def test_less_symbolic():
             gv = R.call_tir(Expected.less, (x, y), R.Tensor((a, b, c, d), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def less(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_T_less: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1127,7 +1127,7 @@ def test_less_equal():
             gv = R.call_tir(Expected.less_equal, (x, y), R.Tensor((4, 3, 2, 
3), dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def less_equal(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_less_equal: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -1158,7 +1158,7 @@ def test_less_equal_with_arg0_constant_scalar():
             gv = R.call_tir(Expected.less_equal, (x,), R.Tensor((2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def less_equal(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_less_equal: T.Buffer((T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -1189,7 +1189,7 @@ def test_less_equal_with_arg1_constant_scalar():
             gv = R.call_tir(Expected.less_equal, (x,), R.Tensor((2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def less_equal(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_less_equal: T.Buffer((T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -1228,7 +1228,7 @@ def test_less_equal_symbolic():
             gv = R.call_tir(Expected.less_equal, (x, y), R.Tensor((a, b, c, 
d), dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def less_equal(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_less_equal: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1266,7 +1266,7 @@ def test_not_equal():
             gv = R.call_tir(Expected.not_equal, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def not_equal(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_not_equal: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "bool")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -1305,7 +1305,7 @@ def test_not_equal_symbolic():
             gv = R.call_tir(Expected.not_equal, (x, y), R.Tensor((a, b, c, d), 
dtype="bool"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def not_equal(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_not_equal: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1344,7 +1344,7 @@ def test_maximum():
             gv = R.call_tir(Expected.maximum, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def maximum(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_maximum: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -1375,7 +1375,7 @@ def test_maximum_with_arg0_constant_scalar():
             gv = R.call_tir(Expected.maximum, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def maximum(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_maximum: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -1406,7 +1406,7 @@ def test_maximum_with_arg1_constant_scalar():
             gv = R.call_tir(Expected.maximum, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def maximum(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_maximum: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -1445,7 +1445,7 @@ def test_maximum_symbolic():
             gv = R.call_tir(Expected.maximum, (x, y), R.Tensor((a, b, c, d), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def maximum(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_maximum: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1484,7 +1484,7 @@ def test_minimum():
             gv = R.call_tir(Expected.minimum, (x, y), R.Tensor((4, 3, 2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def minimum(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(2), T.int64(1)), "float32"), T_minimum: T.Buffer((T.int64(4), 
T.int64(3), T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(3), T.int64(2), 
T.int64(3)):
@@ -1515,7 +1515,7 @@ def test_minimum_with_arg0_constant_scalar():
             gv = R.call_tir(Expected.minimum, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def minimum(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_minimum: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -1546,7 +1546,7 @@ def test_minimum_with_arg1_constant_scalar():
             gv = R.call_tir(Expected.minimum, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def minimum(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_minimum: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -1585,7 +1585,7 @@ def test_minimum_symbolic():
             gv = R.call_tir(Expected.minimum, (x, y), R.Tensor((a, b, c, d), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def minimum(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_T_minimum: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
diff --git a/tests/python/relax/test_transform_legalize_ops_create_datatype.py 
b/tests/python/relax/test_transform_legalize_ops_create_datatype.py
index 1e904823d3..7b2b2d2e76 100644
--- a/tests/python/relax/test_transform_legalize_ops_create_datatype.py
+++ b/tests/python/relax/test_transform_legalize_ops_create_datatype.py
@@ -40,7 +40,7 @@ def test_full():
             gv = R.call_tir(Expected.full, (v,), R.Tensor((2, 3), 
dtype="int32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def full(rxplaceholder: T.Buffer((), "int32"), T_full: 
T.Buffer((T.int64(2), T.int64(3)), "int32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -71,7 +71,7 @@ def test_full_constant_scalar_fill_value():
             gv = R.call_tir(Expected.full, R.tuple(), R.Tensor((2, 3), 
dtype="int32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def full(T_full: T.Buffer((T.int64(2), T.int64(3)), "int32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -102,7 +102,7 @@ def test_full_different_dtype():
             gv = R.call_tir(Expected.full, (v,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def full(rxplaceholder: T.Buffer((), "int32"), T_full: 
T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -137,7 +137,7 @@ def test_full_symbolic():
             gv = R.call_tir(Expected.full, (v,), R.Tensor((m, n), 
dtype="int32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def full(rxplaceholder: T.Buffer((), "int32"), var_T_full: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -171,7 +171,7 @@ def test_full_like():
             gv = R.call_tir(Expected.full, (v,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def full(rxplaceholder: T.Buffer((), "float32"), T_full: 
T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -202,7 +202,7 @@ def test_full_like_constant_scalar_fill_value():
             gv = R.call_tir(Expected.full, R.tuple(), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def full(T_full: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -233,7 +233,7 @@ def test_full_like_different_dtype():
             gv = R.call_tir(Expected.full, (v,), R.Tensor((2, 3), 
dtype="float64"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def full(rxplaceholder: T.Buffer((), "float32"), T_full: 
T.Buffer((T.int64(2), T.int64(3)), "float64")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -268,7 +268,7 @@ def test_full_like_symbolic():
             gv = R.call_tir(Expected.full, (v,), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def full(rxplaceholder: T.Buffer((), "float32"), var_T_full: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -302,7 +302,7 @@ def test_ones():
             gv = R.call_tir(Expected.ones, R.tuple(), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def ones(T_full: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -337,7 +337,7 @@ def test_ones_symbolic():
             gv = R.call_tir(Expected.ones, R.tuple(), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def ones(var_T_full: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -371,7 +371,7 @@ def test_ones_like():
             gv = R.call_tir(Expected.ones, R.tuple(), R.Tensor((2, 3), 
dtype="int32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def ones(T_full: T.Buffer((T.int64(2), T.int64(3)), "int32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -406,7 +406,7 @@ def test_ones_like_symbolic():
             gv = R.call_tir(Expected.ones, R.tuple(), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def ones(var_T_full: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -440,7 +440,7 @@ def test_zeros():
             gv = R.call_tir(Expected.zeros, R.tuple(), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def zeros(T_full: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -475,7 +475,7 @@ def test_zeros_symbolic():
             gv = R.call_tir(Expected.zeros, R.tuple(), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def zeros(var_T_full: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -509,7 +509,7 @@ def test_zeros_like():
             gv = R.call_tir(Expected.zeros, R.tuple(), R.Tensor((2, 3), 
dtype="int32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def zeros(T_full: T.Buffer((T.int64(2), T.int64(3)), "int32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -544,7 +544,7 @@ def test_zeros_like_symbolic():
             gv = R.call_tir(Expected.zeros, R.tuple(), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def zeros(var_T_full: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -602,7 +602,7 @@ def test_arange_symbolic():
             gv = R.call_tir(cls.arange, R.tuple(), out_sinfo=R.Tensor((n // 
2,), dtype="int64"), tir_vars=R.shape([n]))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def arange(var_T_arange: T.handle, n: T.int64):
             T.func_attr({"tir.noalias": T.bool(True)})
             T_arange = T.match_buffer(var_T_arange, (n // T.int64(2),), 
"int64")
@@ -632,7 +632,7 @@ def test_tril():
             gv = R.call_tir(Expected.tril, (x,), R.Tensor((2, 3, 4), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def tril(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4)), 
"float32"), trilu: T.Buffer((T.int64(2), T.int64(3), T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(2), T.int64(3), T.int64(4)):
@@ -669,7 +669,7 @@ def test_tril_symbolic():
             gv = R.call_tir(Expected.tril, (x,), R.Tensor((m, n, k), 
dtype="int8"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def tril(var_rxplaceholder: T.handle, var_trilu: T.handle):
             T.func_attr({"tir.noalias": True})
             k = T.int64()
@@ -705,7 +705,7 @@ def test_triu():
             gv = R.call_tir(Expected.triu, (x,), R.Tensor((2, 3, 4), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def triu(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4)), 
"float32"), trilu: T.Buffer((T.int64(2), T.int64(3), T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(2), T.int64(3), T.int64(4)):
@@ -742,7 +742,7 @@ def test_triu_symbolic():
             gv = R.call_tir(Expected.triu, (x,), R.Tensor((m, n, k), 
dtype="int8"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def triu(var_rxplaceholder: T.handle, var_trilu: T.handle):
             T.func_attr({"tir.noalias": True})
             k = T.int64()
@@ -781,7 +781,7 @@ def test_astype():
             gv = R.call_tir(Expected.cast, (x,), R.Tensor((2, 3, 4), 
dtype="int32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def cast(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4)), 
"float32"), compute: T.Buffer((T.int64(2), T.int64(3), T.int64(4)), "int32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(2), T.int64(3), T.int64(4)):
@@ -837,7 +837,7 @@ def test_astype_symbolic():
             gv = R.call_tir(Expected.cast, (x,), R.Tensor((m, n), 
dtype="int32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def cast(var_rxplaceholder: T.handle, var_compute: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
diff --git a/tests/python/relax/test_transform_legalize_ops_grad.py 
b/tests/python/relax/test_transform_legalize_ops_grad.py
index 67d0b9194b..f38e1f971e 100644
--- a/tests/python/relax/test_transform_legalize_ops_grad.py
+++ b/tests/python/relax/test_transform_legalize_ops_grad.py
@@ -31,7 +31,7 @@ def test_nll_loss_backward():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def nll_loss_backward(rxplaceholder: T.Buffer((), "float32"), 
rxplaceholder_1: T.Buffer((T.int64(2), T.int64(3), T.int64(4), T.int64(5)), 
"float32"), rxplaceholder_2: T.Buffer((T.int64(2), T.int64(4), T.int64(5)), 
"int64"), rxplaceholder_3: T.Buffer((T.int64(4),), "float32"), pred_grad: 
T.Buffer((T.int64(2), T.int64(3), T.int64(4), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -94,7 +94,7 @@ def test_nll_loss_backward_no_weight():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def te_nll_loss_backward_no_weight(rxplaceholder: T.Buffer((), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32"), rxplaceholder_2: T.Buffer((T.int64(2), T.int64(4), 
T.int64(5)), "int64"), pred_grad: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -170,7 +170,7 @@ def test_nll_loss_backward_no_batch():
             gv = R.call_tir(cls.nll_loss_backward, (output_grad, predictions, 
targets, weights), out_sinfo=R.Tensor((4,), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def nll_loss_backward(rxplaceholder: T.Buffer((), "float32"), 
rxplaceholder_1: T.Buffer((T.int64(4),), "float32"), rxplaceholder_2: 
T.Buffer((), "int64"), rxplaceholder_3: T.Buffer((T.int64(4),), "float32"), 
pred_grad: T.Buffer((T.int64(4),), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -215,7 +215,7 @@ def test_max_pool2d_backward():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def max_pool2d_backward(A: T.Buffer((T.int64(3), T.int64(2), 
T.int64(6), T.int64(5)), "float32"), B: T.Buffer((T.int64(3), T.int64(2), 
T.int64(10), T.int64(10)), "float32"), T_pool_grad: T.Buffer((T.int64(3), 
T.int64(2), T.int64(10), T.int64(10)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -271,7 +271,7 @@ def test_avg_pool2d_backward():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def avg_pool2d_backward(rxplaceholder: T.Buffer((T.int64(3), 
T.int64(2), T.int64(6), T.int64(5)), "float32"), rxplaceholder_1: 
T.Buffer((T.int64(3), T.int64(2), T.int64(10), T.int64(10)), "float32"), 
T_pool_grad: T.Buffer((T.int64(3), T.int64(2), T.int64(10), T.int64(10)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -306,7 +306,7 @@ def test_take_backward():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def take_backward(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_rxplaceholder_2: T.handle, out_buf: T.Buffer((T.int64(3), 
T.int64(4), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             rxplaceholder = T.match_buffer(var_rxplaceholder, (T.int64(3), 
T.int64(2), T.int64(5)), offset_factor=1)
@@ -344,7 +344,7 @@ def test_take_backward_symbolic():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def take_backward(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_rxplaceholder_2: T.handle, var_take_backward: T.handle):
             T.func_attr({"tir.noalias": T.bool(True)})
             m, i = T.int64(), T.int64()
diff --git a/tests/python/relax/test_transform_legalize_ops_image.py 
b/tests/python/relax/test_transform_legalize_ops_image.py
index 18acb282c2..7c06ed46b6 100644
--- a/tests/python/relax/test_transform_legalize_ops_image.py
+++ b/tests/python/relax/test_transform_legalize_ops_image.py
@@ -37,7 +37,7 @@ def test_image_resize2d():
             gv = R.call_tir(Expected.resize2d, (x,), R.Tensor((2, 16, 16, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def resize2d(rxplaceholder: T.Buffer((T.int64(2), T.int64(8), 
T.int64(8), T.int64(3)), "float32"), resize: T.Buffer((T.int64(2), T.int64(16), 
T.int64(16), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(2), T.int64(16), T.int64(16), 
T.int64(3)):
@@ -76,7 +76,7 @@ def test_image_resize2d_symbolic():
             gv = R.call_tir(Expected.resize2d, (x,), R.Tensor((n, c, oh, ow, 
16), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def resize2d(var_rxplaceholder: T.handle, var_resize: T.handle):
             T.func_attr({"tir.noalias": True})
             c = T.int64()
diff --git 
a/tests/python/relax/test_transform_legalize_ops_index_linear_algebra.py 
b/tests/python/relax/test_transform_legalize_ops_index_linear_algebra.py
index 8c10255741..2f1f8bb53b 100644
--- a/tests/python/relax/test_transform_legalize_ops_index_linear_algebra.py
+++ b/tests/python/relax/test_transform_legalize_ops_index_linear_algebra.py
@@ -40,7 +40,7 @@ def test_take():
             gv = R.call_tir(Expected.take, (x, indices), R.Tensor((2, 4, 4), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def take(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4)), 
"float32"), rxplaceholder_1: T.Buffer(T.int64(4), "int64"), T_take: 
T.Buffer((T.int64(2), T.int64(4), T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(2), T.int64(4), T.int64(4)):
@@ -75,7 +75,7 @@ def test_take_symbolic():
             gv = R.call_tir(Expected.take, (x, indices), R.Tensor((m, i), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def take(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_T_take: T.handle):
             T.func_attr({"tir.noalias": True})
             i = T.int64()
@@ -112,7 +112,7 @@ def test_strided_slice():
             gv = R.call_tir(Expected.strided_slice, (x,), R.Tensor((4, 9, 10, 
3), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def strided_slice(rxplaceholder: T.Buffer((T.int64(8), T.int64(9), 
T.int64(10), T.int64(10)), "float32"), T_strided_slice_with_axes: 
T.Buffer((T.int64(4), T.int64(9), T.int64(10), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(9), T.int64(10), 
T.int64(3)):
@@ -143,7 +143,7 @@ def test_strided_slice_no_strides():
             gv = R.call_tir(Expected.strided_slice, (x,), 
out_sinfo=R.Tensor((7, 9, 10, 2), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def strided_slice(rxplaceholder: T.Buffer((T.int64(8), T.int64(9), 
T.int64(10), T.int64(10)), "float32"), T_strided_slice_with_axes: 
T.Buffer((T.int64(7), T.int64(9), T.int64(10), T.int64(2)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -192,7 +192,7 @@ def test_strided_slice_symbolic():
             gv = R.call_tir(Expected.strided_slice, (x,), R.Tensor((3, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def strided_slice(var_rxplaceholder: T.handle, 
var_T_strided_slice_with_axes: T.handle):
             T.func_attr({"tir.noalias": True})
             n = T.int64()
@@ -220,7 +220,7 @@ def test_dynamic_strided_slice():
             return gv
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def dynamic_strided_slice(
             rxplaceholder: T.Buffer(
                 (T.int64(8), T.int64(9), T.int64(10), T.int64(10)), "float32"
@@ -265,7 +265,7 @@ def test_dynamic_strided_slice():
                         + v_ax3 * rxplaceholder_3[T.int64(3)],
                     ]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def shape_func(
             rxplaceholder: T.Buffer(
                 (T.int64(8), T.int64(9), T.int64(10), T.int64(10)), "float32"
@@ -512,7 +512,7 @@ def test_dynamic_strided_slice_symbolic():
             return gv
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def dynamic_strided_slice(
             var_rxplaceholder: T.handle,
             rxplaceholder: T.Buffer((T.int64(2),), "int64"),
@@ -547,7 +547,7 @@ def test_dynamic_strided_slice_symbolic():
                         + v_ax1 * rxplaceholder_2[T.int64(1)],
                     ]
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def shape_func(
             var_rxplaceholder: T.handle,
             rxplaceholder: T.Buffer((T.int64(2),), "int64"),
@@ -718,7 +718,7 @@ def test_matmul_1_4():
             gv = R.call_tir(Expected.matmul, (x, y), R.Tensor((2, 3, 5), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul(rxplaceholder: T.Buffer(T.int64(4), "float32"), 
rxplaceholder_1: T.Buffer((T.int64(2), T.int64(3), T.int64(4), T.int64(5)), 
"float32"), matmul: T.Buffer((T.int64(2), T.int64(3), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(2), T.int64(3), T.int64(5), 
T.int64(4)):
@@ -751,7 +751,7 @@ def test_matmul_4_1():
             gv = R.call_tir(Expected.matmul, (x, y), R.Tensor((2, 3, 4), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4), T.int64(5)), "float32"), rxplaceholder_1: T.Buffer(T.int64(5), 
"float32"), matmul: T.Buffer((T.int64(2), T.int64(3), T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)):
@@ -784,7 +784,7 @@ def test_matmul_1_1():
             gv = R.call_tir(Expected.matmul, (x, y), R.Tensor((), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul(rxplaceholder: T.Buffer(T.int64(4), "float32"), 
rxplaceholder_1: T.Buffer(T.int64(4), "float32"), matmul: T.Buffer((), 
"float32")):
             T.func_attr({"tir.noalias": True})
             for i0 in T.serial(T.int64(4)):
@@ -817,7 +817,7 @@ def test_matmul_4_5():
             gv = R.call_tir(Expected.matmul, (x, y), R.Tensor((6, 2, 3, 4, 7), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4), T.int64(5)), "float16"), rxplaceholder_1: T.Buffer((T.int64(6), 
T.int64(2), T.int64(3), T.int64(5), T.int64(7)), "float16"), matmul: 
T.Buffer((T.int64(6), T.int64(2), T.int64(3), T.int64(4), T.int64(7)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3, i4, i5 in T.grid(T.int64(6), T.int64(2), 
T.int64(3), T.int64(4), T.int64(7), T.int64(5)):
@@ -860,7 +860,7 @@ def test_matmul_4_5_symbolic():
             gv = R.call_tir(Expected.matmul, (x, y), R.Tensor((a, b, c, m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_matmul: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -897,7 +897,7 @@ def test_matmul_batching_dim_1():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def matmul(A: T.Buffer((T.int64(1), T.int64(1), T.int64(4), 
T.int64(5)), "float32"), B: T.Buffer((T.int64(1), T.int64(1), T.int64(5), 
T.int64(7)), "float32"), matmul_1: T.Buffer((T.int64(1), T.int64(1), 
T.int64(4), T.int64(7)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -940,7 +940,7 @@ def test_einsum():
             gv = R.call_tir(cls.einsum, (x, y), out_sinfo=R.Tensor((2, 4), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def einsum(
             rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), "float32"),
             rxplaceholder_1: T.Buffer((T.int64(3), T.int64(4)), "float32"),
@@ -987,7 +987,7 @@ def test_einsum_symbolic():
             gv = R.call_tir(cls.einsum, (x, y), out_sinfo=R.Tensor((a, c), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def einsum(
             var_rxplaceholder: T.handle,
             var_rxplaceholder_1: T.handle,
diff --git a/tests/python/relax/test_transform_legalize_ops_manipulate.py 
b/tests/python/relax/test_transform_legalize_ops_manipulate.py
index 5f556730d9..09cad024df 100644
--- a/tests/python/relax/test_transform_legalize_ops_manipulate.py
+++ b/tests/python/relax/test_transform_legalize_ops_manipulate.py
@@ -42,7 +42,7 @@ def test_broadcast_to():
             gv = R.call_tir(Expected.broadcast_to, (x,), R.Tensor((4, 2, 5, 
3), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def broadcast_to(rxplaceholder: T.Buffer((T.int64(2), T.int64(1), 
T.int64(3)), "float32"), T_broadcast_to: T.Buffer((T.int64(4), T.int64(2), 
T.int64(5), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(4), T.int64(2), T.int64(5), 
T.int64(3)):
@@ -81,7 +81,7 @@ def test_broadcast_to_symbolic():
             gv = R.call_tir(Expected.broadcast_to, (x,), R.Tensor((a, b, c, 
d), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def broadcast_to(var_rxplaceholder: T.handle, var_T_broadcast_to: 
T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -118,7 +118,7 @@ def test_concat():
             gv = R.call_tir(Expected.concatenate, (x1, x2, x3), R.Tensor((1, 
9, 3), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def concatenate(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(1), T.int64(3), 
T.int64(3)), "float32"), rxplaceholder_2: T.Buffer((T.int64(1), T.int64(4), 
T.int64(3)), "float32"), T_concat: T.Buffer((T.int64(1), T.int64(9), 
T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(1), T.int64(9), T.int64(3)):
@@ -151,7 +151,7 @@ def test_concat_input_tuple_var():
             gv2 = R.call_tir(Expected.concatenate, (gv, gv1), R.Tensor((3, 9), 
dtype="float32"))
             return gv2
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def concatenate(rxplaceholder: T.Buffer((T.int64(3), T.int64(4)), 
"float32"), rxplaceholder_1: T.Buffer((T.int64(3), T.int64(5)), "float32"), 
T_concat: T.Buffer((T.int64(3), T.int64(9)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(3), T.int64(9)):
@@ -193,7 +193,7 @@ def test_concat_input_tuple_var_symbolic():
             gv3 = R.call_tir(Expected.concatenate, (gv, gv1, gv2), 
R.Tensor((a, ((b0 + b1) + b2)), dtype="float32"))
             return gv3
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def concatenate(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_rxplaceholder_2: T.handle, var_T_concat: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -232,7 +232,7 @@ def test_expand_dims():
             gv = R.call_tir(Expected.expand_dims, (x,), R.Tensor((2, 1, 1, 1, 
3, 1, 4, 1), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def expand_dims(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4)), "float32"), expand_dims: T.Buffer((T.int64(2), T.int64(1), 
T.int64(1), T.int64(1), T.int64(3), T.int64(1), T.int64(4), T.int64(1)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3, i4, i5, i6, i7 in T.grid(T.int64(2), 
T.int64(1), T.int64(1), T.int64(1), T.int64(3), T.int64(1), T.int64(4), 
T.int64(1)):
@@ -269,7 +269,7 @@ def test_expand_dims_symbolic():
             gv = R.call_tir(Expected.expand_dims, (x,), R.Tensor((a, 1, b, 1, 
c, 1), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def expand_dims(var_rxplaceholder: T.handle, var_expand_dims: 
T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -305,7 +305,7 @@ def test_flatten():
             gv = R.call_tir(Expected.reshape, (x,), R.Tensor((24,), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4)), "float32"), T_reshape: T.Buffer(T.int64(24), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0 in T.serial(T.int64(24)):
@@ -336,7 +336,7 @@ def test_flatten_zero_rank():
             gv = R.call_tir(Expected.reshape, (x,), R.Tensor((1,), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(rxplaceholder: T.Buffer((), "float32"), T_reshape: 
T.Buffer(T.int64(1), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0 in T.serial(T.int64(1)):
@@ -373,7 +373,7 @@ def test_flatten_symbolic():
             gv = R.call_tir(Expected.reshape, (x,), R.Tensor((((a * b) * c),), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(var_rxplaceholder: T.handle, var_T_reshape: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -409,7 +409,7 @@ def test_permute_dims():
             gv = R.call_tir(Expected.transpose, (x,), R.Tensor((2, 4, 3, 1), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def transpose(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3), T.int64(4)), "float32"), T_transpose: T.Buffer((T.int64(2), 
T.int64(4), T.int64(3), T.int64(1)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(2), T.int64(4), T.int64(3), 
T.int64(1)):
@@ -448,7 +448,7 @@ def test_permute_dims_symbolic():
             gv = R.call_tir(Expected.transpose, (x,), R.Tensor((b, d, c, a), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def transpose(var_rxplaceholder: T.handle, var_T_transpose: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -485,7 +485,7 @@ def test_reshape():
             gv = R.call_tir(Expected.reshape, (x,), R.Tensor((8, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(rxplaceholder: T.Buffer((T.int64(1), T.int64(2), 
T.int64(3), T.int64(4)), "float32"), T_reshape: T.Buffer((T.int64(8), 
T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(8), T.int64(3)):
@@ -512,7 +512,7 @@ def test_reshape():
     # After lowering, redundant var might be removed by later dead code 
elimination
     @tvm.script.ir_module
     class Expected2:
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(
             rxplaceholder: T.Buffer((T.int64(1), T.int64(2), T.int64(3), 
T.int64(4)), "float32"),
             T_reshape: T.Buffer((T.int64(8), T.int64(3)), "float32"),
@@ -569,7 +569,7 @@ def test_reshape_symbolic():
             gv = R.call_tir(Expected.reshape, (x,), R.Tensor(((a // 2), (b * 
2)), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(var_rxplaceholder: T.handle, var_T_reshape: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -609,7 +609,7 @@ def test_reshape_symbolic():
             gv = R.call_tir(Expected2.reshape, (x,), R.Tensor(((a // 2), (b * 
2)), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(var_rxplaceholder: T.handle, var_T_reshape: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -649,7 +649,7 @@ def test_reshape_symbolic():
     # After lowering, redundant var might be removed by later dead code 
elimination
     @I.ir_module
     class Expected3:
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(var_rxplaceholder: T.handle, var_T_reshape: T.handle):
             T.func_attr({"tir.noalias": True})
             b = T.int64()
@@ -704,7 +704,7 @@ def test_data_dependent_reshape():
     # fmt: off
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def reshape(
             rxplaceholder: T.Buffer((T.int64(3),), "int64"), var_T_reshape: 
T.handle
         ):
@@ -747,7 +747,7 @@ def test_split_by_indices():
             gv = R.call_tir(Expected.split, (x,), [R.Tensor((2, 3, 4), 
"float32"), R.Tensor((2, 4, 4), "float32"), R.Tensor((2, 3, 4), "float32")])
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def split(rxplaceholder: T.Buffer((T.int64(2), T.int64(10), 
T.int64(4)), "float32"), T_split: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4)), "float32"), T_split_1: T.Buffer((T.int64(2), T.int64(4), 
T.int64(4)), "float32"), T_split_2: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(2), T.int64(3), T.int64(4)):
@@ -804,7 +804,7 @@ def test_split_by_indices_n_section_divisible():
             gv = R.call_tir(Expected.split, (x,), [R.Tensor((2, 5, 4), 
"float32"), R.Tensor((2, 5, 4), "float32")])
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def split(rxplaceholder: T.Buffer((T.int64(2), T.int64(10), 
T.int64(4)), "float32"), T_split_sections: T.Buffer((T.int64(2), T.int64(5), 
T.int64(4)), "float32"), T_split_sections_1: T.Buffer((T.int64(2), T.int64(5), 
T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(2), T.int64(5), T.int64(4)):
@@ -845,7 +845,7 @@ def test_split_by_indices_n_section_divisible_symbolic():
             gv = R.call_tir(Expected.split, (x,), [R.Tensor((m, ((n * 3) // 
3)), "float32"), R.Tensor((m, ((((n * 3) // 3) * 2) - ((n * 3) // 3))), 
"float32"), R.Tensor((m, ((n * 3) - (((n * 3) // 3) * 2))), "float32")], 
tir_vars=(n,))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def split(var_rxplaceholder: T.handle, var_T_split_sections: T.handle, 
var_T_split_sections_1: T.handle, var_T_split_sections_2: T.handle, n: T.int64):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -893,7 +893,7 @@ def test_squeeze():
             gv = R.call_tir(Expected.squeeze, (x,), R.Tensor((2, 3, 1, 4), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def squeeze(rxplaceholder: T.Buffer((T.int64(2), T.int64(1), 
T.int64(3), T.int64(1), T.int64(1), T.int64(4)), "float32"), T_squeeze: 
T.Buffer((T.int64(2), T.int64(3), T.int64(1), T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(2), T.int64(3), T.int64(1), 
T.int64(4)):
@@ -924,7 +924,7 @@ def test_squeeze_no_axis():
             gv = R.call_tir(Expected.squeeze, (x,), R.Tensor((2, 3, 4), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def squeeze(rxplaceholder: T.Buffer((T.int64(2), T.int64(1), 
T.int64(3), T.int64(1), T.int64(1), T.int64(4)), "float32"), T_squeeze: 
T.Buffer((T.int64(2), T.int64(3), T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(2), T.int64(3), T.int64(4)):
@@ -959,7 +959,7 @@ def test_squeeze_symbolic():
             gv = R.call_tir(Expected.squeeze, (x,), R.Tensor((a, b, 1), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def squeeze(var_rxplaceholder: T.handle, var_T_squeeze: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -994,7 +994,7 @@ def test_collapse_sum_like():
             gv = R.call_tir(Expected.collapse_sum, (x,), R.Tensor((1, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def collapse_sum(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), rxplaceholder_red: T.Buffer((T.int64(1), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(1), T.int64(3), T.int64(2)):
@@ -1030,7 +1030,7 @@ def test_collapse_sum_to():
             gv = R.call_tir(Expected.collapse_sum, (x,), R.Tensor((2, 1), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def collapse_sum(rxplaceholder: T.Buffer((T.int64(3), T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_red: T.Buffer((T.int64(2), T.int64(1)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             for ax0, ax1, k0, k2 in T.grid(T.int64(2), T.int64(1), T.int64(3), 
T.int64(3)):
@@ -1063,7 +1063,7 @@ def test_repeat():
             gv = R.call_tir(Expected.repeat, (x,), out_sinfo=R.Tensor((6, 2, 
3), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def repeat(rxplaceholder: T.Buffer((T.int64(3), T.int64(2), 
T.int64(3)), "float32"), T_repeat: T.Buffer((T.int64(6), T.int64(2), 
T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -1097,7 +1097,7 @@ def test_repeat_no_axis():
             gv = R.call_tir(Expected.repeat, (x,), out_sinfo=R.Tensor((36,), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def repeat(
             rxplaceholder: T.Buffer((T.int64(3), T.int64(2), T.int64(3)), 
"float32"),
             T_repeat: T.Buffer((T.int64(36),), "float32"),
@@ -1144,7 +1144,7 @@ def test_repeat_symbolic():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def repeat(var_rxplaceholder: T.handle, var_T_repeat: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1184,7 +1184,7 @@ def test_tile():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def tile(rxplaceholder: T.Buffer((T.int64(3), T.int64(2), T.int64(3)), 
"float32"), T_tile: T.Buffer((T.int64(2), T.int64(3), T.int64(4), T.int64(9)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -1216,7 +1216,7 @@ def test_tile_symbolic():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def tile(var_rxplaceholder: T.handle, var_T_tile: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1261,7 +1261,7 @@ def test_flip():
             gv = R.call_tir(cls.flip, (x,), out_sinfo=R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def flip(
             rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), "float32"),
             T_reverse_sequence: T.Buffer((T.int64(2), T.int64(3)), "float32"),
@@ -1303,7 +1303,7 @@ def test_flip_symbolic():
             gv = R.call_tir(cls.flip, (x,), out_sinfo=R.Tensor((a, b), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def flip(var_rxplaceholder: T.handle, var_T_reverse_sequence: 
T.handle):
             T.func_attr({"tir.noalias": T.bool(True)})
             a, b = T.int64(), T.int64()
@@ -1334,7 +1334,7 @@ def test_scatter_elements():
             return gv
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def scatter_elements(
             var_rxplaceholder: T.handle,
             var_rxplaceholder_1: T.handle,
@@ -1436,7 +1436,7 @@ def test_scatter_elements_symbolic():
             return gv
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def scatter_elements(
             var_rxplaceholder: T.handle,
             var_rxplaceholder_1: T.handle,
@@ -1537,7 +1537,7 @@ def test_layout_transform():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def te_layout_transform(A: T.Buffer((T.int64(10), T.int64(21), 
T.int64(30)), "float32"), te_layout_transform_1: T.Buffer((T.int64(10), 
T.int64(30), T.int64(7), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -1574,7 +1574,7 @@ def test_layout_transform_with_pad():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def te_layout_transform_with_pad(A: T.Buffer((T.int64(10), 
T.int64(20), T.int64(30)), "float32"), te_layout_transform_with_pad_1: 
T.Buffer((T.int64(10), T.int64(30), T.int64(7), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -1611,7 +1611,7 @@ def test_layout_transform_symbolic():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def te_layout_transform_with_pad(var_A: T.handle, 
var_te_layout_transform_with_pad: T.handle):
             T.func_attr({"tir.noalias": T.bool(True)})
             a, b, c = T.int64(), T.int64(), T.int64()
@@ -1655,7 +1655,7 @@ def test_layout_transform_with_pad_axis_sep():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def te_layout_transform_with_pad_axis_separator(A: 
T.Buffer((T.int64(10), T.int64(20), T.int64(30)), "float32"), 
var_te_layout_transform_with_pad_axis_separator: T.handle):
             T.func_attr({"tir.noalias": T.bool(True)})
             te_layout_transform_with_pad_axis_separator_1 = 
T.match_buffer(var_te_layout_transform_with_pad_axis_separator, (T.int64(10), 
T.int64(30), T.int64(7), T.int64(3)), axis_separators=[3])
diff --git a/tests/python/relax/test_transform_legalize_ops_nn.py 
b/tests/python/relax/test_transform_legalize_ops_nn.py
index d750901b59..0737b2784c 100644
--- a/tests/python/relax/test_transform_legalize_ops_nn.py
+++ b/tests/python/relax/test_transform_legalize_ops_nn.py
@@ -43,7 +43,7 @@ def test_conv1d():
             gv = R.call_tir(Expected.conv1d, (x, w), out_sinfo=R.Tensor((2, 
64, 13), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv1d(rxplaceholder: T.Buffer((T.int64(2), T.int64(128), 
T.int64(28)), "float32"), rxplaceholder_1: T.Buffer((T.int64(64), T.int64(16), 
T.int64(3)), "float32"), conv1d_ncw: T.Buffer((T.int64(2), T.int64(64), 
T.int64(13)), "float32")):
             T.func_attr({"tir.noalias": True})
             pad_temp = T.alloc_buffer((T.int64(2), T.int64(128), T.int64(30)))
@@ -83,7 +83,7 @@ def test_conv1d_with_out_dtype():
             gv = R.call_tir(Expected.conv1d, (x, w), out_sinfo=R.Tensor((2, 4, 
26), dtype="float16"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv1d(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(28)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), T.int64(3), 
T.int64(3)), "float32"), conv1d_ncw: T.Buffer((T.int64(2), T.int64(4), 
T.int64(26)), "float16")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -124,7 +124,7 @@ def test_conv1d_nwc():
             gv = R.call_tir(Expected.conv1d, (x, w), out_sinfo=R.Tensor((2, 
26, 64), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv1d(rxplaceholder: T.Buffer((T.int64(2), T.int64(28), 
T.int64(128)), "float32"), rxplaceholder_1: T.Buffer((T.int64(64), 
T.int64(128), T.int64(3)), "float32"), conv1d_nwc: T.Buffer((T.int64(2), 
T.int64(26), T.int64(64)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -174,7 +174,7 @@ def test_conv1d_symbolic():
             gv = R.call_tir(Expected.conv1d, (x, kernel), 
out_sinfo=R.Tensor((n, f, w - kw + 1), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv1d(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_conv1d_ncw: T.handle):
             T.func_attr({"tir.noalias": True})
             n, c, w = T.int64(), T.int64(), T.int64()
@@ -220,7 +220,7 @@ def test_conv2d():
             gv = R.call_tir(Expected.conv2d, (x, w), R.Tensor((2, 64, 13, 13), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv2d(rxplaceholder: T.Buffer((T.int64(2), T.int64(128), 
T.int64(28), T.int64(28)), "float32"), rxplaceholder_1: T.Buffer((T.int64(64), 
T.int64(16), T.int64(3), T.int64(3)), "float32"), group_conv2d_nchw: 
T.Buffer((T.int64(2), T.int64(64), T.int64(13), T.int64(13)), "float32")):
             T.func_attr({"tir.noalias": True})
             pad_temp = T.alloc_buffer([T.int64(2), T.int64(128), T.int64(30), 
T.int64(30)], dtype="float32")
@@ -260,7 +260,7 @@ def test_conv2d_with_out_dtype():
             gv = R.call_tir(Expected.conv2d, (x, w), R.Tensor((2, 4, 26, 26), 
dtype="float16"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv2d(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(28), T.int64(28)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), 
T.int64(3), T.int64(3), T.int64(3)), "float32"), conv2d_nchw: 
T.Buffer((T.int64(2), T.int64(4), T.int64(26), T.int64(26)), "float16")):
             T.func_attr({"tir.noalias": True})
             pad_temp = T.alloc_buffer([T.int64(2), T.int64(3), T.int64(28), 
T.int64(28)], dtype="float32")
@@ -300,7 +300,7 @@ def test_conv2d_nhwc():
             gv = R.call_tir(Expected.conv2d, (x, w), R.Tensor((2, 26, 26, 64), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv2d(rxplaceholder: T.Buffer((T.int64(2), T.int64(28), 
T.int64(28), T.int64(128)), "float32"), rxplaceholder_1: T.Buffer((T.int64(64), 
T.int64(128), T.int64(3), T.int64(3)), "float32"), conv2d_nhwc: 
T.Buffer((T.int64(2), T.int64(26), T.int64(26), T.int64(64)), "float32")):
             T.func_attr({"tir.noalias": True})
             pad_temp = T.alloc_buffer([T.int64(2), T.int64(28), T.int64(28), 
T.int64(128)], dtype="float32")
@@ -352,7 +352,7 @@ def test_conv2d_symbolic():
             gv = R.call_tir(Expected.conv2d, (x, kernel), R.Tensor((n, f, ((h 
- kh) + 1), ((w - kw) + 1)), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv2d(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_conv2d_nchw: T.handle):
             T.func_attr({"tir.noalias": True})
             c = T.int64()
@@ -402,7 +402,7 @@ def test_conv2d_transpose():
             gv = R.call_tir(Expected.conv2d_transpose, (x, w), 
out_sinfo=R.Tensor((2, 128, 56, 84), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv2d_transpose(rxplaceholder: T.Buffer((T.int64(2), 
T.int64(128), T.int64(28), T.int64(28)), "float32"), rxplaceholder_1: 
T.Buffer((T.int64(128), T.int64(16), T.int64(3), T.int64(3)), "float32"), 
compute: T.Buffer((T.int64(2), T.int64(128), T.int64(56), T.int64(84)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -457,7 +457,7 @@ def test_conv2d_transpose_with_out_dtype():
             gv = R.call_tir(Expected.conv2d_transpose, (x, w), 
out_sinfo=R.Tensor((2, 4, 30, 30), dtype="float16"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv2d_transpose(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(28), T.int64(28)), "float32"), rxplaceholder_1: T.Buffer((T.int64(3), 
T.int64(4), T.int64(3), T.int64(3)), "float32"), compute: T.Buffer((T.int64(2), 
T.int64(4), T.int64(30), T.int64(30)), "float16")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -519,7 +519,7 @@ def test_conv2d_transpose_symbolic():
             gv = R.call_tir(Expected.conv2d_transpose, (x, kernel), 
out_sinfo=R.Tensor((n, c, h * 3 + kh - 3, w * 3 + kw - 3), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def conv2d_transpose(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_compute: T.handle):
             T.func_attr({"tir.noalias": True})
             n = T.int64()
@@ -584,7 +584,7 @@ def test_max_pool2d():
             gv = R.call_tir(Expected.max_pool2d, (x,), R.Tensor((4, 56, 56, 
6), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def max_pool2d(rxplaceholder: T.Buffer((T.int64(4), T.int64(112), 
T.int64(112), T.int64(6)), "float32"), pool_max: T.Buffer((T.int64(4), 
T.int64(56), T.int64(56), T.int64(6)), "float32")):
             T.func_attr({"tir.noalias": True})
             pad_temp = T.alloc_buffer([T.int64(4), T.int64(114), T.int64(114), 
T.int64(6)], dtype="float32")
@@ -625,7 +625,7 @@ def test_max_pool2d_NCHW16c():
             gv = R.call_tir(Expected.max_pool2d, (x,), R.Tensor((4, 4, 110, 
110, 16), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def max_pool2d(rxplaceholder: T.Buffer((T.int64(4), T.int64(4), 
T.int64(112), T.int64(112), T.int64(16)), "float32"), pool_max: 
T.Buffer((T.int64(4), T.int64(4), T.int64(110), T.int64(110), T.int64(16)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3, i4, i5, i6 in T.grid(T.int64(4), T.int64(4), 
T.int64(110), T.int64(110), T.int64(16), T.int64(3), T.int64(3)):
@@ -659,7 +659,7 @@ def test_max_pool2d_ceil_mode():
             gv = R.call_tir(Expected.max_pool2d, (x,), R.Tensor((4, 6, 38, 
38), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def max_pool2d(rxplaceholder: T.Buffer((T.int64(4), T.int64(6), 
T.int64(112), T.int64(112)), "float32"), pool_max: T.Buffer((T.int64(4), 
T.int64(6), T.int64(38), T.int64(38)), "float32")):
             T.func_attr({"tir.noalias": True})
             pad_temp = T.alloc_buffer([T.int64(4), T.int64(6), T.int64(116), 
T.int64(116)], dtype="float32")
@@ -717,7 +717,7 @@ def test_avg_pool2d():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def avg_pool2d(rxplaceholder: T.Buffer((T.int64(4), T.int64(112), 
T.int64(112), T.int64(6)), "float32"), pool_avg: T.Buffer((T.int64(4), 
T.int64(56), T.int64(56), T.int64(6)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -766,7 +766,7 @@ def test_avg_pool2d_NCHW16c():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def avg_pool2d(rxplaceholder: T.Buffer((T.int64(4), T.int64(4), 
T.int64(112), T.int64(112), T.int64(16)), "float32"), pool_avg: 
T.Buffer((T.int64(4), T.int64(4), T.int64(110), T.int64(110), T.int64(16)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -808,7 +808,7 @@ def test_avg_pool2d_ceil_mode():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def avg_pool2d(rxplaceholder: T.Buffer((T.int64(4), T.int64(6), 
T.int64(112), T.int64(112)), "float32"), pool_avg: T.Buffer((T.int64(4), 
T.int64(6), T.int64(38), T.int64(38)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -885,7 +885,7 @@ def test_adaptive_avg_pool2d():
             gv = R.call_tir(Expected.adaptive_avg_pool2d, (x,), R.Tensor((2, 
4, 1, 1, 16), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def adaptive_avg_pool2d(rxplaceholder: T.Buffer((T.int64(2), 
T.int64(4), T.int64(7), T.int64(7), T.int64(16)), "float32"), 
adaptive_pool_avg: T.Buffer((T.int64(2), T.int64(4), T.int64(1), T.int64(1), 
T.int64(16)), "float32")):
             T.func_attr({"tir.noalias": True})
             adaptive_pool_sum = T.alloc_buffer([T.int64(2), T.int64(4), 
T.int64(1), T.int64(1), T.int64(16)], dtype="float32")
@@ -926,7 +926,7 @@ def test_adaptive_avg_pool2d_without_output_size():
             gv = R.call_tir(Expected.adaptive_avg_pool2d, (x,), R.Tensor((2, 
16, 7, 7), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def adaptive_avg_pool2d(rxplaceholder: T.Buffer((T.int64(2), 
T.int64(16), T.int64(7), T.int64(7)), "float32"), adaptive_pool_avg: 
T.Buffer((T.int64(2), T.int64(16), T.int64(7), T.int64(7)), "float32")):
             T.func_attr({"tir.noalias": True})
             adaptive_pool_sum = T.alloc_buffer([T.int64(2), T.int64(16), 
T.int64(7), T.int64(7)], dtype="float32")
@@ -986,7 +986,7 @@ def test_relu():
             gv = R.call_tir(Expected.relu, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def relu(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), "float32"), 
compute: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -1021,7 +1021,7 @@ def test_relu_symbolic():
             gv = R.call_tir(Expected.relu, (x,), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def relu(var_rxplaceholder: T.handle, var_compute: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -1057,7 +1057,7 @@ def test_leakyrelu():
             gv = R.call_tir(Expected.leaky_relu, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def leaky_relu(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), compute: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1 in T.grid(T.int64(2), T.int64(3)):
@@ -1093,7 +1093,7 @@ def test_leakyrelu_symbolic():
             gv = R.call_tir(Expected.leaky_relu, (x, ), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def leaky_relu(var_rxplaceholder: T.handle, var_compute: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -1129,7 +1129,7 @@ def test_gelu():
             gv = R.call_tir(Expected.gelu, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def gelu(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), "float32"), 
T_multiply: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             T_multiply_1 = T.alloc_buffer([T.int64(2), T.int64(3)], 
dtype="float32")
@@ -1192,7 +1192,7 @@ def test_gelu_symbolic():
             gv = R.call_tir(Expected.gelu, (x,), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def gelu(var_rxplaceholder: T.handle, var_T_multiply: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -1255,7 +1255,7 @@ def test_gelu_tanh():
             gv = R.call_tir(Expected.gelu_tanh, (x,), out_sinfo=R.Tensor((2, 
3), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def gelu_tanh(A: T.Buffer((T.int64(2), T.int64(3)), "float32"), 
T_multiply: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             T_power = T.alloc_buffer((T.int64(2), T.int64(3)))
@@ -1332,7 +1332,7 @@ def test_gelu_tanh_symbolic():
             gv = R.call_tir(Expected.gelu_tanh, (x,), out_sinfo=R.Tensor((m, 
n), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def gelu_tanh(var_A: T.handle, var_T_multiply: T.handle):
             T.func_attr({"tir.noalias": T.bool(True)})
             m, n = T.int64(), T.int64()
@@ -1408,7 +1408,7 @@ def test_silu():
             gv = R.call_tir(Expected.silu, (x,), R.Tensor((2, 3), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def silu(rxplaceholder: T.Buffer((T.int64(2), T.int64(3)), "float32"), 
T_multiply: T.Buffer((T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             compute = T.alloc_buffer([T.int64(2), T.int64(3)], dtype="float32")
@@ -1450,7 +1450,7 @@ def test_silu_symbolic():
             gv = R.call_tir(Expected.silu, (x,), R.Tensor((m, n), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def silu(var_rxplaceholder: T.handle, var_T_multiply: T.handle):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -1492,7 +1492,7 @@ def test_softmax():
             gv = R.call_tir(Expected.softmax, (x,), R.Tensor((2, 3, 16, 32), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def softmax(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(16), T.int64(32)), "float32"), T_softmax_norm: T.Buffer((T.int64(2), 
T.int64(3), T.int64(16), T.int64(32)), "float32")):
             T.func_attr({"tir.noalias": True})
             T_softmax_maxelem = T.alloc_buffer([T.int64(2), T.int64(3), 
T.int64(32)], dtype="float32")
@@ -1555,7 +1555,7 @@ def test_softmax_symbolic():
             gv = R.call_tir(Expected.softmax, (x,), R.Tensor((a, b, c), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def softmax(var_rxplaceholder: T.handle, var_T_softmax_norm: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1617,7 +1617,7 @@ def test_log_softmax():
             gv = R.call_tir(Expected.log_softmax, (x,), R.Tensor((2, 3, 16, 
32), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def log_softmax(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(16), T.int64(32)), "float32"), compute: T.Buffer((T.int64(2), 
T.int64(3), T.int64(16), T.int64(32)), "float32"),):
             T.func_attr({"tir.noalias": True})
             T_softmax_maxelem = T.alloc_buffer([T.int64(2), T.int64(3), 
T.int64(32)], dtype="float32")
@@ -1674,7 +1674,7 @@ def test_log_softmax_symbolic():
             gv = R.call_tir(Expected.log_softmax, (x,), R.Tensor((a, b, c), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def log_softmax(var_rxplaceholder: T.handle, var_compute: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1729,7 +1729,7 @@ def test_cross_entropy_with_logits():
             gv = R.call_tir(Expected.cross_entropy_with_logits, (x, y), 
R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def cross_entropy_with_logits(rxplaceholder: T.Buffer(T.int64(3), 
"float32"), rxplaceholder_1: T.Buffer(T.int64(3), "float32"), T_multiply: 
T.Buffer((), "float32")):
             T.func_attr({"tir.noalias": True})
             T_multiply_1 = T.alloc_buffer([T.int64(3)], dtype="float32")
@@ -1775,7 +1775,7 @@ def test_cross_entropy_with_logits_batch():
             gv = R.call_tir(Expected.cross_entropy_with_logits, (x, y), 
R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def cross_entropy_with_logits(rxplaceholder: T.Buffer((T.int64(2), 
T.int64(3)), "float32"), rxplaceholder_1: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), T_divide: T.Buffer((), "float32")):
             T.func_attr({"tir.noalias": True})
             T_multiply = T.alloc_buffer([T.int64(2), T.int64(3)], 
dtype="float32")
@@ -1829,7 +1829,7 @@ def test_cross_entropy_with_logits_batch_symbolic():
             gv = R.call_tir(Expected.cross_entropy_with_logits, (x, y), 
R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def cross_entropy_with_logits(var_rxplaceholder: T.handle, 
var_rxplaceholder_1: T.handle, T_divide: T.Buffer((), "float32")):
             T.func_attr({"tir.noalias": True})
             m = T.int64()
@@ -1880,7 +1880,7 @@ def test_batch_norm():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def batch_norm(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(28), T.int64(28)), "float32"), rxplaceholder_1: T.Buffer((T.int64(3),), 
"float32"), rxplaceholder_2: T.Buffer((T.int64(3),), "float32"), 
rxplaceholder_3: T.Buffer((T.int64(3),), "float32"), rxplaceholder_4: 
T.Buffer((T.int64(3),), "float32"), T_add: T.Buffer((T.int64(2), T.int64(3), 
T.int64(28), T.int64(28)), "float32"), T_add_1: T.Buffer((T.int64(3),), 
"float32"), T_add_2: T.Buffer((T.int64(3),), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -2109,7 +2109,7 @@ def test_batch_norm_symbolic():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def batch_norm(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_rxplaceholder_2: T.handle, var_rxplaceholder_3: T.handle, 
var_rxplaceholder_4: T.handle, var_T_add: T.handle, var_T_add_1: T.handle, 
var_T_add_2: T.handle):
             T.func_attr({"tir.noalias": True})
             n = T.int64()
@@ -2355,7 +2355,7 @@ def test_layer_norm():
             gv = R.call_tir(Expected.layer_norm, (x, gamma, beta), 
R.Tensor((2, 3, 4, 5), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def layer_norm(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4), T.int64(5)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4), 
T.int64(5)), "float32"), rxplaceholder_2: T.Buffer((T.int64(4), T.int64(5)), 
"float32"), T_layer_norm: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": True})
             rxplaceholder_red_temp_v0 = T.alloc_buffer([T.int64(2), 
T.int64(3)], dtype="float32")
@@ -2394,7 +2394,7 @@ def test_layer_norm_fp16():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def layer_norm(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_rxplaceholder_2: T.handle, var_T_layer_norm: T.handle):
             T.func_attr({"tir.noalias": True})
             rxplaceholder = T.match_buffer(var_rxplaceholder, (T.int64(2), 
T.int64(3), T.int64(4), T.int64(5)), "float16")
@@ -2468,7 +2468,7 @@ def test_layer_norm_symbolic():
             gv = R.call_tir(Expected.layer_norm, (x, gamma, beta), 
R.Tensor((n, s, f), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def layer_norm(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_rxplaceholder_2: T.handle, var_T_layer_norm: T.handle):
             T.func_attr({"tir.noalias": True})
             f = T.int64()
@@ -2514,7 +2514,7 @@ def test_group_norm():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def group_norm(rxplaceholder: T.Buffer((T.int64(2), T.int64(4), 
T.int64(4), T.int64(5)), "float32"), rxplaceholder_1: T.Buffer((T.int64(4),), 
"float32"), rxplaceholder_2: T.Buffer((T.int64(4),), "float32"), T_reshape: 
T.Buffer((T.int64(2), T.int64(4), T.int64(4), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": True})
             T_reshape_1 = T.alloc_buffer((T.int64(2), T.int64(2), T.int64(2), 
T.int64(4), T.int64(5)))
@@ -2591,7 +2591,7 @@ def test_group_norm_fp16():
             gv = R.call_tir(Expected.group_norm, (x, gamma, beta), 
out_sinfo=R.Tensor((2, 4, 4, 5), dtype="float16"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def group_norm(rxplaceholder: T.Buffer((T.int64(2), T.int64(4), 
T.int64(4), T.int64(5)), "float16"), rxplaceholder_1: T.Buffer((T.int64(4),), 
"float16"), rxplaceholder_2: T.Buffer((T.int64(4),), "float16"), T_reshape: 
T.Buffer((T.int64(2), T.int64(4), T.int64(4), T.int64(5)), "float16")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -2671,7 +2671,7 @@ def test_group_norm_symbolic():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def group_norm(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_rxplaceholder_2: T.handle, var_T_reshape: T.handle, c: T.int64):
             T.func_attr({"tir.noalias": True})
             n = T.int64()
@@ -2755,7 +2755,7 @@ def test_rms_norm():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def rms_norm(
             A: T.Buffer((T.int64(2), T.int64(3), T.int64(4), T.int64(5)), 
"float32"),
             B: T.Buffer((T.int64(4), T.int64(5)), "float32"),
@@ -2835,7 +2835,7 @@ def test_rms_norm_fp16():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def rms_norm(
             A: T.Buffer((T.int64(2), T.int64(3), T.int64(4), T.int64(5)), 
"float16"),
             B: T.Buffer((T.int64(4), T.int64(5)), "float16"),
@@ -2920,7 +2920,7 @@ def test_rms_norm_symbolic():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def rms_norm(
             var_A: T.handle, var_B: T.handle, var_T_rms_norm: T.handle
         ):
@@ -3000,7 +3000,7 @@ def test_rms_norm_no_bias():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def rms_norm(
             A: T.Buffer((T.int64(2), T.int64(3), T.int64(4), T.int64(5)), 
"float32"),
             B: T.Buffer((T.int64(4), T.int64(5)), "float32"),
@@ -3079,7 +3079,7 @@ def test_attention():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def attention_bias(A: T.Buffer((T.int64(4), T.int64(16), T.int64(32), 
T.int64(8)), "float32"), B: T.Buffer((T.int64(4), T.int64(8), T.int64(32), 
T.int64(8)), "float32"), C: T.Buffer((T.int64(4), T.int64(8), T.int64(32), 
T.int64(16)), "float32"), D: T.Buffer((T.int64(4), T.int64(32), T.int64(16), 
T.int64(8)), "float32"), T_transpose: T.Buffer((T.int64(4), T.int64(16), 
T.int64(32), T.int64(16)), "float32")):
             T.func_attr({"tir.noalias": T.bool(True)})
             # with T.block("root"):
@@ -3268,7 +3268,7 @@ def test_nll_loss():
             gv = R.call_tir(Expected.nll_loss, (predictions, targets, 
weights), R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def nll_loss(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4), T.int64(5)), "float32"), rxplaceholder_1: T.Buffer((T.int64(2), 
T.int64(4), T.int64(5)), "int64"), rxplaceholder_2: T.Buffer(T.int64(4), 
"float32"), T_divide: T.Buffer((), "float32"),):
             # function attr dict
             T.func_attr({"tir.noalias": True})
@@ -3333,7 +3333,7 @@ def test_nll_no_weight():
             gv = R.call_tir(Expected.nll_loss_without_weight, (predictions, 
targets), R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def nll_loss_without_weight(rxplaceholder: T.Buffer((T.int64(2), 
T.int64(3), T.int64(4), T.int64(5)), "float32"), rxplaceholder_1: 
T.Buffer((T.int64(2), T.int64(4), T.int64(5)), "int64"), T_divide: T.Buffer((), 
"float32"),):
             # function attr dict
             T.func_attr({"tir.noalias": True})
@@ -3406,7 +3406,7 @@ def test_nll_no_batch():
             gv = R.call_tir(Expected.nll_loss, (predictions, targets, 
weights), out_sinfo=R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def nll_loss(var_rxplaceholder: T.handle, rxplaceholder: T.Buffer((), 
"int64"), var_rxplaceholder_1: T.handle, T_divide: T.Buffer((), "float32")):
             T.func_attr({"tir.noalias": True})
             C = T.int64()
@@ -3453,7 +3453,7 @@ def test_nll_loss_symbolic():
             gv = R.call_tir(Expected.nll_loss, (predictions, targets, 
weights), R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def nll_loss(var_rxplaceholder: T.handle, var_rxplaceholder_1: 
T.handle, var_rxplaceholder_2: T.handle, T_divide: T.Buffer((), "float32"),):
             # function attr dict
             T.func_attr({"tir.noalias": True})
diff --git a/tests/python/relax/test_transform_legalize_ops_search_statistical.py b/tests/python/relax/test_transform_legalize_ops_search_statistical.py
index b612bbbae5..5b81885790 100644
--- a/tests/python/relax/test_transform_legalize_ops_search_statistical.py
+++ b/tests/python/relax/test_transform_legalize_ops_search_statistical.py
@@ -41,7 +41,7 @@ def test_where():
             gv = R.call_tir(Expected.where, (condition, x, y), R.Tensor((3, 2, 
3), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def where(rxplaceholder: T.Buffer((T.int64(3), T.int64(2), 
T.int64(1)), "bool"), rxplaceholder_1: T.Buffer((T.int64(2), T.int64(3)), 
"float32"), rxplaceholder_2: T.Buffer((T.int64(2), T.int64(1)), "float32"), 
T_where: T.Buffer((T.int64(3), T.int64(2), T.int64(3)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2 in T.grid(T.int64(3), T.int64(2), T.int64(3)):
@@ -78,7 +78,7 @@ def test_where_symbolic():
             gv = R.call_tir(Expected.where, (condition, x, y), R.Tensor((a, b, 
c), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def where(var_rxplaceholder: T.handle, var_rxplaceholder_1: T.handle, 
var_rxplaceholder_2: T.handle, var_T_where: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -116,7 +116,7 @@ def test_argmax():
             gv = R.call_tir(Expected.argmax, (x,), out_sinfo=R.Tensor((2, 4, 
5), dtype="int64"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def argmax(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4), T.int64(5)), "float32"), rxplaceholder_red: T.Buffer((T.int64(2), 
T.int64(4), T.int64(5)), "int64")):
             T.func_attr({"tir.noalias": True})
             rxplaceholder_red_temp_v0 = T.alloc_buffer((T.int64(2), 
T.int64(4), T.int64(5)), "int64")
@@ -167,7 +167,7 @@ def test_argmax_symbolic():
             gv = R.call_tir(Expected.argmax, (x,), out_sinfo=R.Tensor((a, 1, 
c, d), dtype="int64"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def argmax(var_rxplaceholder: T.handle, var_rxplaceholder_red: 
T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -214,7 +214,7 @@ def test_argmin():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def argmin(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4), T.int64(5)), "float32"), rxplaceholder_red: T.Buffer((), "int64")):
             T.func_attr({"tir.noalias": True})
             rxplaceholder_red_temp_v0 = T.alloc_buffer((), "int64")
@@ -258,7 +258,7 @@ def test_argmin_symbolic():
 
     @tvm.script.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def argmin(var_rxplaceholder: T.handle, rxplaceholder_red: 
T.Buffer((T.int64(1), T.int64(1), T.int64(1), T.int64(1)), "int64")):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -316,7 +316,7 @@ def test_max():
             gv = R.call_tir(Expected.max, (x,), R.Tensor((2, 5), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def max(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32"), rxplaceholder_red: T.Buffer((T.int64(2), T.int64(5)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(2), T.int64(5), T.int64(3), 
T.int64(4)):
@@ -353,7 +353,7 @@ def test_max_symbolic():
             gv = R.call_tir(Expected.max, (x,), R.Tensor((a, d), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def max(var_rxplaceholder: T.handle, var_rxplaceholder_red: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -392,7 +392,7 @@ def test_min():
             gv = R.call_tir(Expected.min, (x,), R.Tensor((2, 1, 1, 5), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def min(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32"), rxplaceholder_red: T.Buffer((T.int64(2), T.int64(1), 
T.int64(1), T.int64(5)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3, i4, i5 in T.grid(T.int64(2), T.int64(1), 
T.int64(1), T.int64(5), T.int64(3), T.int64(4)):
@@ -429,7 +429,7 @@ def test_min_symbolic():
             gv = R.call_tir(Expected.min, (x,), R.Tensor((a, 1, 1, d), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def min(var_rxplaceholder: T.handle, var_rxplaceholder_red: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -468,7 +468,7 @@ def test_sum():
             gv = R.call_tir(Expected.sum, (x,), R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def sum(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32"), rxplaceholder_red: T.Buffer((), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3 in T.grid(T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)):
@@ -501,7 +501,7 @@ def test_sum_symbolic():
             gv = R.call_tir(Expected.sum, (x,), R.Tensor((), dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def sum(var_rxplaceholder: T.handle, rxplaceholder_red: T.Buffer((), 
"float32")):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -539,7 +539,7 @@ def test_prod():
             gv = R.call_tir(Expected.prod, (x,), R.Tensor((1, 1, 1, 1), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def prod(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32"), rxplaceholder_red: T.Buffer((T.int64(1), T.int64(1), 
T.int64(1), T.int64(1)), "float32")):
             T.func_attr({"tir.noalias": True})
             for i0, i1, i2, i3, i4, i5, i6, i7 in T.grid(T.int64(1), 
T.int64(1), T.int64(1), T.int64(1), T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)):
@@ -572,7 +572,7 @@ def test_prod_symbolic():
             gv = R.call_tir(Expected.prod, (x,), R.Tensor((1, 1, 1, 1), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def prod(var_rxplaceholder: T.handle, rxplaceholder_red: 
T.Buffer((T.int64(1), T.int64(1), T.int64(1), T.int64(1)), "float32")):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -610,7 +610,7 @@ def test_mean():
             gv = R.call_tir(Expected.mean, (x,), R.Tensor((3, 4), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def mean(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32"), T_divide: T.Buffer((T.int64(3), T.int64(4)), 
"float32")):
             T.func_attr({"tir.noalias": True})
             rxplaceholder_red = T.alloc_buffer([T.int64(3), T.int64(4)], 
dtype="float32")
@@ -654,7 +654,7 @@ def test_mean_symbolic():
             gv = R.call_tir(Expected.mean, (x,), R.Tensor((b, c), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def mean(var_rxplaceholder: T.handle, var_T_divide: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -695,7 +695,7 @@ def test_std():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def std(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), T.int64(4), 
T.int64(5)), "float32"), compute: T.Buffer((), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -772,7 +772,7 @@ def test_std_symbolic():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def std(var_rxplaceholder: T.handle, compute: T.Buffer((), "float32")):
             T.func_attr({"tir.noalias": True})
             a, b, c, d = T.int64(), T.int64(), T.int64(), T.int64()
@@ -860,7 +860,7 @@ def test_variance():
             gv = R.call_tir(Expected.variance, (x,), R.Tensor((1, 3, 4, 1), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def variance(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4), T.int64(5)), "float32"), T_divide: T.Buffer((T.int64(1), 
T.int64(3), T.int64(4), T.int64(1)), "float32")):
             T.func_attr({"tir.noalias": True})
             rxplaceholder_red = T.alloc_buffer([T.int64(1), T.int64(3), 
T.int64(4), T.int64(1)], dtype="float32")
@@ -934,7 +934,7 @@ def test_variance_symbolic():
             gv = R.call_tir(Expected.variance, (x,), R.Tensor((1, b, c, 1), 
dtype="float32"))
             return gv
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def variance(var_rxplaceholder: T.handle, var_T_divide: T.handle):
             T.func_attr({"tir.noalias": True})
             a = T.int64()
@@ -1005,7 +1005,7 @@ def test_variance_no_keepdims():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def variance(rxplaceholder: T.Buffer((T.int64(2), T.int64(3), 
T.int64(4), T.int64(5)), "float32"), T_divide: T.Buffer((T.int64(3), 
T.int64(4)), "float32")):
             T.func_attr({"tir.noalias": True})
             # with T.block("root"):
@@ -1077,7 +1077,7 @@ def test_cumsum():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def cumsum(var_rxplaceholder: T.handle, out_buf: T.Buffer((T.int64(3), 
T.int64(2), T.int64(3)), "int32")):
             T.func_attr({"tir.noalias": True})
             rxplaceholder = T.match_buffer(var_rxplaceholder, (T.int64(3), 
T.int64(2), T.int64(3)), offset_factor=1)
@@ -1111,7 +1111,7 @@ def test_cumsum_symbolic():
 
     @I.ir_module
     class Expected:
-        @T.prim_func
+        @T.prim_func(private=True)
         def cumsum(var_rxplaceholder: T.handle, var_cumsum_generic: T.handle):
             T.func_attr({"tir.noalias": True})
             a, b, c = T.int64(), T.int64(), T.int64()
diff --git a/tests/python/relax/test_tvmscript_parser.py b/tests/python/relax/test_tvmscript_parser.py
index e917cc795c..bc324fe364 100644
--- a/tests/python/relax/test_tvmscript_parser.py
+++ b/tests/python/relax/test_tvmscript_parser.py
@@ -184,7 +184,7 @@ def test_unassigned_call_fail():
 def test_simple_module():
     @I.ir_module
     class TestModule:
-        @T.prim_func
+        @T.prim_func(private=True)
         def tir_func(
             x: T.Buffer((T.int64(128), T.int64(128)), "float32"),
             y: T.Buffer((T.int64(128), T.int64(128)), "float32"),
@@ -213,7 +213,7 @@ def test_simple_module():
 def test_emit_te_primfunc_attrs():
     @I.ir_module
     class TestModule:
-        @T.prim_func
+        @T.prim_func(private=True)
         def plus_one(
             x: T.Buffer((T.int64(128), T.int64(128)), "float32"),
             y: T.Buffer((T.int64(128), T.int64(128)), "float32"),
@@ -275,7 +275,7 @@ def test_module_with_attr_and_global_info():
             }
         )
 
-        @T.prim_func
+        @T.prim_func(private=True)
         def tir_func(
             x: T.Buffer((T.int64(128), T.int64(128)), "float32"),
             y: T.Buffer((T.int64(128), T.int64(128)), "float32"),

[tvm] branch unity-staging updated: [MERGE-FIX] Update the code to fix merge issues

Reply via email to