This is an automated email from the ASF dual-hosted git repository.

ruihangl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 7f02606af2 [OPENCL] Always use convert_T for type conversion (#14972)
7f02606af2 is described below

commit 7f02606af2bc034d8ba2b09f938a58f8d9982855
Author: Tianqi Chen <[email protected]>
AuthorDate: Thu Jun 1 15:52:26 2023 -0400

    [OPENCL] Always use convert_T for type conversion (#14972)
    
    This PR changes the Cast in OpenCL to always relying on convert_T to get 
closer to the spec and more reliable.
---
 src/target/source/codegen_opencl.cc                | 10 +--
 .../python/unittest/test_target_codegen_opencl.py  | 80 ++++++++++------------
 .../unittest/test_target_texture_codegen_opencl.py | 38 +++++-----
 3 files changed, 61 insertions(+), 67 deletions(-)

diff --git a/src/target/source/codegen_opencl.cc 
b/src/target/source/codegen_opencl.cc
index 61a8ee8a57..fa4ca7d34b 100644
--- a/src/target/source/codegen_opencl.cc
+++ b/src/target/source/codegen_opencl.cc
@@ -370,17 +370,19 @@ std::string CodeGenOpenCL::CastFromTo(std::string value, 
DataType from, DataType
 
 std::string CodeGenOpenCL::CastTo(std::string value, DataType target) {
   std::ostringstream os;
-  if (target.lanes() == 1) {
-    os << "((";
+  if (target == DataType::Bool()) {
+    os << "(";
+    os << "(";
     this->PrintType(target, os);
     os << ")" << value << ")";
-  } else {  // convert vector type
+    return os.str();
+  } else {
     os << "(";
     os << "convert_";
     this->PrintType(target, os);
     os << "(" << value << "))";
+    return os.str();
   }
-  return os.str();
 }
 
 void CodeGenOpenCL::VisitStmt_(const AllocateNode* op) {
diff --git a/tests/python/unittest/test_target_codegen_opencl.py 
b/tests/python/unittest/test_target_codegen_opencl.py
index 67dc37363e..4a426c952b 100644
--- a/tests/python/unittest/test_target_codegen_opencl.py
+++ b/tests/python/unittest/test_target_codegen_opencl.py
@@ -32,10 +32,12 @@ def test_opencl_ternary_expression():
         max_lhs = tvm.tir.const(2, dtype=dtype)
         max_rhs = tvm.tir.if_then_else(A[0] > 0, true_value, false_value)
         C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name="C")
-        s = te.create_schedule(C.op)
-        s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
-        fun = tvm.build(s, [A, C], target)
 
+        func = te.create_prim_func([A, C])
+        sch = tvm.tir.Schedule(func)
+        (x,) = sch.get_loops(sch.get_block("C"))
+        sch.bind(x, "threadIdx.x")
+        fun = tvm.build(sch.mod, target=target)
         a = tvm.nd.empty((n,), A.dtype, dev)
         c = tvm.nd.empty((n,), A.dtype, dev)
         # Only need to test compiling here
@@ -48,9 +50,11 @@ def test_opencl_ternary_expression():
         max_lhs = tvm.tir.const(2, dtype=dtype)
         max_rhs = tvm.tir.Select(A[0] > 0, true_value, false_value)
         C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name="C")
-        s = te.create_schedule(C.op)
-        s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
-        fun = tvm.build(s, [A, C], target)
+        func = te.create_prim_func([A, C])
+        sch = tvm.tir.Schedule(func)
+        (x,) = sch.get_loops(sch.get_block("C"))
+        sch.bind(x, "threadIdx.x")
+        fun = tvm.build(sch.mod, target=target)
 
         a = tvm.nd.empty((n,), A.dtype, dev)
         c = tvm.nd.empty((n,), A.dtype, dev)
@@ -76,9 +80,11 @@ def test_opencl_inf_nan():
         A = te.placeholder((n,), name="A", dtype=dtype)
         inf_value = tvm.tir.const(value, dtype=dtype)
         C = te.compute((n,), lambda i: inf_value, name="C")
-        s = te.create_schedule(C.op)
-        s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
-        fun = tvm.build(s, [A, C], target)
+        func = te.create_prim_func([A, C])
+        sch = tvm.tir.Schedule(func)
+        (x,) = sch.get_loops(sch.get_block("C"))
+        sch.bind(x, "threadIdx.x")
+        fun = tvm.build(sch.mod, target=target)
         a = tvm.nd.empty((n,), A.dtype, dev)
         c = tvm.nd.empty((n,), A.dtype, dev)
         # Only need to test compiling here
@@ -102,9 +108,11 @@ def test_opencl_max():
         max_lhs = A[0] + tvm.tir.const(1, dtype=dtype)
         max_rhs = tvm.tir.const(0, dtype=dtype)
         C = te.compute((n,), lambda i: tvm.te.max(max_lhs, max_rhs), name="C")
-        s = te.create_schedule(C.op)
-        s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
-        fun = tvm.build(s, [A, C], target)
+        func = te.create_prim_func([A, C])
+        sch = tvm.tir.Schedule(func)
+        (x,) = sch.get_loops(sch.get_block("C"))
+        sch.bind(x, "threadIdx.x")
+        fun = tvm.build(sch.mod, target=target)
 
         a = tvm.nd.empty((n,), A.dtype, dev)
         c = tvm.nd.empty((n,), A.dtype, dev)
@@ -150,7 +158,7 @@ def test_opencl_type_casting():
                 tvm.tir.all(
                     *[
                         i // block_size == tvm.tir.const(3, "int32"),
-                        i % block_size == tvm.tir.const(3, "int32"),
+                        i % 3 == tvm.tir.const(1, "int32"),
                     ]
                 ),
                 tvm.tir.const(1, dtype),
@@ -158,42 +166,26 @@ def test_opencl_type_casting():
             ),
             name="C",
         )
-        s = te.create_schedule(C.op)
-        (tx, vx) = s[C].split(s[C].op.axis[0], factor=block_size)
-        s[C].vectorize(vx)
-        thrx = te.thread_axis("threadIdx.x")
-
-        s[C].bind(tx, thrx)
-        fun = tvm.build(s, [C], target)
-
+        # NOTE: test simple convert pattern
+        func = te.create_prim_func([C])
+        sch = tvm.tir.Schedule(func)
+        (x,) = sch.get_loops(sch.get_block("C"))
+        tx, vx = sch.split(x, factors=[None, block_size])
+        sch.bind(tx, "threadIdx.x")
+        sch.vectorize(vx)
+
+        fun = tvm.build(sch.mod, target=target)
         c = tvm.nd.empty((n,), dtype, ctx)
         assembly = fun.imported_modules[0].get_source()
-
-        if dtype == "float32":
-            false_branch = "((float4)(0.000000e+00f, 0.000000e+00f, 
0.000000e+00f, 0.000000e+00f))"
-            true_branch = "((float4)(1.000000e+00f, 1.000000e+00f, 
1.000000e+00f, 1.000000e+00f))"
-            lcond = 
"convert_int4(((convert_uint4(((uint4)((((int)get_local_id(0)) == 3), 
(((int)get_local_id(0)) == 3), (((int)get_local_id(0)) == 3), 
(((int)get_local_id(0)) == 3)))))"
-            rcond = "(convert_uint4((((int4)((0)+(1*0), (0)+(1*1), (0)+(1*2), 
(0)+(1*3))) == ((int4)(3, 3, 3, 3)))))"
-            cond = "({} && {})".format(lcond, rcond)
-            select = "select({}, {}, {})".format(false_branch, true_branch, 
cond)
-            count = assembly.count(select)
-            assert count == 1
-            fun(c)
-
-        elif dtype == "float16":
-            false_branch = "((half4)((half)0.000000e+00f, (half)0.000000e+00f, 
(half)0.000000e+00f, (half)0.000000e+00f))"
-            true_branch = "((half4)((half)1.000000e+00f, (half)1.000000e+00f, 
(half)1.000000e+00f, (half)1.000000e+00f))"
-            lcond = 
"convert_short4(((convert_uint4(((uint4)((((int)get_local_id(0)) == 3), 
(((int)get_local_id(0)) == 3), (((int)get_local_id(0)) == 3), 
(((int)get_local_id(0)) == 3)))))"
-            rcond = "(convert_uint4((((int4)((0)+(1*0), (0)+(1*1), (0)+(1*2), 
(0)+(1*3))) == ((int4)(3, 3, 3, 3)))))))"
-            cond = "({} && {})".format(lcond, rcond)
-            select = "select({}, {}, {})".format(false_branch, true_branch, 
cond)
-            count = assembly.count(select)
-            assert count == 1
-            fun(c)
+        lcond = 
"convert_int4(((convert_uint4(((uint4)(((convert_int(get_local_id(0))) == 3), 
((convert_int(get_local_id(0))) == 3), ((convert_int(get_local_id(0))) == 3), 
((convert_int(get_local_id(0))) == 3)))))"
+        rcond = 
"(convert_uint4(((((int4)(((convert_int(get_local_id(0))))+(1*0), 
((convert_int(get_local_id(0))))+(1*1), ((convert_int(get_local_id(0))))+(1*2), 
((convert_int(get_local_id(0))))+(1*3))) % ((int4)(3, 3, 3, 3))) == ((int4)(1, 
1, 1, 1))))))))"
+        pattern_cond = "({} && {})".format(lcond, rcond)
+        assert assembly.count(pattern_cond) != 0
+        fun(c)
 
     dev = tvm.device(target, 0)
 
-    check_type_casting(dev, 16, "float32")
+    check_type_casting(dev, 32, "float32")
     # fp16 is not yet supported in ci
     # check_type_casting(dev, 16, "float16")
 
diff --git a/tests/python/unittest/test_target_texture_codegen_opencl.py 
b/tests/python/unittest/test_target_texture_codegen_opencl.py
index 639159c495..5681dcf9e6 100644
--- a/tests/python/unittest/test_target_texture_codegen_opencl.py
+++ b/tests/python/unittest/test_target_texture_codegen_opencl.py
@@ -1466,8 +1466,8 @@ class TestSimpleTextureToScalarFP16:
             ["global.texture", (1, 1, 40, 40, 4)],
             ["", (1, 4, 40, 40)],
             [
-                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)((((int)get_local_id(0)) % 40), (((((int)get_group_id(0)) & 1) * 20) + 
(((int)get_local_id(0)) / 40)))));",
-                "out[((((int)get_group_id(0)) * 800) + 
((int)get_local_id(0)))] = ((half)((float*)&v_)[(((int)get_group_id(0)) >> 
1)]);",
+                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)(((convert_int(get_local_id(0))) % 40), 
((((convert_int(get_group_id(0))) & 1) * 20) + ((convert_int(get_local_id(0))) 
/ 40)))));",
+                "out[(((convert_int(get_group_id(0))) * 800) + 
(convert_int(get_local_id(0))))] = 
(convert_half(((float*)&v_)[((convert_int(get_group_id(0))) >> 1)]));",
             ],
         ),
         # 2. Buffer (NCHW4c) -> Cast(FP16) -> Buffer (NCHW)
@@ -1475,7 +1475,7 @@ class TestSimpleTextureToScalarFP16:
             ["", (1, 1, 40, 40, 4)],
             ["", (1, 4, 40, 40)],
             [
-                "out[((((int)get_group_id(0)) * 800) + 
((int)get_local_id(0)))] = ((half)p0_comp[((((((int)get_group_id(0)) & 1) * 
3200) + (((int)get_local_id(0)) * 4)) + (((int)get_group_id(0)) >> 1))]);"
+                "out[(((convert_int(get_group_id(0))) * 800) + 
(convert_int(get_local_id(0))))] = 
(convert_half(p0_comp[(((((convert_int(get_group_id(0))) & 1) * 3200) + 
((convert_int(get_local_id(0))) * 4)) + ((convert_int(get_group_id(0))) >> 
1))]));"
             ],
         ),
         # 3. Texture (NCHW4c) -> Cast(FP16) -> Texture (NCHW4c)
@@ -1483,8 +1483,8 @@ class TestSimpleTextureToScalarFP16:
             ["global.texture", (1, 1, 40, 40, 4)],
             ["global.texture", (1, 1, 40, 40, 4)],
             [
-                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)((((((int)get_group_id(0)) * 24) + ((int)get_local_id(0))) % 40), 
(((((int)get_group_id(0)) * 8) + (((int)get_local_id(0)) >> 3)) / 5))));",
-                "write_imageh(out, (int2)((((((int)get_group_id(0)) * 24) + 
((int)get_local_id(0))) % 40), (((((int)get_group_id(0)) * 8) + 
(((int)get_local_id(0)) >> 3)) / 5)), (convert_half4(v_)));",
+                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)(((((convert_int(get_group_id(0))) * 24) + 
(convert_int(get_local_id(0)))) % 40), ((((convert_int(get_group_id(0))) * 8) + 
((convert_int(get_local_id(0))) >> 3)) / 5))));",
+                "write_imageh(out, (int2)(((((convert_int(get_group_id(0))) * 
24) + (convert_int(get_local_id(0)))) % 40), ((((convert_int(get_group_id(0))) 
* 8) + ((convert_int(get_local_id(0))) >> 3)) / 5)), (convert_half4(v_)));",
             ],
         ),
     )
@@ -1507,8 +1507,8 @@ class TestSimpleTextureToScalarFP32:
             ["global.texture", (1, 1, 40, 40, 4)],
             ["", (1, 4, 40, 40)],
             [
-                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)((((int)get_local_id(0)) % 40), (((((int)get_group_id(0)) & 1) * 20) + 
(((int)get_local_id(0)) / 40)))));",
-                "out[((((int)get_group_id(0)) * 800) + 
((int)get_local_id(0)))] = ((float*)&v_)[(((int)get_group_id(0)) >> 1)];",
+                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)(((convert_int(get_local_id(0))) % 40), 
((((convert_int(get_group_id(0))) & 1) * 20) + ((convert_int(get_local_id(0))) 
/ 40)))));",
+                "out[(((convert_int(get_group_id(0))) * 800) + 
(convert_int(get_local_id(0))))] = 
((float*)&v_)[((convert_int(get_group_id(0))) >> 1)];",
             ],
         ),
         # 2. Buffer (NCHW4c) -> Buffer (NCHW)
@@ -1516,7 +1516,7 @@ class TestSimpleTextureToScalarFP32:
             ["", (1, 1, 40, 40, 4)],
             ["", (1, 4, 40, 40)],
             [
-                "out[((((int)get_group_id(0)) * 800) + 
((int)get_local_id(0)))] = p0_comp[((((((int)get_group_id(0)) & 1) * 3200) + 
(((int)get_local_id(0)) * 4)) + (((int)get_group_id(0)) >> 1))];"
+                "out[(((convert_int(get_group_id(0))) * 800) + 
(convert_int(get_local_id(0))))] = p0_comp[(((((convert_int(get_group_id(0))) & 
1) * 3200) + ((convert_int(get_local_id(0))) * 4)) + 
((convert_int(get_group_id(0))) >> 1))];"
             ],
         ),
     )
@@ -1619,8 +1619,8 @@ class TestTextureToScalarReuseSSAFP16:
             ["global.texture", (1, 1, 40, 40, 4)],
             ["", (1, 4, 40, 40)],
             [
-                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)((((int)get_local_id(0)) % 40), (((((int)get_group_id(0)) & 1) * 20) + 
(((int)get_local_id(0)) / 40)))));",
-                "out_sum[((((int)get_group_id(0)) * 800) + 
((int)get_local_id(0)))] = (((half)((float*)&v_)[(((int)get_group_id(0)) >> 
1)]) + (((half)((float*)&v_)[(((int)get_group_id(0)) >> 1)]) + 
((half)((float*)&v_)[(((int)get_group_id(0)) >> 1)])));",
+                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)(((convert_int(get_local_id(0))) % 40), 
((((convert_int(get_group_id(0))) & 1) * 20) + ((convert_int(get_local_id(0))) 
/ 40)))));",
+                "out_sum[(((convert_int(get_group_id(0))) * 800) + 
(convert_int(get_local_id(0))))] = 
((convert_half(((float*)&v_)[((convert_int(get_group_id(0))) >> 1)])) + 
((convert_half(((float*)&v_)[((convert_int(get_group_id(0))) >> 1)])) + 
(convert_half(((float*)&v_)[((convert_int(get_group_id(0))) >> 1)]))));",
             ],
         ),
         # 2. Buffer (NCHW4c) -> Cast(FP16) -> Buffer (NCHW)
@@ -1628,7 +1628,7 @@ class TestTextureToScalarReuseSSAFP16:
             ["", (1, 1, 40, 40, 4)],
             ["", (1, 4, 40, 40)],
             [
-                "out_sum[((((int)get_group_id(0)) * 800) + 
((int)get_local_id(0)))] = (((half)p0_comp[((((((int)get_group_id(0)) & 1) * 
3200) + (((int)get_local_id(0)) * 4)) + (((int)get_group_id(0)) >> 1))]) + 
(((half)p0_comp[((((((int)get_group_id(0)) & 1) * 3200) + 
(((int)get_local_id(0)) * 4)) + (((int)get_group_id(0)) >> 1))]) + 
((half)p0_comp[((((((int)get_group_id(0)) & 1) * 3200) + 
(((int)get_local_id(0)) * 4)) + (((int)get_group_id(0)) >> 1))])));"
+                " out_sum[(((convert_int(get_group_id(0))) * 800) + 
(convert_int(get_local_id(0))))] = 
((convert_half(p0_comp[(((((convert_int(get_group_id(0))) & 1) * 3200) + 
((convert_int(get_local_id(0))) * 4)) + ((convert_int(get_group_id(0))) >> 
1))])) + ((convert_half(p0_comp[(((((convert_int(get_group_id(0))) & 1) * 3200) 
+ ((convert_int(get_local_id(0))) * 4)) + ((convert_int(get_group_id(0))) >> 
1))])) + (convert_half(p0_comp[(((((convert_int(get_group_id(0))) & 1) * 3200) 
+ ((c [...]
             ],
         ),
         # 3. Texture (NCHW4c) -> Cast(FP16) -> Texture (NCHW4c)
@@ -1636,8 +1636,8 @@ class TestTextureToScalarReuseSSAFP16:
             ["global.texture", (1, 1, 40, 40, 4)],
             ["global.texture", (1, 1, 40, 40, 4)],
             [
-                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)((((((int)get_group_id(0)) * 24) + ((int)get_local_id(0))) % 40), 
(((((int)get_group_id(0)) * 8) + (((int)get_local_id(0)) >> 3)) / 5))));",
-                "write_imageh(out_sum, (int2)((((((int)get_group_id(0)) * 24) 
+ ((int)get_local_id(0))) % 40), (((((int)get_group_id(0)) * 8) + 
(((int)get_local_id(0)) >> 3)) / 5)), ((convert_half4(v_)) + 
((convert_half4(v_)) + (convert_half4(v_)))));",
+                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)(((((convert_int(get_group_id(0))) * 24) + 
(convert_int(get_local_id(0)))) % 40), ((((convert_int(get_group_id(0))) * 8) + 
((convert_int(get_local_id(0))) >> 3)) / 5))));",
+                "write_imageh(out_sum, 
(int2)(((((convert_int(get_group_id(0))) * 24) + 
(convert_int(get_local_id(0)))) % 40), ((((convert_int(get_group_id(0))) * 8) + 
((convert_int(get_local_id(0))) >> 3)) / 5)), ((convert_half4(v_)) + 
((convert_half4(v_)) + (convert_half4(v_)))));",
             ],
         ),
     )
@@ -1660,8 +1660,8 @@ class TestTextureToScalarReuseSSAFP32:
             ["global.texture", (1, 1, 40, 40, 4)],
             ["", (1, 4, 40, 40)],
             [
-                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)((((int)get_local_id(0)) % 40), (((((int)get_group_id(0)) & 1) * 20) + 
(((int)get_local_id(0)) / 40)))));",
-                "out_sum[((((int)get_group_id(0)) * 800) + 
((int)get_local_id(0)))] = (((float*)&v_)[(((int)get_group_id(0)) >> 1)] + 
(((float*)&v_)[(((int)get_group_id(0)) >> 1)] + 
((float*)&v_)[(((int)get_group_id(0)) >> 1)]));",
+                "float4 v_ = READ_IMAGEF(p0_comp, image_sampler, 
((int2)(((convert_int(get_local_id(0))) % 40), 
((((convert_int(get_group_id(0))) & 1) * 20) + ((convert_int(get_local_id(0))) 
/ 40)))));",
+                "out_sum[(((convert_int(get_group_id(0))) * 800) + 
(convert_int(get_local_id(0))))] = 
(((float*)&v_)[((convert_int(get_group_id(0))) >> 1)] + 
(((float*)&v_)[((convert_int(get_group_id(0))) >> 1)] + 
((float*)&v_)[((convert_int(get_group_id(0))) >> 1)]));",
             ],
         ),
         # 2. Buffer (NCHW4c) -> Buffer (NCHW)
@@ -1669,7 +1669,7 @@ class TestTextureToScalarReuseSSAFP32:
             ["", (1, 1, 40, 40, 4)],
             ["", (1, 4, 40, 40)],
             [
-                "out_sum[((((int)get_group_id(0)) * 800) + 
((int)get_local_id(0)))] = (p0_comp[((((((int)get_group_id(0)) & 1) * 3200) + 
(((int)get_local_id(0)) * 4)) + (((int)get_group_id(0)) >> 1))] + 
(p0_comp[((((((int)get_group_id(0)) & 1) * 3200) + (((int)get_local_id(0)) * 
4)) + (((int)get_group_id(0)) >> 1))] + p0_comp[((((((int)get_group_id(0)) & 1) 
* 3200) + (((int)get_local_id(0)) * 4)) + (((int)get_group_id(0)) >> 1))]));"
+                "out_sum[(((convert_int(get_group_id(0))) * 800) + 
(convert_int(get_local_id(0))))] = (p0_comp[(((((convert_int(get_group_id(0))) 
& 1) * 3200) + ((convert_int(get_local_id(0))) * 4)) + 
((convert_int(get_group_id(0))) >> 1))] + 
(p0_comp[(((((convert_int(get_group_id(0))) & 1) * 3200) + 
((convert_int(get_local_id(0))) * 4)) + ((convert_int(get_group_id(0))) >> 1))] 
+ p0_comp[(((((convert_int(get_group_id(0))) & 1) * 3200) + 
((convert_int(get_local_id(0))) * 4)) + ((convert_ [...]
             ],
         ),
     )
@@ -1693,10 +1693,10 @@ class TestLocalArrayToTexture:
             (1, 2, 38, 38, 4),
             [
                 "float out_local[4];",
-                "float4 v_ = READ_IMAGEF(p1_comp, image_sampler, 
((int2)((((((int)get_group_id(0)) * 14) + ((int)get_local_id(0))) % 38), 
((((((int)get_group_id(0)) * 64) + (((int)get_local_id(0)) >> 1)) % 722) / 
19))));",
-                "float4 v__1 = READ_IMAGEF(p2_comp, image_sampler, ((int2)(rw, 
((((((((int)get_group_id(0)) * 32) + (((int)get_local_id(0)) >> 2)) / 361) * 
12) + (rcb * 3)) + rh))));",
+                "float4 v_ = READ_IMAGEF(p1_comp, image_sampler, 
((int2)(((((convert_int(get_group_id(0))) * 14) + 
(convert_int(get_local_id(0)))) % 38), (((((convert_int(get_group_id(0))) * 64) 
+ ((convert_int(get_local_id(0))) >> 1)) % 722) / 19))));",
+                "float4 v__1 = READ_IMAGEF(p2_comp, image_sampler, ((int2)(rw, 
(((((((convert_int(get_group_id(0))) * 32) + ((convert_int(get_local_id(0))) >> 
2)) / 361) * 12) + (rcb * 3)) + rh))));",
                 "out_local[cb_c] = (out_local[cb_c] + (((float*)&v_)[rcb] * 
((float*)&v__1)[cb_c]));",
-                "write_imagef(out, (int2)((((((int)get_group_id(0)) * 14) + 
((int)get_local_id(0))) % 38), (((((int)get_group_id(0)) * 64) + 
(((int)get_local_id(0)) >> 1)) / 19)), vload4(0, out_local + 0));",
+                "write_imagef(out, (int2)(((((convert_int(get_group_id(0))) * 
14) + (convert_int(get_local_id(0)))) % 38), ((((convert_int(get_group_id(0))) 
* 64) + ((convert_int(get_local_id(0))) >> 1)) / 19)), vload4(0, out_local + 
0));",
             ],
         ),
     )

Reply via email to