From: Pan Xiuli <xiuli....@intel.com> If the last sub group has the max sub group size, get_sub_group_size() returns 0 instead of the max sub group size. Fix the bug. V2: Remove the useless check for threadn; threadn should always be bigger than 1.
Signed-off-by: Pan Xiuli <xiuli....@intel.com> --- backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl index 9c09b21..8e22015 100644 --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl @@ -30,8 +30,8 @@ uint get_sub_group_size(void) { uint threadn = get_num_sub_groups(); uint threadid = get_sub_group_id(); - if((threadid == (threadn - 1)) && (threadn > 1)) - return (get_local_size(0)*get_local_size(1)*get_local_size(2)) % get_max_sub_group_size(); + if (threadid == (threadn - 1)) + return (get_local_size(0)*get_local_size(1)*get_local_size(2) -1) % get_max_sub_group_size() + 1; else return get_max_sub_group_size(); } -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet