Homer,

It's better to enhance our autogeneration script to generate the select
vector functions. The patch is attached; please review it and give
your comments. Thanks.


On Mon, Sep 02, 2013 at 01:03:30PM +0800, Homer Hsing wrote:
> some data type was missing in vector version of built-in function "select"
> this patch adds missing versions.
> 
> Signed-off-by: Homer Hsing <[email protected]>
> ---
>  backend/src/ocl_stdlib.tmpl.h | 107 
> +++++++++++++++++++++++++++++++++---------
>  1 file changed, 84 insertions(+), 23 deletions(-)
> 
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index 7d8d480..8bb15e6 100644
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -1439,29 +1439,90 @@ DEF(float, int)
>  DEF(float, uint)
>  #undef DEF
>  
> -// This will be optimized out by LLVM and will output LLVM select 
> instructions
> -#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
> -INLINE_OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
> -  TYPE4 dst; \
> -  const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
> -  const TYPE x1 = src1.x; \
> -  const TYPE y0 = src0.y; \
> -  const TYPE y1 = src1.y; \
> -  const TYPE z0 = src0.z; \
> -  const TYPE z1 = src1.z; \
> -  const TYPE w0 = src0.w; \
> -  const TYPE w1 = src1.w; \
> -  dst.x = (cond.x & MASK) ? x1 : x0; \
> -  dst.y = (cond.y & MASK) ? y1 : y0; \
> -  dst.z = (cond.z & MASK) ? z1 : z0; \
> -  dst.w = (cond.w & MASK) ? w1 : w0; \
> -  return dst; \
> -}
> -DECL_SELECT4(int4, int, int4, 0x80000000)
> -DECL_SELECT4(int4, int, uint4, 0x80000000)
> -DECL_SELECT4(float4, float, int4, 0x80000000)
> -DECL_SELECT4(float4, float, uint4, 0x80000000)
> -#undef DECL_SELECT4
> +#define DEF2(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##2 select(TYPE##2 x, TYPE##2 y, COND_TYPE##2 z) { 
> \
> +    return (TYPE##2)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK))); \
> +  }
> +
> +#define DEF3(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##3 select(TYPE##3 x, TYPE##3 y, COND_TYPE##3 z) { 
> \
> +    return (TYPE##3)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> +      select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK))); \
> +  }
> +
> +#define DEF4(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##4 select(TYPE##4 x, TYPE##4 y, COND_TYPE##4 z) { 
> \
> +    return (TYPE##4)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> +      select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> +      select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK))); \
> +  }
> +
> +#define DEF8(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##8 select(TYPE##8 x, TYPE##8 y, COND_TYPE##8 z) { 
> \
> +    return (TYPE##8)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> +      select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> +      select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK)), \
> +      select(x.s4, y.s4, (COND_TYPE)(z.s4 & MASK)), \
> +      select(x.s5, y.s5, (COND_TYPE)(z.s5 & MASK)), \
> +      select(x.s6, y.s6, (COND_TYPE)(z.s6 & MASK)), \
> +      select(x.s7, y.s7, (COND_TYPE)(z.s7 & MASK))); \
> +  }
> +
> +#define DEF16(TYPE, COND_TYPE, MASK) \
> +  INLINE_OVERLOADABLE TYPE##16 select(TYPE##16 x, TYPE##16 y, COND_TYPE##16 
> z) { \
> +    return (TYPE##16)(select(x.s0, y.s0, (COND_TYPE)(z.s0 & MASK)), \
> +      select(x.s1, y.s1, (COND_TYPE)(z.s1 & MASK)), \
> +      select(x.s2, y.s2, (COND_TYPE)(z.s2 & MASK)), \
> +      select(x.s3, y.s3, (COND_TYPE)(z.s3 & MASK)), \
> +      select(x.s4, y.s4, (COND_TYPE)(z.s4 & MASK)), \
> +      select(x.s5, y.s5, (COND_TYPE)(z.s5 & MASK)), \
> +      select(x.s6, y.s6, (COND_TYPE)(z.s6 & MASK)), \
> +      select(x.s7, y.s7, (COND_TYPE)(z.s7 & MASK)), \
> +      select(x.s8, y.s8, (COND_TYPE)(z.s8 & MASK)), \
> +      select(x.s9, y.s9, (COND_TYPE)(z.s9 & MASK)), \
> +      select(x.sa, y.sa, (COND_TYPE)(z.sa & MASK)), \
> +      select(x.sb, y.sb, (COND_TYPE)(z.sb & MASK)), \
> +      select(x.sc, y.sc, (COND_TYPE)(z.sc & MASK)), \
> +      select(x.sd, y.sd, (COND_TYPE)(z.sd & MASK)), \
> +      select(x.se, y.se, (COND_TYPE)(z.se & MASK)), \
> +      select(x.sf, y.sf, (COND_TYPE)(z.sf & MASK))); \
> +  }
> +
> +#define DEF(TYPE, COND_TYPE, MASK) \
> +  DEF2(TYPE, COND_TYPE, MASK) \
> +  DEF3(TYPE, COND_TYPE, MASK) \
> +  DEF4(TYPE, COND_TYPE, MASK) \
> +  DEF8(TYPE, COND_TYPE, MASK) \
> +  DEF16(TYPE, COND_TYPE, MASK)
> +
> +DEF(char, char, 0x80)
> +DEF(char, uchar, 0x80)
> +DEF(uchar, char, 0x80)
> +DEF(uchar, uchar, 0x80)
> +DEF(short, short, 0x8000)
> +DEF(short, ushort, 0x8000)
> +DEF(ushort, short, 0x8000)
> +DEF(ushort, ushort, 0x8000)
> +DEF(int, int, 0x80000000)
> +DEF(int, uint, 0x80000000)
> +DEF(uint, int, 0x80000000)
> +DEF(uint, uint, 0x80000000)
> +DEF(long, long, 0x8000000000000000UL)
> +DEF(long, ulong, 0x8000000000000000UL)
> +DEF(ulong, long, 0x8000000000000000UL)
> +DEF(ulong, ulong, 0x8000000000000000UL)
> +DEF(float, int, 0x80000000)
> +DEF(float, uint, 0x80000000)
> +#undef DEF
> +#undef DEF2
> +#undef DEF3
> +#undef DEF4
> +#undef DEF8
> +#undef DEF16
>  
>  /////////////////////////////////////////////////////////////////////////////
>  // Common Functions (see 6.11.4 of OCL 1.1 spec)
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/beignet
From 2cc934695a8c35c9b74c8b6e331b62f769edb071 Mon Sep 17 00:00:00 2001
From: Zhigang Gong <[email protected]>
Date: Tue, 3 Sep 2013 14:30:46 +0800
Subject: [PATCH] GBE: Support builtin vector functions for select()
 autogeneration.

Signed-off-by: Zhigang Gong <[email protected]>
---
 backend/src/builtin_vector_proto.def |    4 ++--
 backend/src/gen_builtin_vector.py    |    5 ++++-
 backend/src/ocl_stdlib.tmpl.h        |   24 ------------------------
 3 files changed, 6 insertions(+), 27 deletions(-)

diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
index 2a3daf2..37206a6 100644
--- a/backend/src/builtin_vector_proto.def
+++ b/backend/src/builtin_vector_proto.def
@@ -253,8 +253,8 @@ int any (igentype x)
 int all (igentype x)
 # XXX need to revisit select latter
 #gentype bitselect (gentype a, gentype b, gentype c)
-#gentype select (gentype a, gentype b, igentype c)
-#gentype select (gentype a, gentype b, ugentype c)
+gentype select (gentype a, gentype b, igentype c)
+gentype select (gentype a, gentype b, ugentype c)
 
 ##misc
 #gentypen shuffle (gentypem x, ugentypen mask)
diff --git a/backend/src/gen_builtin_vector.py b/backend/src/gen_builtin_vector.py
index 35e3a2a..cf023e7 100755
--- a/backend/src/gen_builtin_vector.py
+++ b/backend/src/gen_builtin_vector.py
@@ -311,7 +311,10 @@ class builtinProto():
                 if (isPointer(ptype)):
                     formatStr += '({} {} *)param{} + {:2d}'.format(ptype[2], ptype[0], n, j)
                 else:
-                    formatStr += 'param{}.s{:x}'.format(n, j)
+                    if (self.functionName == 'select' and n == 2):
+                        formatStr += '({})(param{}.s{:x} & (1 << (sizeof({})*8 - 1)))'.format(ptype[0], n, j, ptype[0])
+                    else:
+                        formatStr += 'param{}.s{:x}'.format(n, j)
 
             formatStr += ')'
 
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 2a20ee6..f3d2a8e 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1427,30 +1427,6 @@ DEF(float, int)
 DEF(float, uint)
 #undef DEF
 
-// This will be optimized out by LLVM and will output LLVM select instructions
-#define DECL_SELECT4(TYPE4, TYPE, COND_TYPE4, MASK) \
-INLINE_OVERLOADABLE TYPE4 select(TYPE4 src0, TYPE4 src1, COND_TYPE4 cond) { \
-  TYPE4 dst; \
-  const TYPE x0 = src0.x; /* Fix performance issue with CLANG */ \
-  const TYPE x1 = src1.x; \
-  const TYPE y0 = src0.y; \
-  const TYPE y1 = src1.y; \
-  const TYPE z0 = src0.z; \
-  const TYPE z1 = src1.z; \
-  const TYPE w0 = src0.w; \
-  const TYPE w1 = src1.w; \
-  dst.x = (cond.x & MASK) ? x1 : x0; \
-  dst.y = (cond.y & MASK) ? y1 : y0; \
-  dst.z = (cond.z & MASK) ? z1 : z0; \
-  dst.w = (cond.w & MASK) ? w1 : w0; \
-  return dst; \
-}
-DECL_SELECT4(int4, int, int4, 0x80000000)
-DECL_SELECT4(int4, int, uint4, 0x80000000)
-DECL_SELECT4(float4, float, int4, 0x80000000)
-DECL_SELECT4(float4, float, uint4, 0x80000000)
-#undef DECL_SELECT4
-
 /////////////////////////////////////////////////////////////////////////////
 // Common Functions (see 6.11.4 of OCL 1.1 spec)
 /////////////////////////////////////////////////////////////////////////////
-- 
1.7.9.5

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to