Re: [Mesa-dev] [PATCH 1/3] swr/rast: simdlib changes for clang/gcc

2017-07-21 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak  

> On Jul 20, 2017, at 5:09 PM, Tim Rowley  wrote:
> 
> Tested with clang-4.0 and gcc-6.3.
> ---
> .../swr/rasterizer/common/simdlib_512_avx512.inl   | 43 +-
> .../swr/rasterizer/common/simdlib_types.hpp|  2 +-
> 2 files changed, 35 insertions(+), 10 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
> index 7d90b7d1b0..7447d35ee2 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
> @@ -24,6 +24,21 @@
> #error Do not include this file directly, use "simdlib.hpp" instead.
> #endif
> 
> +#if defined(__GNUC__) && !defined( __clang__) && !defined(__INTEL_COMPILER)
> +// gcc missing these intrinsics
> +#ifndef _mm512_cmpneq_ps_mask
> +#define _mm512_cmpneq_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_NEQ_UQ)
> +#endif
> +
> +#ifndef _mm512_cmplt_ps_mask
> +#define _mm512_cmplt_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_LT_OS)
> +#endif
> +
> +#ifndef _mm512_cmplt_pd_mask
> +#define _mm512_cmplt_pd_mask(a,b) _mm512_cmp_pd_mask((a),(b),_CMP_LT_OS)
> +#endif
> +#endif
> +
> //
> // SIMD16 AVX512 (F) implementation
> //
> @@ -138,6 +153,17 @@ using SIMD256T = SIMD256Impl::AVX2Impl;
> }
> #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
> 
> +#define SIMD_EMU_IWRAPPER_2(op) \
> +static SIMDINLINE \
> +Integer SIMDCALL op(Integer a, Integer b)\
> +{\
> +return Integer\
> +{\
> +SIMD256T::op(a.v8[0], b.v8[0]),\
> +SIMD256T::op(a.v8[1], b.v8[1]),\
> +};\
> +}
> +
> private:
> static SIMDINLINE Integer vmask(__mmask8 m)
> {
> @@ -234,14 +260,6 @@ SIMD_IWRAPPER_1I(slli_epi32);   // return a << ImmT
> SIMD_IWRAPPER_2(sllv_epi32);
> SIMD_IWRAPPER_1I(srai_epi32);   // return a >> ImmT   (int32)
> SIMD_IWRAPPER_1I(srli_epi32);   // return a >> ImmT   (uint32)
> -SIMD_IWRAPPER_1I_(srli_si, srli_si512); // return a >> (ImmT*8) (uint)
> -
> -template<int ImmT>  // same as srli_si, but with Float cast to int
> -static SIMDINLINE Float SIMDCALL srlisi_ps(Float a)
> -{
> -return castsi_ps(srli_si<ImmT>(castps_si(a)));
> -}
> -
> SIMD_IWRAPPER_2(srlv_epi32);
> 
> //---
> @@ -443,10 +461,17 @@ static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b)
> return _mm512_inserti64x4(a, b, imm);
> }
> 
> +#if !defined(AVX512F_STRICT)
> SIMD_IWRAPPER_2(packs_epi16);   // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16
> SIMD_IWRAPPER_2(packs_epi32);   // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32
> SIMD_IWRAPPER_2(packus_epi16);  // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16
> SIMD_IWRAPPER_2(packus_epi32);  // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32
> +#else
> +SIMD_EMU_IWRAPPER_2(packs_epi16)
> +SIMD_EMU_IWRAPPER_2(packs_epi32)
> +SIMD_EMU_IWRAPPER_2(packus_epi16)
> +SIMD_EMU_IWRAPPER_2(packus_epi32)
> +#endif
> 
> static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz)
> // return a[swiz[i]] for each 32-bit lane i (float)
> {
> @@ -679,4 +704,4 @@ static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
> #undef SIMD_IWRAPPER_2
> #undef SIMD_IWRAPPER_2_
> #undef SIMD_IWRAPPER_2I
> -
> +#undef SIMD_EMU_IWRAPPER_2
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
> index 07775e7b83..bc23867c7b 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
> @@ -262,7 +262,7 @@ namespace SIMDImpl
> 
> namespace SIMD512Impl
> {
> -#if !defined(_MM_K0_REG)
> +#if !defined(__AVX512F__)
> // Define AVX512 types if not included via immintrin.h.
> // All data members of these types are ONLY to viewed
> // in a debugger.  Do NOT access them via code!
> -- 
> 2.11.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] swr/rast: simdlib changes for clang/gcc

2017-07-20 Thread Tim Rowley
Tested with clang-4.0 and gcc-6.3.
---
 .../swr/rasterizer/common/simdlib_512_avx512.inl   | 43 +-
 .../swr/rasterizer/common/simdlib_types.hpp|  2 +-
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
index 7d90b7d1b0..7447d35ee2 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -24,6 +24,21 @@
 #error Do not include this file directly, use "simdlib.hpp" instead.
 #endif
 
+#if defined(__GNUC__) && !defined( __clang__) && !defined(__INTEL_COMPILER)
+// gcc missing these intrinsics
+#ifndef _mm512_cmpneq_ps_mask
+#define _mm512_cmpneq_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_NEQ_UQ)
+#endif
+
+#ifndef _mm512_cmplt_ps_mask
+#define _mm512_cmplt_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_LT_OS)
+#endif
+
+#ifndef _mm512_cmplt_pd_mask
+#define _mm512_cmplt_pd_mask(a,b) _mm512_cmp_pd_mask((a),(b),_CMP_LT_OS)
+#endif
+#endif
+
 //
 // SIMD16 AVX512 (F) implementation
 //
@@ -138,6 +153,17 @@ using SIMD256T = SIMD256Impl::AVX2Impl;
 }
 #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
 
+#define SIMD_EMU_IWRAPPER_2(op) \
+static SIMDINLINE \
+Integer SIMDCALL op(Integer a, Integer b)\
+{\
+return Integer\
+{\
+SIMD256T::op(a.v8[0], b.v8[0]),\
+SIMD256T::op(a.v8[1], b.v8[1]),\
+};\
+}
+
 private:
 static SIMDINLINE Integer vmask(__mmask8 m)
 {
@@ -234,14 +260,6 @@ SIMD_IWRAPPER_1I(slli_epi32);   // return a << ImmT
 SIMD_IWRAPPER_2(sllv_epi32);
 SIMD_IWRAPPER_1I(srai_epi32);   // return a >> ImmT   (int32)
 SIMD_IWRAPPER_1I(srli_epi32);   // return a >> ImmT   (uint32)
-SIMD_IWRAPPER_1I_(srli_si, srli_si512); // return a >> (ImmT*8) (uint)
-
-template<int ImmT>  // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float a)
-{
-return castsi_ps(srli_si<ImmT>(castps_si(a)));
-}
-
 SIMD_IWRAPPER_2(srlv_epi32);
 
 //---
@@ -443,10 +461,17 @@ static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b)
 return _mm512_inserti64x4(a, b, imm);
 }
 
+#if !defined(AVX512F_STRICT)
 SIMD_IWRAPPER_2(packs_epi16);   // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16
 SIMD_IWRAPPER_2(packs_epi32);   // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32
 SIMD_IWRAPPER_2(packus_epi16);  // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16
 SIMD_IWRAPPER_2(packus_epi32);  // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32
+#else
+SIMD_EMU_IWRAPPER_2(packs_epi16)
+SIMD_EMU_IWRAPPER_2(packs_epi32)
+SIMD_EMU_IWRAPPER_2(packus_epi16)
+SIMD_EMU_IWRAPPER_2(packus_epi32)
+#endif
 
 static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz)
// return a[swiz[i]] for each 32-bit lane i (float)
 {
@@ -679,4 +704,4 @@ static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
 #undef SIMD_IWRAPPER_2
 #undef SIMD_IWRAPPER_2_
 #undef SIMD_IWRAPPER_2I
-
+#undef SIMD_EMU_IWRAPPER_2
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
index 07775e7b83..bc23867c7b 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
@@ -262,7 +262,7 @@ namespace SIMDImpl
 
 namespace SIMD512Impl
 {
-#if !defined(_MM_K0_REG)
+#if !defined(__AVX512F__)
 // Define AVX512 types if not included via immintrin.h.
 // All data members of these types are ONLY to viewed
 // in a debugger.  Do NOT access them via code!
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev