This patch adds the correct vector addition and subtraction intrinsics when
using Altivec on PPC. The current code takes the default path, and the LLVM
backend ends up issuing carry-out arithmetic instructions where saturating
ones are expected.

It also includes a fix for PowerPC, where char is unsigned by default,
which results in bogus values for vector shifting.

The patch fixes the lp_test_blend testcase. Any tips, advice, or
comments?


>From d04d123ff7e9cdf1dad0261f6ce9c288d0e80af8 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <[email protected]>
Date: Mon, 5 Nov 2012 11:55:05 -0600
Subject: [PATCH 3/3] PowerPC: Add Altivec vector add/sub intrinsics

This patch adds the correct vector addition and subtraction intrinsics when
using Altivec on PPC. The current code takes the default path, and the LLVM
backend ends up issuing carry-out arithmetic instructions where saturating
ones are expected.

It also includes a fix for PowerPC, where char is unsigned by default,
which results in bogus values for vector shifting.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c    |   42 +++++++++++++++---------
 src/gallium/auxiliary/gallivm/lp_bld_swizzle.c |    2 +-
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 8eb906a..59efe86 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -365,14 +365,19 @@ lp_build_add(struct lp_build_context *bld,
       if(a == bld->one || b == bld->one)
         return bld->one;
 
-      if(util_cpu_caps.has_sse2 &&
-         type.width * type.length == 128 &&
-         !type.floating && !type.fixed) {
-         if(type.width == 8)
-            intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
-         if(type.width == 16)
-            intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
-      }
+      if (type.width * type.length == 128 &&
+          !type.floating && !type.fixed) {
+         if(util_cpu_caps.has_sse2) {
+           if(type.width == 8)
+             intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
+           if(type.width == 16)
+             intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
+         } else if (util_cpu_caps.has_altivec) {
+           if(type.width == 8)
+              intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
+           if(type.width == 16)
+              intrinsic = type.sign ? "llvm.ppc.altivec.vaddsws" : "llvm.ppc.altivec.vadduws";
+         }
    
       if(intrinsic)
          return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b);
@@ -636,14 +641,19 @@ lp_build_sub(struct lp_build_context *bld,
       if(b == bld->one)
         return bld->zero;
 
-      if(util_cpu_caps.has_sse2 &&
-         type.width * type.length == 128 &&
-         !type.floating && !type.fixed) {
-         if(type.width == 8)
-            intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
-         if(type.width == 16)
-            intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
-      }
+      if (type.width * type.length == 128 &&
+          !type.floating && !type.fixed) {
+         if (util_cpu_caps.has_sse2) {
+           if(type.width == 8)
+              intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
+           if(type.width == 16)
+              intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
+         } else if (util_cpu_caps.has_altivec) {
+           if(type.width == 8)
+              intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
+           if(type.width == 16)
+              intrinsic = type.sign ? "llvm.ppc.altivec.vsubsws" : "llvm.ppc.altivec.vsubuws";
+         }
    
       if(intrinsic)
          return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 201a348..e1466e1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -200,7 +200,7 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
        *   YYYY YYYY .... YYYY  <= output
        */
       struct lp_type type4;
-      const char shifts[4][2] = {
+      const int shifts[4][2] = {
          { 1,  2},
          {-1,  2},
          { 1, -2},
-- 
1.7.1

_______________________________________________
mesa-dev mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to