Hi,

On Thu, Sep 16, 2010 at 2:37 AM, Jose Fonseca <jfons...@vmware.com> wrote:
> Hi Igor,
>
> The overall intent is good, but this creates 4*64bit = 256 bit registers 
> which don't exist. LLVM can split into 128bit instructions, but I found that 
> to be buggy in some cases, and it affects our ability to use sse intrinsics.
>
> It is also unnecessary for vertical operations such as multiplication.
>
> So I'd prefer that you create a <2*double> vector, and issue two 
> multiplications per channel, and do the same for other double opcodes.
>
> Is there any double opcode for which this would not work?

No. As we are doing the operation per channel, I believe we can do just one
multiplication, and lp_build_vec_type is enough to
create the vec type (patch below).

>
> A few minor details: instead of lp_types_to_double/lp_double_to_types, I'd 
> prefer cast_to_double, cast_from_double; and the double type should be 
> computed once and stored in the tgsi build context.

OK. About computing the double type once and storing it in the tgsi build
context: I do not think it is a good idea, because TGSI only supports float
operations and we are doing a "hack" to get double operations. The channels
are still float when using double operations; we are just doing something
like:

muld result.xy, a.yz, b.yz

so the hsb and msb are moved to x and y respectively.

But as I said before, this is my first experience with llvmpipe :)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index ca8db9c..c9174ce 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -970,6 +970,56 @@ emit_kil(
       lp_build_mask_update(bld->mask, mask);
 }

+static LLVMValueRef
+lp_cast_to_double(struct lp_build_context *bld,
+                   LLVMValueRef a,
+                   LLVMValueRef b)
+{  /* Emits IR computing (a << 32) | b on an integer view of a and b, and returns it bitcast to the lp_type_float_vec(64) vector type. */
+   struct lp_type type;
+   LLVMValueRef res;
+   LLVMTypeRef vec_type;
+   LLVMTypeRef vec_double_type;
+   /* Both inputs are checked against the build context's own type (assert only). */
+   assert(lp_check_value(bld->type, a));
+   assert(lp_check_value(bld->type, b));
+   /* Integer vector type used for the shift/or below; presumably 64-bit unsigned lanes -- TODO confirm. */
+   type = lp_type_uint_vec(64);
+   vec_type = lp_build_vec_type(type);
+   /* Reinterpret both inputs as that integer vector type. NOTE(review): assumes bld->vec_type and vec_type have the same total bit size -- confirm. */
+   a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+   b = LLVMBuildBitCast(bld->builder, b, vec_type, "");
+   /* res = (a << 32) | b, computed on the integer view. */
+   res = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type, 32),"");
+   res = LLVMBuildOr(bld->builder, res, b, "");
+   /* NOTE(review): a and b are not read again after these two casts, so their results appear unused. */
+   a = LLVMBuildBitCast(bld->builder, a, bld->vec_type, "");
+   b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
+   /* Reinterpret the combined integer vector as the lp_type_float_vec(64) vector type. */
+   type = lp_type_float_vec(64);
+   vec_double_type = lp_build_vec_type(type);
+   res = LLVMBuildBitCast(bld->builder, res, vec_double_type, "");
+
+   return res;
+}
+
+static void
+lp_cast_from_double(struct lp_build_context *bld,
+                    LLVMValueRef double_value,
+                    LLVMValueRef a,
+                    LLVMValueRef b)
+{  /* Emits IR that views double_value as an integer vector, masks it into b, and bitcasts both to bld->vec_type; returns nothing. */
+   LLVMTypeRef double_type;
+   struct lp_type type = lp_type_uint_vec(64);
+   /* Despite its name, double_type is built from the lp_type_uint_vec(64) type above. */
+   double_type = lp_build_vec_type(type);
+   a = LLVMBuildBitCast(bld->builder, double_value, double_type, "");
+   /* b = a & 0xFFFFFFFF. NOTE(review): a is not shifted right here, whereas lp_cast_to_double shifts a left by 32 -- confirm intended. */
+   b = LLVMBuildAnd(bld->builder, a, lp_build_const_int_vec(type,
0x00000000FFFFFFFF), "");
+   /* NOTE(review): a and b are by-value parameters and the function returns void, so the values assigned below are not visible to the caller. */
+   a = LLVMBuildBitCast(bld->builder, a, bld->vec_type, "");
+   b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
+}
+

 /**
  * Predicated fragment kill.
@@ -1988,6 +2038,34 @@ emit_instruction(
    case TGSI_OPCODE_NOP:
       break;

+   case TGSI_OPCODE_DMUL:
+      if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) &&
IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
+         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
+         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
+
+         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
+         tmp3 = emit_fetch( bld, inst, 1, CHAN_Y );
+
+         src0 = lp_cast_to_double(&bld->base, tmp0, tmp1);
+         src1 = lp_cast_to_double(&bld->base, tmp2, tmp3);
+         tmp4 = lp_build_mul(&bld->base, src0, src1);
+         lp_cast_from_double(&bld->base, tmp4, dst0[CHAN_X], dst0[CHAN_Y]);
+      }
+
+      if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) &&
IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
+         tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
+         tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
+
+         tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
+         tmp3 = emit_fetch( bld, inst, 1, CHAN_W );
+
+         src0 = lp_cast_to_double(&bld->base, tmp0, tmp1);
+         src1 = lp_cast_to_double(&bld->base, tmp2, tmp3);
+         tmp4 = lp_build_mul(&bld->base, src0, src1);
+         lp_cast_from_double(&bld->base, tmp4, dst0[CHAN_Z], dst0[CHAN_W]);
+      }
+      break;
+
    default:
       return FALSE;
    }

------------------------------------------------------------------------------
Start uncovering the many advantages of virtual appliances
and start using them to simplify application deployment and
accelerate your shift to cloud computing.
http://p.sf.net/sfu/novell-sfdev2dev
_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

Reply via email to