This adds support for ARB_gpu_shader_fp64 and ARB_vertex_attrib_64bit to llvmpipe.
Two things that don't mix well are SoA and doubles, see emit_fetch_double, and emit_store_double_chan in this. I've also had to split emit_data.chan, to add src_chan, which can be different for doubles. Open issues: are intrinsics okay for floor/ceil? should any of these functions have CPU versions? Tested with piglit, no regressions, all the fp64 tests seem to pass. Signed-off-by: Dave Airlie <[email protected]> --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 12 ++ src/gallium/auxiliary/gallivm/lp_bld_limits.h | 1 + src/gallium/auxiliary/gallivm/lp_bld_logic.c | 2 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 47 +++- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 4 + src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 240 +++++++++++++++++++++ src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h | 3 + src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 163 +++++++++++++- 8 files changed, 458 insertions(+), 14 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 9daa93e..8fba43f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1997,6 +1997,12 @@ lp_build_floor(struct lp_build_context *bld, LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMTypeRef vec_type = bld->vec_type; + if (type.width != 32) { + char intrinsic[32]; + util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", type.length, type.width); + return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a); + } + assert(type.width == 32); /* might want to handle doubles at some point */ inttype = type; @@ -2066,6 +2072,12 @@ lp_build_ceil(struct lp_build_context *bld, LLVMTypeRef int_vec_type = bld->int_vec_type; LLVMTypeRef vec_type = bld->vec_type; + if (type.width != 32) { + char intrinsic[32]; + util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", type.length, type.width); + return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a); + } + 
assert(type.width == 32); /* might want to handle doubles at some point */ inttype = type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index 2851fd1..3db7261 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -132,6 +132,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_DOUBLES: + return 1; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 80b53e5..f724cfa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -81,7 +81,7 @@ lp_build_compare_ext(struct gallivm_state *gallivm, boolean ordered) { LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, lp_type_int_vec(32, 32 * type.length)); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); LLVMValueRef cond; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index e391d8a..1887956 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -175,13 +175,52 @@ void lp_build_fetch_args( unsigned src; for (src = 0; src < emit_data->info->num_src; src++) { emit_data->args[src] = lp_build_emit_fetch(bld_base, emit_data->inst, src, - emit_data->chan); + emit_data->src_chan); } emit_data->arg_count = emit_data->info->num_src; lp_build_action_set_dst_type(emit_data, bld_base, emit_data->inst->Instruction.Opcode); } +/** + * with doubles src and dst channels aren't 1:1. 
+ * check the src/dst types for the opcode, + * 1. if neither is double then src == dst; + * 2. if dest is double + * - don't store to y or w + * - if src is double then src == dst. + * - else for f2d, d.xy = s.x + * - else for f2d, d.zw = s.y + * 3. if dst is single, src is double + * - map dst x,z to src xy; + * - map dst y,w to src zw; + */ +static int get_src_chan_idx(unsigned opcode, + int dst_chan_index) +{ + enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(opcode); + enum tgsi_opcode_type stype = tgsi_opcode_infer_src_type(opcode); + + if (dtype != TGSI_TYPE_DOUBLE && stype != TGSI_TYPE_DOUBLE) + return dst_chan_index; + if (dtype == TGSI_TYPE_DOUBLE) { + if (dst_chan_index == 1 || dst_chan_index == 3) + return -1; + if (stype == TGSI_TYPE_DOUBLE) + return dst_chan_index; + if (dst_chan_index == 0) + return 0; + if (dst_chan_index == 2) + return 1; + } else { + if (dst_chan_index == 0 || dst_chan_index == 2) + return 0; + if (dst_chan_index == 1 || dst_chan_index == 3) + return 2; + } + return -1; +} + /* XXX: COMMENT * It should be assumed that this function ignores writemasks */ @@ -197,7 +236,6 @@ lp_build_tgsi_inst_llvm( struct lp_build_emit_data emit_data; unsigned chan_index; LLVMValueRef val; - bld_base->pc++; if (bld_base->emit_debug) { @@ -240,7 +278,12 @@ lp_build_tgsi_inst_llvm( /* Emit the instructions */ if (info->output_mode == TGSI_OUTPUT_COMPONENTWISE && bld_base->soa) { TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { + int src_index = get_src_chan_idx(inst->Instruction.Opcode, chan_index); + /* ignore channels 1/3 in double dst */ + if (src_index == -1) + continue; emit_data.chan = chan_index; + emit_data.src_chan = src_index; if (!action->fetch_args) { lp_build_fetch_args(bld_base, &emit_data); } else { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 967373c..5809c5a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -338,6 +338,7 @@ struct lp_build_tgsi_context struct lp_build_context uint_bld; struct lp_build_context int_bld; + struct lp_build_context dbl_bld; /** This array stores functions that are used to transform TGSI opcodes to * LLVM instructions. */ @@ -349,6 +350,9 @@ struct lp_build_tgsi_context struct lp_build_tgsi_action sqrt_action; + struct lp_build_tgsi_action drsq_action; + + struct lp_build_tgsi_action dsqrt_action; const struct tgsi_shader_info *info; lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT]; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 9cb42b2..ca57e0e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -894,6 +894,121 @@ const struct lp_build_tgsi_action xpd_action = { xpd_emit /* emit */ }; +/* TGSI_OPCODE_D2F */ +static void +d2f_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = + LLVMBuildFPTrunc(bld_base->base.gallivm->builder, + emit_data->args[0], + bld_base->base.vec_type, ""); +} + +/* TGSI_OPCODE_D2F */ +static void +d2i_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = + LLVMBuildFPToSI(bld_base->base.gallivm->builder, + emit_data->args[0], + bld_base->base.int_vec_type, ""); +} + +/* TGSI_OPCODE_D2U */ +static void +d2u_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = + LLVMBuildFPToUI(bld_base->base.gallivm->builder, + emit_data->args[0], + bld_base->base.int_vec_type, ""); +} + +static void +f2d_emit( + const struct lp_build_tgsi_action * action, + struct 
lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = + LLVMBuildFPExt(bld_base->base.gallivm->builder, + emit_data->args[0], + bld_base->dbl_bld.vec_type, ""); +} +static void +u2d_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = + LLVMBuildUIToFP(bld_base->base.gallivm->builder, + emit_data->args[0], + bld_base->dbl_bld.vec_type, ""); +} + +static void +i2d_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = + LLVMBuildSIToFP(bld_base->base.gallivm->builder, + emit_data->args[0], + bld_base->dbl_bld.vec_type, ""); +} +/* TGSI_OPCODE_DMAD */ + +static void +dmad_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL, + emit_data->args[0], + emit_data->args[1]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DADD, tmp, emit_data->args[2]); +} + +/*.TGSI_OPCODE_DRCP.*/ +static void drcp_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef one; + one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f); + emit_data->output[emit_data->chan] = LLVMBuildFDiv( + bld_base->base.gallivm->builder, + one, emit_data->args[0], ""); +} + +/* TGSI_OPCODE_DFRAC */ +static void dfrac_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_floor(&bld_base->dbl_bld, + emit_data->args[0]); + emit_data->output[emit_data->chan] = 
LLVMBuildFSub(bld_base->base.gallivm->builder, + emit_data->args[0], tmp, ""); +} + void lp_set_default_actions(struct lp_build_tgsi_context * bld_base) { @@ -948,6 +1063,25 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit; bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit; + + bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit; + bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit; + bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit; + bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit; + + bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit; + bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit; + bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit; + + bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit; + bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit; + bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit; + + bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit; + + bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit; + bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit; + } /* CPU Only default actions */ @@ -1792,6 +1926,102 @@ xor_emit_cpu( emit_data->args[1]); } +static void +dabs_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld, + emit_data->args[0]); +} + +/* TGSI_OPCODE_DNEG (CPU Only) */ +static void +dneg_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld, + bld_base->dbl_bld.zero, + emit_data->args[0]); +} + +/* TGSI_OPCODE_DSET Helper (CPU Only) */ +static void +dset_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data, 
+ unsigned pipe_func) +{ + LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func, + emit_data->args[0], emit_data->args[1]); + emit_data->output[emit_data->chan] = cond; +} + +/* TGSI_OPCODE_DSEQ (CPU Only) */ +static void +dseq_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL); +} + +/* TGSI_OPCODE_DSGE (CPU Only) */ +static void +dsge_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL); +} + +/* TGSI_OPCODE_DSLT (CPU Only) */ +static void +dslt_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS); +} + +/* TGSI_OPCODE_DSNE (CPU Only) */ +static void +dsne_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL); +} + +/* Reciprical squareroot (CPU Only) */ +static void +drecip_sqrt_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld, + emit_data->args[0]); +} + +static void +dsqrt_emit_cpu( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld, + emit_data->args[0]); +} + void lp_set_default_actions_cpu( struct lp_build_tgsi_context * bld_base) @@ -1864,4 +2094,14 @@ lp_set_default_actions_cpu( bld_base->op_actions[TGSI_OPCODE_XOR].emit = 
xor_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu; + + bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu; + } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h index fc7fdbd..1b3b01c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h @@ -71,6 +71,9 @@ struct lp_build_emit_data { */ unsigned chan; + /** + * This is used to specifed the src channel to read from for doubles */ + unsigned src_chan; /** The lp_build_tgsi_action::emit 'executes' the opcode and writes the * results to this array. 
*/ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 268379e..10372fe 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1139,8 +1139,10 @@ stype_to_fetch(struct lp_build_tgsi_context * bld_base, case TGSI_TYPE_SIGNED: bld_fetch = &bld_base->int_bld; break; - case TGSI_TYPE_VOID: case TGSI_TYPE_DOUBLE: + bld_fetch = &bld_base->dbl_bld; + break; + case TGSI_TYPE_VOID: default: assert(0); bld_fetch = NULL; @@ -1241,13 +1243,18 @@ emit_fetch_constant( else { LLVMValueRef index; /* index into the const buffer */ LLVMValueRef scalar, scalar_ptr; - + struct lp_build_context *bld_broad = &bld_base->base; index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); scalar_ptr = LLVMBuildGEP(builder, consts_ptr, &index, 1, ""); + if (stype == TGSI_TYPE_DOUBLE) { + LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0); + scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, ""); + bld_broad = &bld_base->dbl_bld; + } scalar = LLVMBuildLoad(builder, scalar_ptr, ""); - res = lp_build_broadcast_scalar(&bld_base->base, scalar); + res = lp_build_broadcast_scalar(bld_broad, scalar); } if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) { @@ -1258,6 +1265,45 @@ emit_fetch_constant( return res; } +/** + * Fetch double values from two separate channels. + * Doubles are stored split across two channels, like xy and zw. + * this functions creates a set of 16 floats, + * extracts the values from the two channels, + * puts them in the correct place, then casts to 8 doubles. 
+ */ +static LLVMValueRef +emit_fetch_double( + struct lp_build_tgsi_context * bld_base, + enum tgsi_opcode_type stype, + LLVMValueRef input, + LLVMValueRef input2) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef res; + struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); + int i; + + /* need to create an array of floats interleaved, + then cast that to that to a double array */ + res = LLVMBuildBitCast(builder, bld_base->dbl_bld.undef, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), ""); + + for (i = 0; i < bld_base->base.type.length * 2; i+=2) { + LLVMValueRef scalar, scalar2; + LLVMValueRef ii = lp_build_const_int32(gallivm, i); + LLVMValueRef ii1 = lp_build_const_int32(gallivm, i + 1); + LLVMValueRef si = lp_build_const_int32(gallivm, i >> 1); + + scalar = LLVMBuildExtractElement(builder, input, si, ""); + res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); + scalar2 = LLVMBuildExtractElement(builder, input2, si, ""); + res = LLVMBuildInsertElement(builder, res, scalar2, ii1, ""); + } + return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); +} + static LLVMValueRef emit_fetch_immediate( struct lp_build_tgsi_context * bld_base, @@ -1309,12 +1355,16 @@ emit_fetch_immediate( } else { res = bld->immediates[reg->Register.Index][swizzle]; + if (stype == TGSI_TYPE_DOUBLE) + res = emit_fetch_double(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]); } if (stype == TGSI_TYPE_UNSIGNED) { res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); } else if (stype == TGSI_TYPE_SIGNED) { res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); + } else if (stype == TGSI_TYPE_DOUBLE) { + res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, ""); } return res; } @@ -1357,12 +1407,27 @@ 
emit_fetch_input( if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { LLVMValueRef lindex = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); - LLVMValueRef input_ptr = LLVMBuildGEP(builder, - bld->inputs_array, &lindex, 1, ""); + LLVMValueRef input_ptr = LLVMBuildGEP(builder, + bld->inputs_array, &lindex, 1, ""); + res = LLVMBuildLoad(builder, input_ptr, ""); + if (stype == TGSI_TYPE_DOUBLE) { + LLVMValueRef lindex1; + LLVMValueRef input_ptr2; + LLVMValueRef res2; + + lindex1 = lp_build_const_int32(gallivm, + reg->Register.Index * 4 + swizzle + 1); + input_ptr2 = LLVMBuildGEP(builder, + bld->inputs_array, &lindex1, 1, ""); + res2 = LLVMBuildLoad(builder, input_ptr2, ""); + res = emit_fetch_double(bld_base, stype, res, res2); + } } else { res = bld->inputs[reg->Register.Index][swizzle]; + if (stype == TGSI_TYPE_DOUBLE) + res = emit_fetch_double(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]); } } @@ -1372,6 +1437,8 @@ emit_fetch_input( res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); } else if (stype == TGSI_TYPE_SIGNED) { res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); + } else if (stype == TGSI_TYPE_DOUBLE) { + res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, ""); } return res; @@ -1413,7 +1480,7 @@ emit_fetch_gs_input( } else { attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); } - + if (reg->Dimension.Indirect) { vertex_index = get_indirect_index(bld, reg->Register.File, @@ -1436,6 +1503,8 @@ emit_fetch_gs_input( res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); } else if (stype == TGSI_TYPE_SIGNED) { res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); + } else if (stype == TGSI_TYPE_DOUBLE) { + res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, ""); } return res; @@ -1480,6 +1549,14 @@ emit_fetch_temporary( LLVMValueRef temp_ptr; temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); 
res = LLVMBuildLoad(builder, temp_ptr, ""); + + if (stype == TGSI_TYPE_DOUBLE) { + LLVMValueRef temp_ptr2, res2; + + temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1); + res2 = LLVMBuildLoad(builder, temp_ptr2, ""); + res = emit_fetch_double(bld_base, stype, res, res2); + } } if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) { @@ -1648,6 +1725,49 @@ emit_fetch_predicate( } } +/** + * store an array of 8 doubles into two arrays of 8 floats + * i.e. + * value is d0, d1, d2, d3 etc. + * each double has high and low pieces x, y + * so gets stored into the separate channels as: + * chan_ptr = d0.x, d1.x, d2.x, d3.x + * chan_ptr2 = d0.y, d1.y, d2.y, d3.y + */ +static void +emit_store_double_chan(struct lp_build_tgsi_context *bld_base, + int dtype, + LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2, + LLVMValueRef pred, + LLVMValueRef value) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *float_bld = &bld_base->base; + int i; + if (dtype == TGSI_TYPE_DOUBLE) { + LLVMValueRef temp, temp2; + + temp = float_bld->undef; + temp2 = float_bld->undef; + for (i = 0; i < bld_base->base.type.length * 2; i += 2) { + LLVMValueRef scalar, scalar2; + LLVMValueRef ii = lp_build_const_int32(gallivm, i); + LLVMValueRef ii1 = lp_build_const_int32(gallivm, i + 1); + LLVMValueRef si = lp_build_const_int32(gallivm, i >> 1); + scalar = LLVMBuildExtractElement(builder, value, ii, ""); + temp = LLVMBuildInsertElement(builder, temp, scalar, si, ""); + scalar2 = LLVMBuildExtractElement(builder, value, ii1, ""); + temp2 = LLVMBuildInsertElement(builder, temp2, scalar2, si, ""); + } + + lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr); + lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, chan_ptr2); + } else { + lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, chan_ptr); + } +} 
/** * Register store. @@ -1721,13 +1841,21 @@ emit_store_chan( else { LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index, chan_index); - lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr); + LLVMValueRef out_ptr2 = NULL; + if (dtype == TGSI_TYPE_DOUBLE) + out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index, + chan_index + 1); + + emit_store_double_chan(bld_base, dtype, out_ptr, out_ptr2, pred, value); } break; case TGSI_FILE_TEMPORARY: /* Temporaries are always stored as floats */ - value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); + if (dtype != TGSI_TYPE_DOUBLE) + value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); + else + value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), ""); if (reg->Register.Indirect) { LLVMValueRef index_vec; /* indexes into the temp registers */ @@ -1747,9 +1875,12 @@ emit_store_chan( &bld->exec_mask, pred); } else { - LLVMValueRef temp_ptr; + LLVMValueRef temp_ptr, temp_ptr2 = NULL; temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index); - lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr); + if (dtype == TGSI_TYPE_DOUBLE) + temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index + 1); + + emit_store_double_chan(bld_base, dtype, temp_ptr, temp_ptr2, pred, value); } break; @@ -1818,13 +1949,16 @@ emit_store( { unsigned chan_index; struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - + enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); if(info->num_dst) { LLVMValueRef pred[TGSI_NUM_CHANNELS]; emit_fetch_predicate( bld, inst, pred ); TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + + if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3)) + continue; emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]); } } @@ -2823,6 +2957,7 @@ void 
lp_emit_immediate_soa( lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); break; + case TGSI_IMM_FLOAT64: case TGSI_IMM_UINT32: for( i = 0; i < size; ++i ) { LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint); @@ -3674,6 +3809,12 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); + { + struct lp_type dbl_type; + dbl_type = type; + dbl_type.width *= 2; + lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type); + } bld.mask = mask; bld.inputs = inputs; bld.outputs = outputs; -- 2.4.3 _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
