Forgot to mention: this passes the piglit arb_blend_func_extended tests. Also, I forgot to remove the load_shader_output function from this patch (it was an attempt to factor out some more code, but I ultimately reverted it because it did not make the code any simpler).
Roland Am 08.02.2013 04:19, schrieb srol...@vmware.com: > From: Roland Scheidegger <srol...@vmware.com> > > link up the fs outputs and blend inputs, and make sure the second blend source > is correctly loaded and converted (which is quite complex). > There's a slight refactoring of the monster generate_unswizzled_blend() > function where it makes sense to factor out alpha conversion (which needs > to run twice for dual source blend). > --- > src/gallium/drivers/llvmpipe/lp_bld_blend.h | 1 + > src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 13 +- > src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- > src/gallium/drivers/llvmpipe/lp_state_fs.c | 331 > +++++++++++++++++------ > src/gallium/drivers/llvmpipe/lp_test_blend.c | 3 +- > 5 files changed, 257 insertions(+), 93 deletions(-) > > diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h > b/src/gallium/drivers/llvmpipe/lp_bld_blend.h > index 4bd2867..249a345 100644 > --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h > +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h > @@ -62,6 +62,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm, > LLVMValueRef src, > LLVMValueRef src_alpha, > LLVMValueRef src1, > + LLVMValueRef src1_alpha, > LLVMValueRef dst, > LLVMValueRef mask, > LLVMValueRef const_, > diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c > b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c > index 8e9e7fe..c4d04a2 100644 > --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c > +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c > @@ -70,6 +70,7 @@ struct lp_build_blend_aos_context > LLVMValueRef src; > LLVMValueRef src_alpha; > LLVMValueRef src1; > + LLVMValueRef src1_alpha; > LLVMValueRef dst; > LLVMValueRef const_; > LLVMValueRef const_alpha; > @@ -94,6 +95,7 @@ lp_build_blend_factor_unswizzled(struct > lp_build_blend_aos_context *bld, > boolean alpha) > { > LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src; > + LLVMValueRef src1_alpha = bld->src1_alpha ? 
bld->src1_alpha : bld->src1; > LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : > bld->const_; > > switch (factor) { > @@ -123,8 +125,9 @@ lp_build_blend_factor_unswizzled(struct > lp_build_blend_aos_context *bld, > case PIPE_BLENDFACTOR_CONST_ALPHA: > return const_alpha; > case PIPE_BLENDFACTOR_SRC1_COLOR: > - case PIPE_BLENDFACTOR_SRC1_ALPHA: > return bld->src1; > + case PIPE_BLENDFACTOR_SRC1_ALPHA: > + return src1_alpha; > case PIPE_BLENDFACTOR_INV_SRC_COLOR: > if(!bld->inv_src) > bld->inv_src = lp_build_comp(&bld->base, bld->src); > @@ -147,8 +150,9 @@ lp_build_blend_factor_unswizzled(struct > lp_build_blend_aos_context *bld, > bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha); > return bld->inv_const_alpha; > case PIPE_BLENDFACTOR_INV_SRC1_COLOR: > - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: > return lp_build_comp(&bld->base, bld->src1); > + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: > + return lp_build_comp(&bld->base, src1_alpha); > default: > assert(0); > return bld->base.zero; > @@ -265,10 +269,13 @@ lp_build_blend_factor(struct lp_build_blend_aos_context > *bld, > * @param type data type of the pixel vector > * @param rt render target index > * @param src blend src > + * @param src_alpha blend src alpha (if not included in src) > * @param src1 second blend src (for dual source blend) > + * @param src1_alpha second blend src alpha (if not included in src1) > * @param dst blend dst > * @param mask optional mask to apply to the blending result > * @param const_ const blend color > + * @param const_alpha const blend color alpha (if not included in const_) > * @param swizzle swizzle values for RGBA > * > * @return the result of blending src and dst > @@ -282,6 +289,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm, > LLVMValueRef src, > LLVMValueRef src_alpha, > LLVMValueRef src1, > + LLVMValueRef src1_alpha, > LLVMValueRef dst, > LLVMValueRef mask, > LLVMValueRef const_, > @@ -307,6 +315,7 @@ lp_build_blend_aos(struct gallivm_state 
*gallivm, > bld.dst = dst; > bld.const_ = const_; > bld.src_alpha = src_alpha; > + bld.src1_alpha = src1_alpha; > bld.const_alpha = const_alpha; > > /* Find the alpha channel if not provided seperately */ > diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c > b/src/gallium/drivers/llvmpipe/lp_screen.c > index f3bc516..b9c1567 100644 > --- a/src/gallium/drivers/llvmpipe/lp_screen.c > +++ b/src/gallium/drivers/llvmpipe/lp_screen.c > @@ -115,7 +115,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum > pipe_cap param) > case PIPE_CAP_SM3: > return 1; > case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: > - return 0; > + return 1; > case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: > return PIPE_MAX_SO_BUFFERS; > case PIPE_CAP_ANISOTROPIC_FILTER: > diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c > b/src/gallium/drivers/llvmpipe/lp_state_fs.c > index 2b31c14..c04fa5e 100644 > --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c > +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c > @@ -66,6 +66,7 @@ > #include "util/u_dump.h" > #include "util/u_string.h" > #include "util/u_simple_list.h" > +#include "util/u_dual_blend.h" > #include "os/os_time.h" > #include "pipe/p_shader_tokens.h" > #include "draw/draw_context.h" > @@ -247,6 +248,8 @@ generate_fs(struct gallivm_state *gallivm, > boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] > == 0 && > shader->info.base.num_inputs < 3 && > shader->info.base.num_instructions < 8); > + const boolean dual_source_blend = key->blend.rt[0].blend_enable && > + util_blend_state_is_dual(&key->blend, > 0); > unsigned attrib; > unsigned chan; > unsigned cbuf; > @@ -302,6 +305,12 @@ generate_fs(struct gallivm_state *gallivm, > color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color"); > } > } > + if (dual_source_blend) { > + assert(key->nr_cbufs <= 1); > + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { > + color[1][chan] = lp_build_alloca(gallivm, vec_type, "color1"); > + } > + } > > /* do triangle edge testing */ > 
if (partial_mask) { > @@ -414,8 +423,9 @@ generate_fs(struct gallivm_state *gallivm, > /* Color write */ > for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) > { > - if (shader->info.base.output_semantic_name[attrib] == > TGSI_SEMANTIC_COLOR && > - shader->info.base.output_semantic_index[attrib] < key->nr_cbufs) > + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; > + if ((shader->info.base.output_semantic_name[attrib] == > TGSI_SEMANTIC_COLOR) && > + ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend))) > { > unsigned cbuf = shader->info.base.output_semantic_index[attrib]; > for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { > @@ -478,6 +488,8 @@ generate_fs_loop(struct gallivm_state *gallivm, > boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] > == 0 && > shader->info.base.num_inputs < 3 && > shader->info.base.num_instructions < 8); > + const boolean dual_source_blend = key->blend.rt[0].blend_enable && > + util_blend_state_is_dual(&key->blend, > 0); > unsigned attrib; > unsigned chan; > unsigned cbuf; > @@ -551,7 +563,15 @@ generate_fs_loop(struct gallivm_state *gallivm, > num_loop, "color"); > } > } > - > + if (dual_source_blend) { > + assert(key->nr_cbufs <= 1); > + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { > + out_color[1][chan] = lp_build_array_alloca(gallivm, > + > lp_build_vec_type(gallivm, > + type), > + num_loop, "color1"); > + } > + } > > > /* 'mask' will control execution based on quad's pixel alive/killed state > */ > @@ -656,10 +676,10 @@ generate_fs_loop(struct gallivm_state *gallivm, > /* Color write */ > for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) > { > - if (shader->info.base.output_semantic_name[attrib] == > TGSI_SEMANTIC_COLOR && > - shader->info.base.output_semantic_index[attrib] < key->nr_cbufs) > + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; > + if ((shader->info.base.output_semantic_name[attrib] == > TGSI_SEMANTIC_COLOR) && > + 
((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend))) > { > - unsigned cbuf = shader->info.base.output_semantic_index[attrib]; > for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { > if(outputs[attrib][chan]) { > /* XXX: just initialize outputs to point at colors[] and > @@ -1283,6 +1303,142 @@ convert_from_blend_type(struct gallivm_state *gallivm, > } > } > > +static void > +load_shader_output(struct gallivm_state *gallivm, > + struct lp_type fs_type, > + const unsigned char swizzle[TGSI_NUM_CHANNELS], > + LLVMValueRef fs_out_color[TGSI_NUM_CHANNELS][4], > + LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS], > + LLVMValueRef* fs_mask, > + LLVMValueRef* src_mask, > + LLVMValueRef* src_alpha, > + const unsigned alpha_channel, > + const unsigned src_channels, > + const bool has_alpha, > + const unsigned num_fs, > + const unsigned dst_channels) > +{ > + LLVMBuilderRef builder = gallivm->builder; > + unsigned i, j; > + > + for (i = 0; i < num_fs; ++i) { > + /* Always load alpha for use in blending */ > + LLVMValueRef alpha = LLVMBuildLoad(builder, > fs_out_color[alpha_channel][i], ""); > + > + /* Load each channel */ > + for (j = 0; j < dst_channels; ++j) { > + fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[swizzle[j]][i], > ""); > + } > + > + /* If 3 channels then pad to include alpha for 4 element transpose */ > + /* > + * XXX If we include that here maybe could actually use it instead of > + * separate alpha for blending? 
> + */ > + if (dst_channels == 3 && !has_alpha) { > + fs_src[i][3] = alpha; > + } > + > + /* We split the row_mask and row_alpha as we want 128bit interleave */ > + if (fs_type.length == 8) { > + src_mask[i*2 + 0] = lp_build_extract_range(gallivm, fs_mask[i], 0, > src_channels); > + src_mask[i*2 + 1] = lp_build_extract_range(gallivm, fs_mask[i], > src_channels, src_channels); > + > + src_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, > src_channels); > + src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, > src_channels, src_channels); > + } else { > + src_mask[i] = fs_mask[i]; > + src_alpha[i] = alpha; > + } > + } > +} > + > + > +static void > +convert_alpha(struct gallivm_state *gallivm, > + struct lp_type row_type, > + struct lp_type alpha_type, > + const unsigned block_size, > + const unsigned block_height, > + const unsigned src_count, > + const unsigned dst_channels, > + const bool pad_inline, > + LLVMValueRef* src_alpha) > +{ > + LLVMBuilderRef builder = gallivm->builder; > + unsigned i, j; > + unsigned length = row_type.length; > + row_type.length = alpha_type.length; > + > + /* Twiddle the alpha to match pixels */ > + lp_bld_quad_twiddle(gallivm, alpha_type, src_alpha, 4, src_alpha); > + > + for (i = 0; i < 4; ++i) { > + lp_build_conv(gallivm, alpha_type, row_type, &src_alpha[i], 1, > &src_alpha[i], 1); > + } > + > + alpha_type = row_type; > + row_type.length = length; > + > + /* If only one channel we can only need the single alpha value per pixel > */ > + if (src_count == 1) { > + assert(dst_channels == 1); > + > + lp_build_concat_n(gallivm, alpha_type, src_alpha, 4, src_alpha, > src_count); > + } else { > + /* If there are more srcs than rows then we need to split alpha up */ > + if (src_count > block_height) { > + for (i = src_count; i > 0; --i) { > + unsigned pixels = block_size / src_count; > + unsigned idx = i - 1; > + > + src_alpha[idx] = lp_build_extract_range(gallivm, src_alpha[(idx > * pixels) / 4], (idx * pixels) % 4, 
pixels); > + } > + } > + > + /* If there is a src for each pixel broadcast the alpha across whole > row */ > + if (src_count == block_size) { > + for (i = 0; i < src_count; ++i) { > + src_alpha[i] = lp_build_broadcast(gallivm, > lp_build_vec_type(gallivm, row_type), src_alpha[i]); > + } > + } else { > + unsigned pixels = block_size / src_count; > + unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : dst_channels; > + unsigned alpha_span = 1; > + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; > + > + /* Check if we need 2 src_alphas for our shuffles */ > + if (pixels > alpha_type.length) { > + alpha_span = 2; > + } > + > + /* Broadcast alpha across all channels, e.g. a1a2 to > a1a1a1a1a2a2a2a2 */ > + for (j = 0; j < row_type.length; ++j) { > + if (j < pixels * channels) { > + shuffles[j] = lp_build_const_int32(gallivm, j / channels); > + } else { > + shuffles[j] = > LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); > + } > + } > + > + for (i = 0; i < src_count; ++i) { > + unsigned idx1 = i, idx2 = i; > + > + if (alpha_span > 1){ > + idx1 *= alpha_span; > + idx2 = idx1 + 1; > + } > + > + src_alpha[i] = LLVMBuildShuffleVector(builder, > + src_alpha[idx1], > + src_alpha[idx2], > + LLVMConstVector(shuffles, > row_type.length), > + ""); > + } > + } > + } > +} > + > > /** > * Generates the blend function for unswizzled colour buffers > @@ -1296,7 +1452,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, > unsigned int num_fs, > struct lp_type fs_type, > LLVMValueRef* fs_mask, > - LLVMValueRef fs_out_color[TGSI_NUM_CHANNELS][4], > + LLVMValueRef > fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][4], > LLVMValueRef context_ptr, > LLVMValueRef color_ptr, > LLVMValueRef stride, > @@ -1311,9 +1467,12 @@ generate_unswizzled_blend(struct gallivm_state > *gallivm, > > LLVMBuilderRef builder = gallivm->builder; > LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS]; > + LLVMValueRef fs_src1[4][TGSI_NUM_CHANNELS]; > LLVMValueRef src_alpha[4 * 4]; > + LLVMValueRef 
src1_alpha[4 * 4]; > LLVMValueRef src_mask[4 * 4]; > LLVMValueRef src[4 * 4]; > + LLVMValueRef src1[4 * 4]; > LLVMValueRef dst[4 * 4]; > LLVMValueRef blend_color; > LLVMValueRef blend_alpha; > @@ -1323,14 +1482,13 @@ generate_unswizzled_blend(struct gallivm_state > *gallivm, > struct lp_build_mask_context mask_ctx; > struct lp_type mask_type; > struct lp_type blend_type; > - struct lp_type alpha_type; > struct lp_type row_type; > struct lp_type dst_type; > > unsigned char swizzle[TGSI_NUM_CHANNELS]; > unsigned vector_width; > + unsigned src_channels = TGSI_NUM_CHANNELS; > unsigned dst_channels; > - unsigned src_channels; > unsigned dst_count; > unsigned src_count; > unsigned i, j; > @@ -1341,8 +1499,9 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, > > bool pad_inline = is_arithmetic_format(out_format_desc); > bool has_alpha = false; > + const boolean dual_source_blend = variant->key.blend.rt[0].blend_enable && > + > util_blend_state_is_dual(&variant->key.blend, 0); > > - src_channels = TGSI_NUM_CHANNELS; > mask_type = lp_int32_vec4_type(); > mask_type.length = fs_type.length; > > @@ -1422,17 +1581,20 @@ generate_unswizzled_blend(struct gallivm_state > *gallivm, > */ > for (i = 0; i < num_fs; ++i) { > /* Always load alpha for use in blending */ > - LLVMValueRef alpha = LLVMBuildLoad(builder, > fs_out_color[alpha_channel][i], ""); > + LLVMValueRef alpha = LLVMBuildLoad(builder, > fs_out_color[rt][alpha_channel][i], ""); > > /* Load each channel */ > for (j = 0; j < dst_channels; ++j) { > - fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[swizzle[j]][i], > ""); > + fs_src[i][j] = LLVMBuildLoad(builder, > fs_out_color[rt][swizzle[j]][i], ""); > } > > /* If 3 channels then pad to include alpha for 4 element transpose */ > + /* > + * XXX If we include that here maybe could actually use it instead of > + * separate alpha for blending? 
> + */ > if (dst_channels == 3 && !has_alpha) { > fs_src[i][3] = alpha; > - swizzle[3] = 3; > } > > /* We split the row_mask and row_alpha as we want 128bit interleave */ > @@ -1447,6 +1609,25 @@ generate_unswizzled_blend(struct gallivm_state > *gallivm, > src_alpha[i] = alpha; > } > } > + if (dual_source_blend) { > + /* same as above except different src/dst, skip masks and comments... > */ > + for (i = 0; i < num_fs; ++i) { > + LLVMValueRef alpha = LLVMBuildLoad(builder, > fs_out_color[1][alpha_channel][i], ""); > + > + for (j = 0; j < dst_channels; ++j) { > + fs_src1[i][j] = LLVMBuildLoad(builder, > fs_out_color[1][swizzle[j]][i], ""); > + } > + if (dst_channels == 3 && !has_alpha) { > + fs_src1[i][3] = alpha; > + } > + if (fs_type.length == 8) { > + src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, > src_channels); > + src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, > src_channels, src_channels); > + } else { > + src1_alpha[i] = alpha; > + } > + } > + } > > if (util_format_is_pure_integer(out_format)) { > /* > @@ -1467,11 +1648,14 @@ generate_unswizzled_blend(struct gallivm_state > *gallivm, > } > } > > - > /* > * Pixel twiddle from fragment shader order to memory order > */ > src_count = generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels, > fs_src, src, pad_inline); > + if (dual_source_blend) { > + generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels, fs_src1, > src1, pad_inline); > + } > + > src_channels = dst_channels < 3 ? 
dst_channels : 4; > if (src_count != num_fs * src_channels) { > unsigned ds = src_count / (num_fs * src_channels); > @@ -1480,12 +1664,17 @@ generate_unswizzled_blend(struct gallivm_state > *gallivm, > } > > blend_type = row_type; > - alpha_type = fs_type; > - alpha_type.length = 4; > mask_type.length = 4; > > /* Convert src to row_type */ > - src_count = lp_build_conv_auto(gallivm, fs_type, &row_type, src, > src_count, src); > + if (dual_source_blend) { > + struct lp_type old_row_type = row_type; > + lp_build_conv_auto(gallivm, fs_type, &row_type, src, src_count, src); > + src_count = lp_build_conv_auto(gallivm, fs_type, &old_row_type, src1, > src_count, src1); > + } > + else { > + src_count = lp_build_conv_auto(gallivm, fs_type, &row_type, src, > src_count, src); > + } > > /* If the rows are not an SSE vector, combine them to become SSE size! */ > if ((row_type.width * row_type.length) % 128) { > @@ -1494,6 +1683,9 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, > > dst_count = src_count / (vector_width / bits); > combined = lp_build_concat_n(gallivm, row_type, src, src_count, src, > dst_count); > + if (dual_source_blend) { > + lp_build_concat_n(gallivm, row_type, src1, src_count, src1, > dst_count); > + } > > row_type.length *= combined; > src_count /= combined; > @@ -1569,75 +1761,17 @@ generate_unswizzled_blend(struct gallivm_state > *gallivm, > * Alpha conversion > */ > if (!has_alpha) { > - unsigned length = row_type.length; > - row_type.length = alpha_type.length; > - > - /* Twiddle the alpha to match pixels */ > - lp_bld_quad_twiddle(gallivm, alpha_type, src_alpha, 4, src_alpha); > - > - for (i = 0; i < 4; ++i) { > - lp_build_conv(gallivm, alpha_type, row_type, &src_alpha[i], 1, > &src_alpha[i], 1); > - } > - > - alpha_type = row_type; > - row_type.length = length; > - > - /* If only one channel we can only need the single alpha value per > pixel */ > - if (src_count == 1) { > - assert(dst_channels == 1); > - > - lp_build_concat_n(gallivm, 
alpha_type, src_alpha, 4, src_alpha, > src_count); > - } else { > - /* If there are more srcs than rows then we need to split alpha up > */ > - if (src_count > block_height) { > - for (i = src_count; i > 0; --i) { > - unsigned pixels = block_size / src_count; > - unsigned idx = i - 1; > - > - src_alpha[idx] = lp_build_extract_range(gallivm, > src_alpha[(idx * pixels) / 4], (idx * pixels) % 4, pixels); > - } > - } > - > - /* If there is a src for each pixel broadcast the alpha across > whole row */ > - if (src_count == block_size) { > - for (i = 0; i < src_count; ++i) { > - src_alpha[i] = lp_build_broadcast(gallivm, > lp_build_vec_type(gallivm, row_type), src_alpha[i]); > - } > - } else { > - unsigned pixels = block_size / src_count; > - unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : > dst_channels; > - unsigned alpha_span = 1; > - LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; > - > - /* Check if we need 2 src_alphas for our shuffles */ > - if (pixels > alpha_type.length) { > - alpha_span = 2; > - } > - > - /* Broadcast alpha across all channels, e.g. 
a1a2 to > a1a1a1a1a2a2a2a2 */ > - for (j = 0; j < row_type.length; ++j) { > - if (j < pixels * channels) { > - shuffles[j] = lp_build_const_int32(gallivm, j / channels); > - } else { > - shuffles[j] = > LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); > - } > - } > - > - for (i = 0; i < src_count; ++i) { > - unsigned idx1 = i, idx2 = i; > - > - if (alpha_span > 1){ > - idx1 *= alpha_span; > - idx2 = idx1 + 1; > - } > - > - src_alpha[i] = LLVMBuildShuffleVector(builder, > - src_alpha[idx1], > - src_alpha[idx2], > - > LLVMConstVector(shuffles, row_type.length), > - ""); > - } > - } > + struct lp_type alpha_type = fs_type; > + alpha_type.length = 4; > + convert_alpha(gallivm, row_type, alpha_type, > + block_size, block_height, > + src_count, dst_channels, > + pad_inline, src_alpha); > + if (dual_source_blend) { > + convert_alpha(gallivm, row_type, alpha_type, > + block_size, block_height, > + src_count, dst_channels, > + pad_inline, src1_alpha); > } > } > > @@ -1693,7 +1827,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, > rt, > src[i], > has_alpha ? NULL : src_alpha[i], > - NULL, > + src1[i], > + has_alpha ? NULL : src1_alpha[i], > dst[i], > partial_mask ? 
src_mask[i] : NULL, > blend_color, > @@ -1788,6 +1923,8 @@ generate_fragment(struct llvmpipe_context *lp, > unsigned cbuf; > boolean cbuf0_write_all; > boolean try_loop = TRUE; > + const boolean dual_source_blend = key->blend.rt[0].blend_enable && > + util_blend_state_is_dual(&key->blend, > 0); > > assert(lp_native_vector_width / 32 >= 4); > > @@ -1951,10 +2088,17 @@ generate_fragment(struct llvmpipe_context *lp, > mask_input, > thread_data_ptr); > > - for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) > + for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { > for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) > fs_out_color[cbuf][chan][i] = > out_color[cbuf * !cbuf0_write_all][chan]; > + } > + if (dual_source_blend) { > + /* only support one dual source blend target hence always use > output 1 */ > + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) > + fs_out_color[1][chan][i] = > + out_color[1][chan]; > + } > } > } > else { > @@ -2024,6 +2168,15 @@ generate_fragment(struct llvmpipe_context *lp, > fs_out_color[cbuf][chan][i] = ptr; > } > } > + if (dual_source_blend) { > + /* only support one dual source blend target hence always use > output 1 */ > + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { > + ptr = LLVMBuildGEP(builder, > + color_store[1][chan], > + &indexi, 1, ""); > + fs_out_color[1][chan][i] = ptr; > + } > + } > } > } > > @@ -2052,7 +2205,7 @@ generate_fragment(struct llvmpipe_context *lp, > ""); > > generate_unswizzled_blend(gallivm, cbuf, variant, > key->cbuf_format[cbuf], > - num_fs, fs_type, fs_mask, fs_out_color[cbuf], > + num_fs, fs_type, fs_mask, fs_out_color, > context_ptr, color_ptr, stride, > partial_mask, do_branch); > } > > diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c > b/src/gallium/drivers/llvmpipe/lp_test_blend.c > index 6faaedf..c3d2714 100644 > --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c > +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c > @@ -177,7 +177,8 @@ add_blend_test(struct gallivm_state *gallivm, > dst = 
LLVMBuildLoad(builder, dst_ptr, "dst"); > con = LLVMBuildLoad(builder, const_ptr, "const"); > > - res = lp_build_blend_aos(gallivm, blend, format, type, rt, src, NULL, > src1, dst, NULL, con, NULL, swizzle, 4); > + res = lp_build_blend_aos(gallivm, blend, format, type, rt, src, NULL, > + src1, NULL, dst, NULL, con, NULL, swizzle, 4); > > lp_build_name(res, "res"); > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev