Mesa (master): llvmpipe: improve rasterization discard logic
Module: Mesa Branch: master Commit: 7b89fcec416ed7e6ddadec2438aab63609d825f8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7b89fcec416ed7e6ddadec2438aab63609d825f8 Author: Roland Scheidegger Date: Tue May 22 02:12:38 2018 +0200 llvmpipe: improve rasterization discard logic This unifies the explicit rasterization discard as well as the implicit rasterization disabled logic (which we need for another state tracker), which really should do the exact same thing. We'll now toss out the prims early on in setup with (implicit or explicit) discard, rather than do setup and binning with them, which was entirely pointless. (We should eventually get rid of implicit discard, which should also enable us to discard stuff already in draw, hence draw would be able to skip the pointless clip and fallback stages in this case.) We still need separate logic for only null ps - this is not the same as rasterization discard. But simplify the logic there and don't count primitives simply when there's an empty fs, regardless of depth/stencil tests, which seems perfectly acceptable by d3d10. While here, also fix statistics for primitives if face culling is enabled. No piglit changes. 
Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_context.h | 1 - src/gallium/drivers/llvmpipe/lp_jit.c | 1 + src/gallium/drivers/llvmpipe/lp_jit.h | 5 +++ src/gallium/drivers/llvmpipe/lp_rast.c | 12 +++- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 6 src/gallium/drivers/llvmpipe/lp_scene.c | 5 ++- src/gallium/drivers/llvmpipe/lp_scene.h | 10 +++--- src/gallium/drivers/llvmpipe/lp_setup.c | 18 ++- src/gallium/drivers/llvmpipe/lp_setup_line.c| 28 +++-- src/gallium/drivers/llvmpipe/lp_setup_point.c | 22 - src/gallium/drivers/llvmpipe/lp_setup_tri.c | 29 - src/gallium/drivers/llvmpipe/lp_setup_vbuf.c| 2 +- src/gallium/drivers/llvmpipe/lp_state_derived.c | 22 ++--- src/gallium/drivers/llvmpipe/lp_state_fs.c | 41 - src/gallium/drivers/llvmpipe/lp_state_fs.h | 5 --- 15 files changed, 118 insertions(+), 89 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 54d98fdbf7..7a2f253984 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -136,7 +136,6 @@ struct llvmpipe_context { struct blitter_context *blitter; unsigned tex_timestamp; - boolean no_rast; /** List of all fragment shader variants */ struct lp_fs_variant_list_item fs_variants_list; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index a2762f39a0..e2309f4715 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -212,6 +212,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) elem_types[LP_JIT_THREAD_DATA_CACHE] = LLVMPointerType(lp_build_format_cache_type(gallivm), 0); elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc); + elem_types[LP_JIT_THREAD_DATA_INVOCATIONS] = LLVMInt64TypeInContext(lc); elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] = LLVMInt32TypeInContext(lc); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h 
b/src/gallium/drivers/llvmpipe/lp_jit.h index 9db26f2cba..312d1a1281 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -192,6 +192,7 @@ struct lp_jit_thread_data { struct lp_build_format_cache *cache; uint64_t vis_counter; + uint64_t ps_invocations; /* * Non-interpolated rasterizer state passed through to the fragment shader. @@ -205,6 +206,7 @@ struct lp_jit_thread_data enum { LP_JIT_THREAD_DATA_CACHE = 0, LP_JIT_THREAD_DATA_COUNTER, + LP_JIT_THREAD_DATA_INVOCATIONS, LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, LP_JIT_THREAD_DATA_COUNT }; @@ -216,6 +218,9 @@ enum { #define lp_jit_thread_data_counter(_gallivm, _ptr) \ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter") +#define lp_jit_thread_data_invocations(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_INVOCATIONS, "invocs") + #define lp_jit_thread_data_raster_state_viewport_index(_gallivm, _ptr) \ lp_build_struct_get(_gallivm, _ptr, \ LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, \ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 939944aa79..9d4f9f8d02 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -107,7 +107,7 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, task->scene->fb.heigh
Mesa (master): draw: get rid of special logic to not emit null tris
Module: Mesa Branch: master Commit: 6f558fb0f79d88eb1749740e8bddb7e8b313fdf4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f558fb0f79d88eb1749740e8bddb7e8b313fdf4 Author: Roland Scheidegger Date: Thu May 17 03:45:02 2018 +0200 draw: get rid of special logic to not emit null tris I've confirmed after 77554d220d6d74b4d913dc37ea3a874e9dc550e4 we no longer need this to pass some tests from another api (as we no longer generate the bogus extra null tris in the first place). Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 38 - 1 file changed, 38 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 46118b6e67..2a9c944dc1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -210,30 +210,6 @@ static void interp(const struct clip_stage *clip, } /** - * Checks whether the specified triangle is empty and if it is returns - * true, otherwise returns false. - * Triangle is considered null/empty if its area is equal to zero. - */ -static inline boolean -is_tri_null(const struct clip_stage *clip, const struct prim_header *header) -{ - const unsigned pos_attr = clip->pos_attr; - float x1 = header->v[1]->data[pos_attr][0] - header->v[0]->data[pos_attr][0]; - float y1 = header->v[1]->data[pos_attr][1] - header->v[0]->data[pos_attr][1]; - float z1 = header->v[1]->data[pos_attr][2] - header->v[0]->data[pos_attr][2]; - - float x2 = header->v[2]->data[pos_attr][0] - header->v[0]->data[pos_attr][0]; - float y2 = header->v[2]->data[pos_attr][1] - header->v[0]->data[pos_attr][1]; - float z2 = header->v[2]->data[pos_attr][2] - header->v[0]->data[pos_attr][2]; - - float vx = y1 * z2 - z1 * y2; - float vy = x1 * z2 - z1 * x2; - float vz = x1 * y2 - y1 * x2; - - return (vx*vx + vy*vy + vz*vz) == 0.f; -} - -/** * Emit a post-clip polygon to the next pipeline stage. 
The polygon * will be convex and the provoking vertex will always be vertex[0]. */ @@ -247,7 +223,6 @@ static void emit_poly(struct draw_stage *stage, struct prim_header header; unsigned i; ushort edge_first, edge_middle, edge_last; - boolean tri_emitted = FALSE; if (stage->draw->rasterizer->flatshade_first) { edge_first = DRAW_PIPE_EDGE_FLAG_0; @@ -269,7 +244,6 @@ static void emit_poly(struct draw_stage *stage, header.pad = 0; for (i = 2; i < n; i++, header.flags = edge_middle) { - boolean tri_null; /* order the triangle verts to respect the provoking vertex mode */ if (stage->draw->rasterizer->flatshade_first) { header.v[0] = inlist[0]; /* the provoking vertex */ @@ -282,18 +256,6 @@ static void emit_poly(struct draw_stage *stage, header.v[2] = inlist[0]; /* the provoking vertex */ } - tri_null = is_tri_null(clipper, &header); - /* - * If we ever generated a tri (regardless if it had area or not), - * skip all subsequent null tris. - * FIXME: I think this logic was hiding bugs elsewhere. It should - * be possible now to always emit all tris. - */ - if (tri_null && tri_emitted) { - continue; - } - tri_emitted = TRUE; - if (!edgeflags[i-1]) { header.flags &= ~edge_middle; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: Use alloca_undef with array type instead of alloca_array
Module: Mesa Branch: master Commit: e01af38d6faf5dfd0f4ac6548ae03c27cca1dede URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e01af38d6faf5dfd0f4ac6548ae03c27cca1dede Author: Roland Scheidegger Date: Tue May 15 04:35:50 2018 +0200 gallivm: Use alloca_undef with array type instead of alloca_array Use a single allocation of array type instead of the old-style array allocation for the temp and immediate arrays. Probably only makes a difference if they aren't used indirectly (so, if we used them solely because there are too many temps or immediates). In this case the sroa and early-cse passes can sometimes do some optimizations which they otherwise cannot. (As a side note, for the temp reg array, we actually really should use one allocation per array id, not just one for everything.) Note that the instcombine pass would actually promote such allocations to single alloc of array type as well, but it's too late for some artificial shaders we've seen to help (we don't want to run instcombine at the beginning due to its cost, hence would need another sroa/cse pass after instcombine). sroa/early-cse help there because they can actually eliminate all of the huge shader, reducing it to a single const output (don't ask...). (Interestingly, instcombine also removes all the bitcasts we do on that allocation for single-value gathering, and in the end directly indexes into the single vector elements, which according to spec is only semi-valid, but this happens regardless. Another thing instcombine also does is use inbounds GEPs, which is probably something we should do manually as well - for indirectly indexed reg files llvm may not be able to figure it out on its own, but we should be able to guarantee all pointers are always in bounds. In any case, by the looks of it using single allocation with array type seems to be the right thing to do even for ordinary shaders.) No piglit change. 
Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 61 + 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index e411f906c7..83d7dbea9a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -741,7 +741,8 @@ static void lp_exec_mask_store(struct lp_exec_mask *mask, assert(lp_check_value(bld_store->type, val)); assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind); - assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val)); + assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) || + LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind); if (exec_mask) { LLVMValueRef res, dst; @@ -852,7 +853,14 @@ get_file_ptr(struct lp_build_tgsi_soa_context *bld, if (bld->indirect_files & (1 << file)) { LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan); - return LLVMBuildGEP(builder, var_of_array, &lindex, 1, ""); + if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) { + LLVMValueRef gep[2]; + gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0); + gep[1] = lindex; + return LLVMBuildGEP(builder, var_of_array, gep, 2, ""); + } else { + return LLVMBuildGEP(builder, var_of_array, &lindex, 1, ""); + } } else { assert(index <= bld->bld_base.info->file_max[file]); @@ -1352,21 +1360,20 @@ emit_fetch_immediate( /* Gather values from the immediate register array */ res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2); } else { - LLVMValueRef lindex = lp_build_const_int32(gallivm, -reg->Register.Index * 4 + swizzle); - LLVMValueRef imms_ptr = LLVMBuildGEP(builder, -bld->imms_array, &lindex, 1, ""); + LLVMValueRef gep[2]; + gep[0] = lp_build_const_int32(gallivm, 0); + gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + 
swizzle); + LLVMValueRef imms_ptr = LLVMBuildGEP(builder, + bld->imms_array, gep, 2, ""); res = LLVMBuildLoad(builder, imms_ptr, ""); if (tgsi_type_is_64bit(stype)) { -LLVMValueRef lindex1; LLVMValueRef imms_ptr2; LLVMValueRef res2; - -lindex1 = lp_build_const_int32(gallivm, - reg->Register.Index * 4 + swizzle + 1); +gep[1] = lp_build_const_int32(gallivm, + reg->
Mesa (master): llvmpipe: Fix random number generation for unit tests
Module: Mesa Branch: master Commit: cf3fb42fb5eb6130693a4be0a7b5ea06b184ce2d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cf3fb42fb5eb6130693a4be0a7b5ea06b184ce2d Author: Roland Scheidegger Date: Mon May 7 21:04:27 2018 +0200 llvmpipe: Fix random number generation for unit tests We were never producing negative numbers for signed types. Also fix only producing half the valid range for uint32, and properly clamp signed values. Because this now also properly tests snorm with actually negative values, we need to increase eps for such conversions. I believe these cannot actually be hit in ordinary operation (e.g. if a snorm texture is sampled and output to snorm RT, it will still go through snorm->float and float->snorm conversion), so don't bother to do anything to fix the bad accuracy (might be quite complex). Basically, the issue is for something like snorm16->snorm8 that in the end this will just use an 8 bit arithmetic right shift. But the math behind it says we should actually do a division by 32767 / 127, which is ~258, not 256. So the result can be one bit off (values have too large magnitude), and furthermore, the shift has incorrect rounding (always rounds down). For positive numbers, these errors have different directions, but for negative ones they have the same, hence for some values the error will be 2 bits in the end. 
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=106232 --- src/gallium/drivers/llvmpipe/lp_test_conv.c | 8 src/gallium/drivers/llvmpipe/lp_test_main.c | 13 +++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 6e58a03151..a4f313a0bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -211,6 +211,14 @@ test_one(unsigned verbose, assert(src_type.length * num_srcs == dst_type.length * num_dsts); eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type)); + if (dst_type.norm && dst_type.sign && src_type.sign && !src_type.floating) { + /* + * This is quite inaccurate due to shift being used. + * I don't think it's possible to hit such conversions with + * llvmpipe though. + */ + eps *= 2; + } context = LLVMContextCreate(); gallivm = gallivm_create("test_module", context); diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 518ca274e7..5ec0dd347b 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -147,6 +147,7 @@ write_elem(struct lp_type type, void *dst, unsigned index, double value) if(type.sign) { long long lvalue = (long long)value; lvalue = MIN2(lvalue, ((long long)1 << (type.width - 1)) - 1); + lvalue = MAX2(lvalue, -((long long)1 << (type.width - 1))); switch(type.width) { case 8: *((int8_t *)dst + index) = (int8_t)lvalue; @@ -200,16 +201,24 @@ random_elem(struct lp_type type, void *dst, unsigned index) } else { unsigned long long mask; -if (type.fixed) + if (type.fixed) mask = ((unsigned long long)1 << (type.width / 2)) - 1; else if (type.sign) mask = ((unsigned long long)1 << (type.width - 1)) - 1; else mask = ((unsigned long long)1 << type.width) - 1; value += (double)(mask & rand()); + if (!type.fixed && !type.sign && type.width == 32) { +/* + * rand only returns half the 
possible range + * XXX 64bit values... + */ +if(rand() & 1) + value += (double)0x8000; + } } } - if(!type.sign) + if(type.sign) if(rand() & 1) value = -value; write_elem(type, dst, index, value); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: simplify clip null tri logic
Module: Mesa Branch: master Commit: 98578df27bbf682f254d59a3a7d63b5f1b98f838 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=98578df27bbf682f254d59a3a7d63b5f1b98f838 Author: Roland Scheidegger Date: Tue Apr 24 18:12:34 2018 +0200 draw: simplify clip null tri logic Simplifies the logic when to emit null tris (albeit the reasons why we have to do this remain unclear). This is strictly just logic simplification, the behavior doesn't change at all. Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 20 +--- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 4cfa54b2e1..ff80363a51 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -252,8 +252,7 @@ static void emit_poly(struct draw_stage *stage, struct prim_header header; unsigned i; ushort edge_first, edge_middle, edge_last; - boolean last_tri_was_null = FALSE; - boolean tri_was_not_null = FALSE; + boolean tri_emitted = FALSE; if (stage->draw->rasterizer->flatshade_first) { edge_first = DRAW_PIPE_EDGE_FLAG_0; @@ -289,17 +288,16 @@ static void emit_poly(struct draw_stage *stage, } tri_null = is_tri_null(clipper, &header); - /* If we generated a triangle with an area, aka. non-null triangle, - * or if the previous triangle was also null then skip all subsequent - * null triangles */ - if ((tri_was_not_null && tri_null) || (last_tri_was_null && tri_null)) { - last_tri_was_null = tri_null; + /* + * If we ever generated a tri (regardless if it had area or not), + * skip all subsequent null tris. + * FIXME: it is unclear why we always have to emit at least one + * tri. Maybe this is hiding bugs elsewhere. 
+ */ + if (tri_null && tri_emitted) { continue; } - last_tri_was_null = tri_null; - if (!tri_null) { - tri_was_not_null = TRUE; - } + tri_emitted = TRUE; if (!edgeflags[i-1]) { header.flags &= ~edge_middle; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: fix different sign logic when clipping
Module: Mesa Branch: master Commit: 77554d220d6d74b4d913dc37ea3a874e9dc550e4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=77554d220d6d74b4d913dc37ea3a874e9dc550e4 Author: Roland Scheidegger Date: Tue Apr 24 18:25:55 2018 +0200 draw: fix different sign logic when clipping The logic was flawed, since mul(x,y) will be <= 0 (exactly 0) when the sign is the same but both numbers are sufficiently small (if the product is smaller than 2^-128). This could apparently lead to emitting a sufficient amount of additional bogus vertices to overflow the allocated array for them, hitting an assertion (still safe with release builds since we just aborted clipping after the assertion in this case - I'm however unsure if this is now really no longer possible, so that code stays). Not sure if the additional vertices could cause other grief, I didn't see anything wrong even when hitting the assertion. Essentially, both +-0 are treated as positive (the vertex is considered to be inside the clip volume for this plane), so integrate the logic determining different sign into the branch there. Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 17 - 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index ff80363a51..46118b6e67 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -47,11 +47,6 @@ /** Set to 1 to enable printing of coords before/after clipping */ #define DEBUG_CLIP 0 - -#ifndef DIFFERENT_SIGNS -#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F) -#endif - #define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1) @@ -291,8 +286,8 @@ static void emit_poly(struct draw_stage *stage, /* * If we ever generated a tri (regardless if it had area or not), * skip all subsequent null tris. - * FIXME: it is unclear why we always have to emit at least one - * tri. 
Maybe this is hiding bugs elsewhere. + * FIXME: I think this logic was hiding bugs elsewhere. It should + * be possible now to always emit all tris. */ if (tri_null && tri_emitted) { continue; @@ -478,6 +473,7 @@ do_clip_tri(struct draw_stage *stage, for (i = 1; i <= n; i++) { struct vertex_header *vert = inlist[i]; boolean *edge = &inEdges[i]; + boolean different_sign; float dp = getclipdist(clipper, vert, plane_idx); @@ -490,9 +486,12 @@ do_clip_tri(struct draw_stage *stage, return; outEdges[outcount] = *edge_prev; outlist[outcount++] = vert_prev; +different_sign = dp < 0.0f; + } else { +different_sign = !(dp < 0.0f); } - if (DIFFERENT_SIGNS(dp, dp_prev)) { + if (different_sign) { struct vertex_header *new_vert; boolean *new_edge; @@ -510,7 +509,7 @@ do_clip_tri(struct draw_stage *stage, if (dp < 0.0f) { /* Going out of bounds. Avoid division by zero as we -* know dp != dp_prev from DIFFERENT_SIGNS, above. +* know dp != dp_prev from different_sign, above. */ float t = dp / (dp - dp_prev); interp( clipper, new_vert, t, vert, vert_prev, viewport_index ); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: remove LICM pass
Module: Mesa Branch: master Commit: 45b8f620a545bcdb8a4942bafd505c9418f6d9f2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=45b8f620a545bcdb8a4942bafd505c9418f6d9f2 Author: Roland Scheidegger Date: Mon Apr 23 04:39:00 2018 +0200 gallivm: remove LICM pass LICM is simply too expensive, even though it presumably can help quite a bit in some cases. It was definitely cheaper in llvm 3.3, though as far as I can tell with llvm 3.3 it failed to do anything in most cases. early-cse also actually seems to cause licm to be able to move things when it previously couldn't, which causes noticeable compile time increases. There's more loop passes in llvm, but I'm not sure which ones are helpful, and I couldn't find anything which would roughly do what the old licm in llvm 3.3 did, so ditch it. Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index ca36b467ca..a9968649c0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -144,7 +144,15 @@ create_pass_manager(struct gallivm_state *gallivm) LLVMAddScalarReplAggregatesPass(gallivm->passmgr); LLVMAddEarlyCSEPass(gallivm->passmgr); LLVMAddCFGSimplificationPass(gallivm->passmgr); - LLVMAddLICMPass(gallivm->passmgr); + /* + * FIXME: LICM is potentially quite useful. However, for some + * rather crazy shaders the compile time can reach _hours_ per shader, + * due to licm implying lcssa (since llvm 3.5), which can take forever. + * Even for sane shaders, the cost of licm is rather high (and not just + * due to lcssa, licm itself too), though mostly only in cases when it + * can actually move things, so having to disable it is a pity. 
+ * LLVMAddLICMPass(gallivm->passmgr); + */ LLVMAddReassociatePass(gallivm->passmgr); LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); LLVMAddConstantPropagationPass(gallivm->passmgr); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: (trivial) do division by 1000 with int64
Module: Mesa Branch: master Commit: e89cf59c27e4a66379f21ee0b574deaa078d0975 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e89cf59c27e4a66379f21ee0b574deaa078d0975 Author: Roland Scheidegger Date: Mon Apr 23 04:52:48 2018 +0200 gallivm: (trivial) do division by 1000 with int64 Conversion to int can otherwise overflow if compile times are over ~71min. (Yes this can happen...) Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index a9968649c0..800b2616c0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -604,7 +604,7 @@ gallivm_compile_module(struct gallivm_state *gallivm) if (gallivm_debug & GALLIVM_DEBUG_PERF) { int64_t time_end = os_time_get(); - int time_msec = (int)(time_end - time_begin) / 1000; + int time_msec = (int)((time_end - time_begin) / 1000); assert(gallivm->module_name); debug_printf("optimizing module %s took %d msec\n", gallivm->module_name, time_msec); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: add early cse pass
Module: Mesa Branch: master Commit: 8b9ab674b982289b1c18c93598139ec1e4bf829a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8b9ab674b982289b1c18c93598139ec1e4bf829a Author: Roland Scheidegger Date: Mon Apr 23 04:32:56 2018 +0200 gallivm: add early cse pass This pass is quite cheap, and can simplify the IR quite a bit for our generated IR. In particular on a variety of shaders I've found the time saved by other passes due to the simplified IR more than makes up for the cost of this pass, and on top of that the end result is actually better. The only downside I've found is this enables the LICM pass to move some things out of the main shader loop (in the case I've seen, instanced vertex fetch (which is constant within the jit shader) plus the derived instructions in the shader) which it couldn't do before for some reason. This would actually be desirable but can increase compile time considerably (licm seems to have considerable cost when it actually can move things out of loops, due to alias analysis). But blaming early cse for this seems inappropriate. (Note that the first two sroa / earlycse passes are similar to what a standard llvm opt -O1/-O2 pipeline would do, albeit this has some more passes even before but I don't think they'd do much for us.) It also in particular helps some crazy shader used for driver verification (don't ask...) a lot (about factor of 6 faster in compile time) (due to simplifying the ir before LICM is run). While here, also move licm behind simplifycfg. For some shaders there seem to be very significant compile time gains (we've seen a factor of 1 albeit that was a really crazy shader you'd certainly never see in a real app), because LICM is quite expensive and there are cases where running simplifycfg (along with sroa and early-cse) before licm reduces IR complexity significantly. (I'm not entirely sure if it would make sense to also run it afterwards.) 
Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index dae9d01552..ca36b467ca 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -137,13 +137,14 @@ create_pass_manager(struct gallivm_state *gallivm) } if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. - * TODO: Add more passes. + /* + * TODO: Evaluate passes some more - keeping in mind + * both quality of generated code and compile times. */ LLVMAddScalarReplAggregatesPass(gallivm->passmgr); - LLVMAddLICMPass(gallivm->passmgr); + LLVMAddEarlyCSEPass(gallivm->passmgr); LLVMAddCFGSimplificationPass(gallivm->passmgr); + LLVMAddLICMPass(gallivm->passmgr); LLVMAddReassociatePass(gallivm->passmgr); LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); LLVMAddConstantPropagationPass(gallivm->passmgr); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: dump bitcode before optimization
Module: Mesa Branch: master Commit: e8e1d287a3c3030f1a94dcf67381dfd7ae3eba14 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e8e1d287a3c3030f1a94dcf67381dfd7ae3eba14 Author: Roland Scheidegger Date: Mon Apr 23 06:22:45 2018 +0200 gallivm: dump bitcode before optimization If we dump the bitcode for off-line debug purposes, we really want the pre-optimized bitcode, otherwise it's useless in identifying problems with IR optimization (if you have a shader which takes an hour to do IR optimization, it's also nice you don't have to wait that hour...). Also, print out the function passes for opt which correspond to what was used for jit compilation (and also the opt level for codegen). Using opt/llc this way should then pretty much mimic what was done for jit. (When specifying something like -time-passes -debug-pass=[Structure|Arguments] (for either opt or llc) that also gives very useful information in which passes all the time was spent, and which passes are really run along with the order - llvm will add passes due to dependencies on its own, and of course -O2 for llc comes with a ~100 pass list.) Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 33 + 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 800b2616c0..1f0a01cde6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -141,6 +141,10 @@ create_pass_manager(struct gallivm_state *gallivm) * TODO: Evaluate passes some more - keeping in mind * both quality of generated code and compile times. */ + /* + * NOTE: if you change this, don't forget to change the output + * with GALLIVM_DEBUG_DUMP_BC in gallivm_compile_module. 
+ */ LLVMAddScalarReplAggregatesPass(gallivm->passmgr); LLVMAddEarlyCSEPass(gallivm->passmgr); LLVMAddCFGSimplificationPass(gallivm->passmgr); @@ -577,6 +581,22 @@ gallivm_compile_module(struct gallivm_state *gallivm) gallivm->builder = NULL; } + /* Dump bitcode to a file */ + if (gallivm_debug & GALLIVM_DEBUG_DUMP_BC) { + char filename[256]; + assert(gallivm->module_name); + util_snprintf(filename, sizeof(filename), "ir_%s.bc", gallivm->module_name); + LLVMWriteBitcodeToFile(gallivm->module, filename); + debug_printf("%s written\n", filename); + debug_printf("Invoke as \"opt %s %s | llc -O%d %s%s\"\n", + gallivm_debug & GALLIVM_DEBUG_NO_OPT ? "-mem2reg" : + "-sroa -early-cse -simplifycfg -reassociate " + "-mem2reg -constprop -instcombine -gvn", + filename, gallivm_debug & GALLIVM_DEBUG_NO_OPT ? 0 : 2, + (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option>] " : "", + "[-mattr=<-mattr option(s)>]"); + } + if (gallivm_debug & GALLIVM_DEBUG_PERF) time_begin = os_time_get(); @@ -610,19 +630,6 @@ gallivm_compile_module(struct gallivm_state *gallivm) gallivm->module_name, time_msec); } - /* Dump byte code to a file */ - if (gallivm_debug & GALLIVM_DEBUG_DUMP_BC) { - char filename[256]; - assert(gallivm->module_name); - util_snprintf(filename, sizeof(filename), "ir_%s.bc", gallivm->module_name); - LLVMWriteBitcodeToFile(gallivm->module, filename); - debug_printf("%s written\n", filename); - debug_printf("Invoke as \"llc %s%s -o - %s\"\n", - (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option>] " : "", - "[-mattr=<-mattr option(s)>]", - filename); - } - if (use_mcjit) { /* Setting the module's DataLayout to an empty string will cause the * ExecutionEngine to copy to the DataLayout string from its target ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): scons: need to split CC or things might fail
Module: Mesa Branch: master Commit: e10dc12f6f2f7513d96bbea87b93b8e338222188 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e10dc12f6f2f7513d96bbea87b93b8e338222188 Author: Jose Fonseca Date: Mon Mar 19 16:41:57 2018 +0100 scons: need to split CC or things might fail We've seen this fail internally. Reviewed-by: Roland Scheidegger --- scons/gallium.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scons/gallium.py b/scons/gallium.py index ef3b2ee81a..75200b89c1 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -134,7 +134,9 @@ def check_cc(env, cc, expr, cpp_opt = '-E'): source.write('#if !(%s)\n#error\n#endif\n' % expr) source.close() -pipe = SCons.Action._subproc(env, [env['CC'], cpp_opt, source.name], +# sys.stderr.write('%r %s %s\n' % (env['CC'], cpp_opt, source.name)); + +pipe = SCons.Action._subproc(env, env.Split(env['CC']) + [cpp_opt, source.name], stdin = 'devnull', stderr = 'devnull', stdout = 'devnull') ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: fix abs for op3 sources
Module: Mesa Branch: master Commit: 274f8bf05ef526d65f01614313dda65bc7ec7a87 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=274f8bf05ef526d65f01614313dda65bc7ec7a87 Author: Roland Scheidegger Date: Tue Mar 13 03:40:42 2018 +0100 r600: fix abs for op3 sources If a src was referencing the same temp as the dst, the per-component copy code didn't work. e.g. cndge r0.xy, r0.xx, |r2|, r3 got expanded into mov r12.x, |r2| cndge r0.x, r0.x, r12, r3 mov r12.y, |r2| cndge r0.y, r0.x, r12, r3 hence for the second cndge r0.x was mistakenly the previous cndge result. Fix this by doing all the movs first, so there's no bogus alu.last in between. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=102905 Tested-by: Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 110 + 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6b5c42f86d..bd511c76ac 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -7076,33 +7076,42 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru } static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, - unsigned temp, int chan, + unsigned writemask, struct r600_bytecode_alu_src *bc_src, const struct r600_shader_src *shader_src) { struct r600_bytecode_alu alu; - int r; + int i, r; + int lasti = tgsi_last_instruction(writemask); + int temp_reg = 0; - r600_bytecode_src(bc_src, shader_src, chan); + r600_bytecode_src(&bc_src[0], shader_src, 0); + r600_bytecode_src(&bc_src[1], shader_src, 1); + r600_bytecode_src(&bc_src[2], shader_src, 2); + r600_bytecode_src(&bc_src[3], shader_src, 3); - /* op3 operands don't support abs modifier */ if (bc_src->abs) { - assert(temp!=0); /* we actually need the extra register, make sure it is allocated. 
*/ - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP1_MOV; - alu.dst.sel = temp; - alu.dst.chan = chan; - alu.dst.write = 1; + temp_reg = r600_get_temp(ctx); - alu.src[0] = *bc_src; - alu.last = true; // sufficient? - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; - - memset(bc_src, 0, sizeof(*bc_src)); - bc_src->sel = temp; - bc_src->chan = chan; + for (i = 0; i < lasti + 1; i++) { + if (!(writemask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.dst.sel = temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + alu.src[0] = bc_src[i]; + if (i == lasti) { + alu.last = 1; + } + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&bc_src[i], 0, sizeof(*bc_src)); + bc_src[i].sel = temp_reg; + bc_src[i].chan = i; + } } return 0; } @@ -7111,9 +7120,9 @@ static int tgsi_op3_dst(struct r600_shader_ctx *ctx, int dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; + struct r600_bytecode_alu_src srcs[4][4]; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - int temp_regs[4]; unsigned op = ctx->inst_info->op; if (op == ALU_OP3_MULADD_IEEE && @@ -7121,10 +7130,12 @@ static int tgsi_op3_dst(struct r600_shader_ctx *ctx, int dst) op = ALU_OP3_MULADD; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - temp_regs[j] = 0; - if (ctx->src[j].abs) - temp_regs[j] = r600_get_temp(ctx); + r = tgsi_make_src_for_op3(ctx, inst->Dst[0].Register.WriteMask, + srcs[j], &ctx->src[j]); + if (r) + return r; } + for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -7132,9 +7143,7 @@ static int tgsi_op3
Mesa (master): u_blit: (trivial) u_blit.h needs to include p_defines.h
Module: Mesa Branch: master Commit: 0f0a6fa21dc240fca99a3a110a6c0dfac6d2a376 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0f0a6fa21dc240fca99a3a110a6c0dfac6d2a376 Author: Roland Scheidegger Date: Sat Mar 10 02:48:42 2018 +0100 u_blit: (trivial) u_blit.h needs to include p_defines.h (For the pipe_tex_filter enum) Reviewed-by: Mathias Fröhlich Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/util/u_blit.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index 085ea63570..004ceaecd8 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -31,6 +31,7 @@ #include "pipe/p_compiler.h" +#include "pipe/p_defines.h" #ifdef __cplusplus ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: fix alpha value for very short aa lines
Module: Mesa Branch: master Commit: d62f0df3541ab9ee7a4999f0ecedc52f8d1ab8cc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d62f0df3541ab9ee7a4999f0ecedc52f8d1ab8cc Author: Roland Scheidegger Date: Fri Mar 9 05:27:25 2018 +0100 draw: fix alpha value for very short aa lines The logic would not work correctly for line lengths smaller than 1.0, even a degenerated line with length 0 would still produce a fragment with anywhere between alpha 0.0 and 0.5. Reviewed-by: Brian Paul --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 25 - src/gallium/auxiliary/draw/draw_pipe_stipple.c | 1 - 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 14a4b2f4b0..66a943aff4 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -370,7 +370,30 @@ aaline_line(struct draw_stage *stage, struct prim_header *header) float t_l, t_w; uint i; - half_length = 0.5f * sqrtf(dx * dx + dy * dy) + 0.5f; + half_length = 0.5f * sqrtf(dx * dx + dy * dy); + + if (half_length < 0.5f) { + /* + * The logic we use for "normal" sized segments is incorrect + * for very short segments (basically because we only have + * one value to interpolate, not a distance to each endpoint). + * Therefore, we calculate half_length differently, so that for + * original line length (near) 0, we get alpha 0 - otherwise + * max alpha would still be 0.5. This also prevents us from + * artifacts due to degenerated lines (the endpoints being + * identical, which would still receive anywhere from alpha + * 0-0.5 otherwise) (at least the pstipple stage may generate + * such lines due to float inaccuracies if line length is very + * close to a integer). 
+ * Might not be fully accurate neither (because the "strength" of + * the line is going to be determined by how close to the pixel + * center those 1 or 2 fragments are) but it's probably the best + * we can do. + */ + half_length = 2.0f * half_length; + } else { + half_length = half_length + 0.5f; + } t_w = half_width; t_l = 0.5f; diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 3a44e96add..d30572cc61 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -150,7 +150,6 @@ stipple_line(struct draw_stage *stage, struct prim_header *header) if (header->flags & DRAW_PIPE_RESET_STIPPLE) stipple->counter = 0; - /* XXX ToDo: instead of iterating pixel-by-pixel, use a look-up table. */ for (i = 0; i < length; i++) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: fix line stippling with aa lines
Module: Mesa Branch: master Commit: 8ba3750d3d953a9e6a2a0564e2d3d5efc42316e1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ba3750d3d953a9e6a2a0564e2d3d5efc42316e1 Author: Roland Scheidegger Date: Tue Mar 6 21:33:16 2018 +0100 draw: fix line stippling with aa lines In contrast to non-aa, where stippling is based on either dx or dy (depending on if it's a x or y major line), stippling is based on actual distance with smooth lines, so adjust for this. (It looks like there's some minor artifacts with mesa demos line-sample and stippling, it looks like the line endpoints aren't quite right with aa + stippling - maybe due to the integer math in the stipple stage, but I can't quite pinpoint it.) Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 3a84d6c3ea..3a44e96add 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -50,6 +50,7 @@ struct stipple_stage { float counter; uint pattern; uint factor; + bool smooth; }; @@ -133,12 +134,19 @@ stipple_line(struct draw_stage *stage, struct prim_header *header) float y0 = pos0[1]; float y1 = pos1[1]; - float dx = x0 > x1 ? x0 - x1 : x1 - x0; - float dy = y0 > y1 ? y0 - y1 : y1 - y0; - - float length = MAX2(dx, dy); + float length; int i; + if (stipple->smooth) { + float dx = x1 - x0; + float dy = y1 - y0; + length = sqrtf(dx*dx + dy*dy); + } else { + float dx = x0 > x1 ? x0 - x1 : x1 - x0; + float dy = y0 > y1 ? 
y0 - y1 : y1 - y0; + length = MAX2(dx, dy); + } + if (header->flags & DRAW_PIPE_RESET_STIPPLE) stipple->counter = 0; @@ -205,6 +213,7 @@ stipple_first_line(struct draw_stage *stage, stipple->pattern = draw->rasterizer->line_stipple_pattern; stipple->factor = draw->rasterizer->line_stipple_factor + 1; + stipple->smooth = draw->rasterizer->line_smooth; stage->line = stipple_line; stage->line(stage, header); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: simplify (and correct) aaline fallback (v2)
Module: Mesa Branch: master Commit: dbb2cf388b79538ed572ecb3b8b3b5cb1f8fab0e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dbb2cf388b79538ed572ecb3b8b3b5cb1f8fab0e Author: Roland Scheidegger Date: Tue Mar 6 19:16:45 2018 +0100 draw: simplify (and correct) aaline fallback (v2) The motivation actually was to get rid of the additional tex instruction, since that requires the draw fallback code to intercept all sampler / view calls (even if the fallback is never hit). Basically, the idea is to use coverage of the pixel to calculate the alpha value, and coverage is simply based on the distance to the center of the line (in both line direction, which is useful for wide lines, as well as perpendicular to the line). This is much closer to what hw supporting this natively actually does. It also fixes an issue with line width not quite being correct, as well as endpoints getting stretched too far (in line direction) with wide lines, which is apparent with mesa demo line-sample. (For llvmpipe, it would probably make sense to do something like this directly when drawing lines, since rendering two tris is twice as expensive as a line, but it would need some changes with state management.) Since we're no longer relying on mipmapping to get the alpha value, we also don't need to draw 3 rects (6 tris), one is sufficient. There are still issues (as before): - quite sure it's not correct without half_pixel_center, but can't test this with GL. - aaline + line stipple is incorrect (evident with line-sample demo). Looking at the spec the stipple pattern should actually be based on distance (not just dx or dy for x/y major lines as without aa). - outputs (other than pos + the one used for line aa) should be reinterpolated since we actually increase line length by half a pixel (but there are no tests which would care). 
v2: simplify the math (should be equivalent), don't need immediate v3: use float versions of atan2,cos,sin, minor cleanups Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 514 ++ 1 file changed, 105 insertions(+), 409 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index a859dbc02b..14a4b2f4b0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -1,6 +1,6 @@ /** * - * Copyright 2007 VMware, Inc. + * Copyright 2007-2018 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -26,7 +26,7 @@ **/ /** - * AA line stage: AA lines are converted to texture mapped triangles. + * AA line stage: AA lines are converted triangles (with extra generic) * * Authors: Brian Paul */ @@ -40,7 +40,6 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_sampler.h" #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" @@ -55,19 +54,6 @@ /** - * Size for the alpha texture used for antialiasing - */ -#define TEXTURE_SIZE_LOG2 5 /* 32 x 32 */ - -/** - * Max texture level for the alpha texture used for antialiasing - * - * Don't use the 1x1 and 2x2 mipmap levels. - */ -#define MAX_TEXTURE_LEVEL (TEXTURE_SIZE_LOG2 - 2) - - -/** * Subclass of pipe_shader_state to carry extra fragment shader info. 
*/ struct aaline_fragment_shader @@ -75,8 +61,7 @@ struct aaline_fragment_shader struct pipe_shader_state state; void *driver_fs; void *aaline_fs; - uint sampler_unit; - int generic_attrib; /**< texcoord/generic used for texture */ + int generic_attrib; /**< generic used for distance */ }; @@ -89,26 +74,16 @@ struct aaline_stage float half_line_width; - /** For AA lines, this is the vertex attrib slot for the new texcoords */ - uint tex_slot; + /** For AA lines, this is the vertex attrib slot for new generic */ + uint coord_slot; /** position, not necessarily output zero */ uint pos_slot; - void *sampler_cso; - struct pipe_resource *texture; - struct pipe_sampler_view *sampler_view; - uint num_samplers; - uint num_sampler_views; - /* * Currently bound state */ struct aaline_fragment_shader *fs; - struct { - void *sampler[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *sampler_views[PIPE_MAX_SHADER_SAMPLER_VIEWS]; - } state; /* * Driver interface/override functions @@ -117,15 +92,6 @@ struct aaline_stage const struct pipe_shader_state *); void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); -
Mesa (master): tgsi/scan: use wrap-around shift behavior explicitly for file_mask
Module: Mesa Branch: master Commit: 06e724c7b4ade29868531edb20900859f566a077 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=06e724c7b4ade29868531edb20900859f566a077 Author: Roland Scheidegger Date: Fri Mar 2 03:00:41 2018 +0100 tgsi/scan: use wrap-around shift behavior explicitly for file_mask The comment said it will only represent the lowest 32 regs. This was not entirely true in practice, since at least on x86 you'll get masked shifts (unless the compiler could recognize it already and toss it out). It turns out this actually works out alright (presumably no one uses it for temp regs) when increasing max sampler views, so make that behavior explicit. Albeit it feels a bit hacky (but in any case, explicit behavior there is better than undefined behavior). Reviewed-by: Jose Fonseca Reviewed-by: Brian Paul --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 7 +-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 7 ++- src/gallium/drivers/swr/swr_shader.cpp | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index c35eff25ba..4a2b354063 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -585,8 +585,11 @@ scan_declaration(struct tgsi_shader_info *info, int buffer; unsigned index, target, type; - /* only first 32 regs will appear in this bitfield */ - info->file_mask[file] |= (1 << reg); + /* + * only first 32 regs will appear in this bitfield, if larger + * bits will wrap around. 
+ */ + info->file_mask[file] |= (1u << (reg & 31)); info->file_count[file]++; info->file_max[file] = MAX2(info->file_max[file], (int)reg); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 603fd84f6b..66645b07ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -3323,7 +3323,12 @@ make_variant_key(struct llvmpipe_context *lp, if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; for(i = 0; i < key->nr_sampler_views; ++i) { - if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { + /* + * Note sview may exceed what's representable by file_mask. + * This will still work, the only downside is that not actually + * used views may be included in the shader key. + */ + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) { lp_sampler_static_texture_state(&key->state[i].texture_state, lp->sampler_views[PIPE_SHADER_FRAGMENT][i]); } diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index e5fb679f8b..477fa7f2db 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -98,7 +98,7 @@ swr_generate_sampler_key(const struct lp_tgsi_info &info, key.nr_sampler_views = info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; for (unsigned i = 0; i < key.nr_sampler_views; i++) { - if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { + if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) { const struct pipe_sampler_view *view = ctx->sampler_views[shader_type][i]; lp_sampler_static_texture_state( ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: increase PIPE_MAX_SHADER_SAMPLER_VIEWS to 128
Module: Mesa Branch: master Commit: cf4a92fda29ca2ab76179287bdd76f4a6183dd0e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cf4a92fda29ca2ab76179287bdd76f4a6183dd0e Author: Roland Scheidegger Date: Tue Feb 27 03:38:17 2018 +0100 gallium: increase PIPE_MAX_SHADER_SAMPLER_VIEWS to 128 Some state trackers require 128. (There are no plans to increase PIPE_MAX_SAMPLERS too, since with gl state tracker it's unlikely more than 32 will be needed, if you need more use bindless.) --- src/gallium/include/pipe/p_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 640e6ed26d..4dce399f84 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -64,7 +64,7 @@ extern "C" { #define PIPE_MAX_SAMPLERS 32 #define PIPE_MAX_SHADER_INPUTS80 /* 32 GENERIC + 32 PATCH + 16 others */ #define PIPE_MAX_SHADER_OUTPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */ -#define PIPE_MAX_SHADER_SAMPLER_VIEWS 32 +#define PIPE_MAX_SHADER_SAMPLER_VIEWS 128 #define PIPE_MAX_SHADER_BUFFERS 32 #define PIPE_MAX_SHADER_IMAGES32 #define PIPE_MAX_TEXTURE_LEVELS 16 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: fix color export mask
Module: Mesa Branch: master Commit: 434523cf2a6738b0250de2a0e36e93b13f88832a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=434523cf2a6738b0250de2a0e36e93b13f88832a Author: Roland Scheidegger Date: Mon Mar 5 20:12:32 2018 +0100 r600: fix color export mask The r600 code (not the eg one) forgot to copy the ps_color_export_mask in commit 5b14e06d8b42e2b08ebc52b6c314ef8647d87a1f when updating the pixel state, leading to misrenderings (probably with MRT). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105262 Tested-by: LoneVVolf Tested-by: Pavel Vinogradov --- src/gallium/drivers/r600/r600_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7f6da1a3ed..923817119f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2525,6 +2525,7 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha } shader->nr_ps_color_outputs = num_cout; + shader->ps_color_export_mask = rshader->ps_color_export_mask; spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1)| ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: don't needlessly iterate through all sampler view slots
Module: Mesa Branch: master Commit: 89ae5def8cea9311727ac80d7274f80650279373 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=89ae5def8cea9311727ac80d7274f80650279373 Author: Roland Scheidegger Date: Sun Feb 25 04:26:37 2018 +0100 draw: don't needlessly iterate through all sampler view slots We already stored the highest (potentially) used number. Reviewed-by: Jose Fonseca Reviewed-by: Brian Paul --- src/gallium/auxiliary/draw/draw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 9791ec5506..e887272e15 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -973,7 +973,7 @@ draw_set_sampler_views(struct draw_context *draw, for (i = 0; i < num; ++i) draw->sampler_views[shader_stage][i] = views[i]; - for (i = num; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; ++i) + for (i = num; i < draw->num_sampler_views[shader_stage]; ++i) draw->sampler_views[shader_stage][i] = NULL; draw->num_sampler_views[shader_stage] = num; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): cso: don't cycle through PIPE_MAX_SHADER_SAMPLER_VIEWS on context destroy
Module: Mesa Branch: master Commit: b923f21eaadb77ee70e1bf4c5e2f9aee2a5fa205 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b923f21eaadb77ee70e1bf4c5e2f9aee2a5fa205 Author: Roland Scheidegger Date: Wed Feb 28 03:01:23 2018 +0100 cso: don't cycle through PIPE_MAX_SHADER_SAMPLER_VIEWS on context destroy There's no point, we know the highest non-null one. Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/cso_cache/cso_context.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 1b5d4b5598..3fa57f16ff 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -407,8 +407,10 @@ void cso_destroy_context( struct cso_context *ctx ) ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, NULL); } - for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { + for (i = 0; i < ctx->nr_fragment_views; i++) { pipe_sampler_view_reference(&ctx->fragment_views[i], NULL); + } + for (i = 0; i < ctx->nr_fragment_views_saved; i++) { pipe_sampler_view_reference(&ctx->fragment_views_saved[i], NULL); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): softpipe: don't iterate through PIPE_MAX_SHADER_SAMPLER_VIEWS
Module: Mesa Branch: master Commit: 26103487b54a1c1121132cc040927619cce45262 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=26103487b54a1c1121132cc040927619cce45262 Author: Roland Scheidegger Date: Wed Feb 28 04:28:29 2018 +0100 softpipe: don't iterate through PIPE_MAX_SHADER_SAMPLER_VIEWS We were setting view to NULL if the iteration was larger than i. But in fact if the view is NULL the code did nothing anyway... Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/drivers/softpipe/sp_state_sampler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index c10fd918fd..751eb76e84 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -181,8 +181,8 @@ prepare_shader_sampling( if (!num) return; - for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { - struct pipe_sampler_view *view = i < num ? views[i] : NULL; + for (i = 0; i < num; i++) { + struct pipe_sampler_view *view = views[i]; if (view) { struct pipe_resource *tex = view->texture; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): tgsi: Recognize RET in main for tgsi_transform
Module: Mesa Branch: master Commit: f6718baabc7d6fed0d41f72fb22e57c0d67fbf1d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f6718baabc7d6fed0d41f72fb22e57c0d67fbf1d Author: Roland Scheidegger Date: Tue Feb 13 18:56:34 2018 +0100 tgsi: Recognize RET in main for tgsi_transform Shaders coming from dx10 state trackers have a RET before the END. And the epilog needs to be placed before the RET (otherwise it will get ignored). Hence figure out if a RET is in main, in this case we'll place the epilog there rather than before the END. (At a closer look, there actually seem to be problems with control flow in general with output redirection, that would need another look. It's enough however to fix draw's aa line emulation in some internal bug - lines tend to be drawn with trivial shaders, moving either a constant color or a vertex color directly to the output). v2: add assert so buggy handling of RET in main is detected Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/tgsi/tgsi_transform.c | 62 + 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index ffdad1338c..a13cf90a27 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -110,6 +110,9 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, { uint procType; boolean first_instruction = TRUE; + boolean epilog_emitted = FALSE; + int cond_stack = 0; + int call_stack = 0; /* input shader */ struct tgsi_parse_context parse; @@ -166,22 +169,66 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, { struct tgsi_full_instruction *fullinst = &parse.FullToken.FullInstruction; +unsigned opcode = fullinst->Instruction.Opcode; if (first_instruction && ctx->prolog) { ctx->prolog(ctx); } -/* XXX Note: we may also want to look for a main/top-level - * TGSI_OPCODE_RET instruction in the future. 
+/* + * XXX Note: we handle the case of ret in main. + * However, the output redirections done by transform + * have their limits with control flow and will generally + * not work correctly. e.g. + * if (cond) { + *oColor = x; + *ret; + * } + * oColor = y; + * end; + * If the color output is redirected to a temp and modified + * by a transform, this will not work (the oColor assignment + * in the conditional will never make it to the actual output). */ -if (fullinst->Instruction.Opcode == TGSI_OPCODE_END -&& ctx->epilog) { - /* Emit caller's epilog */ - ctx->epilog(ctx); - /* Emit END */ +if ((opcode == TGSI_OPCODE_END || opcode == TGSI_OPCODE_RET) && + call_stack == 0 && ctx->epilog && !epilog_emitted) { + if (opcode == TGSI_OPCODE_RET && cond_stack != 0) { + assert(!"transform ignoring RET in main"); + } else { + assert(cond_stack == 0); + /* Emit caller's epilog */ + ctx->epilog(ctx); + epilog_emitted = TRUE; + } + /* Emit END (or RET) */ ctx->emit_instruction(ctx, fullinst); } else { + switch (opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_UIF: + case TGSI_OPCODE_SWITCH: + case TGSI_OPCODE_BGNLOOP: + cond_stack++; + break; + case TGSI_OPCODE_CAL: + call_stack++; + break; + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ENDSWITCH: + case TGSI_OPCODE_ENDLOOP: + assert(cond_stack > 0); + cond_stack--; + break; + case TGSI_OPCODE_ENDSUB: + assert(call_stack > 0); + call_stack--; + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_RET: + default: + break; + } if (ctx->transform_instruction) ctx->transform_instruction(ctx, fullinst); else @@ -231,6 +278,7 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in, assert( 0 ); } } + assert(call_stack == 0); tgsi_parse_free (&parse); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: partly fix sampleMaskIn value
Module: Mesa Branch: master Commit: b936f4d1ca0d2ab1e828ff6a6e617f12469687fa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b936f4d1ca0d2ab1e828ff6a6e617f12469687fa Author: Roland Scheidegger Date: Sun Feb 4 23:54:26 2018 +0100 r600: partly fix sampleMaskIn value The hw gives us coverage for pixel, not for individual fragment shader invocations, in case execution isn't per pixel (eg, unlike cm, actually cannot do "real" minSampleShading, it's either per-pixel or per-fragment, but it doesn't really make a difference here). Also, with msaa disabled, the hw still gives us a mask corresponding to the number of samples, where GL requires this to be 1. Fix this up by masking the sampleMaskIn bits with the bit corresponding to the sampleID, if we know this shader is always executed at per-sample granularity. (In case of a per-sample frequency shader and msaa disabled, the sampleID will always be 0, so this works just fine there.) Fixing this for the minSampleShading case will need a shader key (radeonsi uses the prolog part for) (for eg, could get away with a single bit, cm would need more bits depending on sample/invocation ratio, or read the bits from a uniform), unless we'd want to always use a sample mask uniform (which is probably not a good idea, as it would make the ordinary common msaa case slower for no good reason). This fixes some parts of piglit arb_sample_shading-samplemask (with fixed test), in particular those which use a sampleID, still failing others as expected. 
Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 54 ++ 1 file changed, 54 insertions(+) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8b9bb47a2e..26f9ddb940 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1138,6 +1138,11 @@ static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_off tgsi_parse_free(&parse); + if (ctx->info.reads_samplemask && + (ctx->info.uses_linear_sample || ctx->info.uses_linear_sample)) { + inputs[1].enabled = true; + } + if (ctx->bc->chip_class >= EVERGREEN) { int num_baryc = 0; /* assign gpr to each interpolator according to priority */ @@ -3503,8 +3508,57 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, r = eg_load_helper_invocation(&ctx); if (r) return r; + } + + /* +* XXX this relies on fixed_pt_position_gpr only being present when +* this shader should be executed per sample. Should be the case for now... +*/ + if (ctx.fixed_pt_position_gpr != -1 && ctx.info.reads_samplemask) { + /* +* Fix up sample mask. The hw always gives us coverage mask for +* the pixel. However, for per-sample shading, we need the +* coverage for the shader invocation only. +* Also, with disabled msaa, only the first bit should be set +* (luckily the same fixup works for both problems). +* For now, we can only do it if we know this shader is always +* executed per sample (due to usage of bits in the shader +* forcing per-sample execution). +* If the fb is not multisampled, we'd do unnecessary work but +* it should still be correct. +* It will however do nothing for sample shading according +* to MinSampleShading. 
+*/ + struct r600_bytecode_alu alu; + int tmp = r600_get_temp(&ctx); + assert(ctx.face_gpr != -1); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + + alu.op = ALU_OP2_LSHL_INT; + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[0].value = 0x1; + alu.src[1].sel = ctx.fixed_pt_position_gpr; + alu.src[1].chan = 3; + alu.dst.sel = tmp; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) + return r; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_AND_INT; + alu.src[0].sel = tmp; + alu.src[1].sel = ctx.face_gpr; + alu.src[1].chan = 2; + alu.dst.sel = ctx.face_gpr; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.last = 1; + if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) + return r; } + if (ctx.fragcoord_input >= 0)
Mesa (master): mesa: (trivial) remove unused ignore_sample_qualifier_parameter
Module: Mesa Branch: master Commit: 6fd3c395907731baadcf70978cf392c673ebc96f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6fd3c395907731baadcf70978cf392c673ebc96f Author: Roland Scheidegger Date: Sun Feb 4 00:32:05 2018 +0100 mesa: (trivial) remove unused ignore_sample_qualifier_parameter This parameter for _mesa_get_min_invocations_per_fragment() was once used by the intel driver, but it's long gone. Reviewed-by: Brian Paul Reviewed-by: Dave Airlie --- src/mesa/program/program.c| 11 --- src/mesa/program/program.h| 3 +-- src/mesa/state_tracker/st_atom_msaa.c | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 220efc3539..6aba3cb3f1 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -515,8 +515,7 @@ _mesa_find_free_register(const GLboolean used[], */ GLint _mesa_get_min_invocations_per_fragment(struct gl_context *ctx, - const struct gl_program *prog, - bool ignore_sample_qualifier) + const struct gl_program *prog) { /* From ARB_sample_shading specification: * "Using gl_SampleID in a fragment shader causes the entire shader @@ -534,11 +533,9 @@ _mesa_get_min_invocations_per_fragment(struct gl_context *ctx, * "Use of the "sample" qualifier on a fragment shader input * forces per-sample shading" */ - if (prog->info.fs.uses_sample_qualifier && !ignore_sample_qualifier) - return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1); - - if (prog->info.system_values_read & (SYSTEM_BIT_SAMPLE_ID | - SYSTEM_BIT_SAMPLE_POS)) + if (prog->info.fs.uses_sample_qualifier || + (prog->info.system_values_read & (SYSTEM_BIT_SAMPLE_ID | +SYSTEM_BIT_SAMPLE_POS))) return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1); else if (ctx->Multisample.SampleShading) return MAX2(ceil(ctx->Multisample.MinSampleShadingValue * diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h index 376da7b2d4..659385f55b 100644 --- a/src/mesa/program/program.h +++ 
b/src/mesa/program/program.h @@ -108,8 +108,7 @@ _mesa_find_free_register(const GLboolean used[], extern GLint _mesa_get_min_invocations_per_fragment(struct gl_context *ctx, - const struct gl_program *prog, - bool ignore_sample_qualifier); + const struct gl_program *prog); static inline GLuint _mesa_program_enum_to_shader_stage(GLenum v) diff --git a/src/mesa/state_tracker/st_atom_msaa.c b/src/mesa/state_tracker/st_atom_msaa.c index 589e328ac5..556c7c5889 100644 --- a/src/mesa/state_tracker/st_atom_msaa.c +++ b/src/mesa/state_tracker/st_atom_msaa.c @@ -77,5 +77,5 @@ st_update_sample_shading(struct st_context *st) return; cso_set_min_samples(st->cso_context, - _mesa_get_min_invocations_per_fragment(st->ctx, &st->fp->Base, false)); + _mesa_get_min_invocations_per_fragment(st->ctx, &st->fp->Base)); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: clean up fragment shader input scan code
Module: Mesa Branch: master Commit: 07d724326aba7945117e5ee3711df7f73dfb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=07d724326aba7945117e5ee3711df7f73dfb Author: Roland Scheidegger Date: Sun Feb 4 23:38:28 2018 +0100 r600: clean up fragment shader input scan code For some reason, we were iterating through the code twice (first just for instructions needing barycentrics, then for instructions and input dcls). Move things around slightly so this is no longer necessary. There also was an unneeded enabling of the fixed_pt_position_gpr - this is only needed if the per-sample interpolation comes from an input, not from an instruction (just move the assert where it belongs) (since the sample id to sample from comes from a tgsi src in this case, and isn't sampleID). Otherwise there should be no functional change. Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 75 +++--- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c3bcb9b77d..8b9bb47a2e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -,7 +,6 @@ static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_off if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { location = TGSI_INTERPOLATE_LOC_CENTER; - inputs[1].enabled = true; /* needs SAMPLEID */ } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { location = TGSI_INTERPOLATE_LOC_CENTER; /* Needs sample positions, currently those are always available */ @@ -1139,6 +1138,19 @@ static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_off tgsi_parse_free(&parse); + if (ctx->bc->chip_class >= EVERGREEN) { + int num_baryc = 0; + /* assign gpr to each interpolator according to priority */ + for (i = 0; i < ARRAY_SIZE(ctx->eg_interpolators); i++) { + if (ctx->eg_interpolators[i].enabled) { + ctx->eg_interpolators[i].ij_index = num_baryc; 
+ num_baryc++; + } + } + num_baryc = (num_baryc + 1) >> 1; + gpr_offset += num_baryc; + } + for (i = 0; i < ARRAY_SIZE(inputs); i++) { boolean enabled = inputs[i].enabled; int *reg = inputs[i].reg; @@ -1165,18 +1177,21 @@ static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_off * for evergreen we need to scan the shader to find the number of GPRs we need to * reserve for interpolation and system values * - * we need to know if we are going to emit - * any sample or centroid inputs + * we need to know if we are going to emit any sample or centroid inputs * if perspective and linear are required */ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) { unsigned i; - int num_baryc; - struct tgsi_parse_context parse; memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); + /* +* Could get this information from the shader info. But right now +* we interpolate all declared inputs, whereas the shader info will +* only contain the bits if the inputs are actually used, so it might +* not be safe... +*/ for (i = 0; i < ctx->info.num_inputs; i++) { int k; /* skip position/face/mask/sampleid */ @@ -1193,53 +1208,9 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) ctx->eg_interpolators[k].enabled = TRUE; } - if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { - return 0; - } - - /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { - const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; - if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || - inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || - inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) - { - int interpolate, location, k; - - if (inst->Instruction.Opcode == TGSI_OPCODE_
Mesa (master): r600/cm: (trivial) code cleanup for emitting msaa state
Module: Mesa Branch: master Commit: becc7faae22cee26888e87d0c23c193b86603c5a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=becc7faae22cee26888e87d0c23c193b86603c5a Author: Roland Scheidegger Date: Sat Feb 3 20:11:35 2018 +0100 r600/cm: (trivial) code cleanup for emitting msaa state No functional change (compile tested only). Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/cayman_msaa.c | 14 ++ src/gallium/drivers/r600/evergreen_state.c | 10 ++ src/gallium/drivers/r600/r600_pipe_common.h | 6 ++ 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/r600/cayman_msaa.c b/src/gallium/drivers/r600/cayman_msaa.c index 6bc307a4bc..f97924ac22 100644 --- a/src/gallium/drivers/r600/cayman_msaa.c +++ b/src/gallium/drivers/r600/cayman_msaa.c @@ -141,7 +141,7 @@ void cayman_init_msaa(struct pipe_context *ctx) cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]); } -void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples) +static void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples) { switch (nr_samples) { default: @@ -202,9 +202,8 @@ void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples) } } -void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, -int ps_iter_samples, int overrast_samples, -unsigned sc_mode_cntl_1) +void cayman_emit_msaa_state(struct radeon_winsys_cs *cs, int nr_samples, + int ps_iter_samples, int overrast_samples) { int setup_samples = nr_samples > 1 ? nr_samples : overrast_samples > 1 ? overrast_samples : 0; @@ -216,6 +215,13 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, * endcaps. 
*/ unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1); + unsigned sc_mode_cntl_1 = + EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | + EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1); + + if (nr_samples > 1) { + cayman_emit_msaa_sample_locs(cs, nr_samples); + } if (setup_samples > 1) { /* indexed by log2(nr_samples) */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 742ca5babb..fcd742c5f9 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1956,14 +1956,8 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r if (rctx->b.chip_class == EVERGREEN) { evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples); } else { - unsigned sc_mode_cntl_1 = - EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | - EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1); - - if (rctx->framebuffer.nr_samples > 1) - cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples); - cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples, - rctx->ps_iter_samples, 0, sc_mode_cntl_1); + cayman_emit_msaa_state(cs, rctx->framebuffer.nr_samples, + rctx->ps_iter_samples, 0); } } diff --git a/src/gallium/drivers/r600/r600_pipe_common.h b/src/gallium/drivers/r600/r600_pipe_common.h index 86a20f8639..ee8eb54920 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.h +++ b/src/gallium/drivers/r600/r600_pipe_common.h @@ -799,10 +799,8 @@ extern const unsigned eg_max_dist_4x; void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count, unsigned sample_index, float *out_value); void cayman_init_msaa(struct pipe_context *ctx); -void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples); -void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples, -int ps_iter_samples, int overrast_samples, -unsigned sc_mode_cntl_1); +void cayman_emit_msaa_state(struct radeon_winsys_cs *cs, int nr_samples, + int ps_iter_samples, int 
overrast_samples); /* Inline helpers. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): Revert "gallium: build ddebug, noop, rbug, trace as part of auxiliary"
Module: Mesa Branch: master Commit: 09f49b9e50ee86e24c7e975f08535aa7237852a1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09f49b9e50ee86e24c7e975f08535aa7237852a1 Author: Roland Scheidegger Date: Wed Feb 7 23:47:39 2018 +0100 Revert "gallium: build ddebug, noop, rbug, trace as part of auxiliary" This reverts commit 6f82b8d8d0a986aac28e7bec47fc313fb950475c. This broke scons build, and reportedly clover with autotools/meson too. --- src/gallium/Makefile.am| 12 +- src/gallium/auxiliary/Makefile.am | 10 + .../auxiliary/target-helpers/inline_debug_helper.h | 26 ++ src/gallium/drivers/ddebug/Makefile.sources| 14 ++-- src/gallium/drivers/noop/Makefile.sources | 8 +++ src/gallium/drivers/rbug/Makefile.sources | 18 +++ src/gallium/drivers/trace/Makefile.sources | 26 +++--- src/gallium/state_trackers/osmesa/Makefile.am | 3 ++- src/gallium/targets/d3dadapter9/Makefile.am| 8 ++- src/gallium/targets/dri/Makefile.am| 10 - src/gallium/targets/libgl-xlib/Makefile.am | 6 - src/gallium/targets/osmesa/Makefile.am | 4 +++- src/gallium/targets/pipe-loader/Makefile.am| 6 - src/gallium/tests/unit/Makefile.am | 1 + 14 files changed, 98 insertions(+), 54 deletions(-) diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am index 81eabef106..af010c89f8 100644 --- a/src/gallium/Makefile.am +++ b/src/gallium/Makefile.am @@ -11,6 +11,12 @@ SUBDIRS += auxiliary/pipe-loader ## Gallium pipe drivers and their respective winsys' ## +SUBDIRS += \ + drivers/ddebug \ + drivers/noop \ + drivers/trace \ + drivers/rbug + ## freedreno/msm/kgsl if HAVE_GALLIUM_FREEDRENO SUBDIRS += drivers/freedreno winsys/freedreno/drm @@ -182,12 +188,6 @@ endif EXTRA_DIST += \ include \ - drivers/noop/SConscript \ - drivers/rbug/README \ - drivers/rbug/SConscript \ - drivers/trace/trace.xsl \ - drivers/trace/README \ - drivers/trace/SConscript \ state_trackers/README \ state_trackers/wgl targets/libgl-gdi \ targets/graw-gdi targets/graw-null targets/graw-xlib \ diff --git a/src/gallium/auxiliary/Makefile.am 
b/src/gallium/auxiliary/Makefile.am index 7af3f3ce42..95a325f96b 100644 --- a/src/gallium/auxiliary/Makefile.am +++ b/src/gallium/auxiliary/Makefile.am @@ -1,8 +1,4 @@ include Makefile.sources -include $(top_srcdir)/src/gallium/drivers/ddebug/Makefile.sources -include $(top_srcdir)/src/gallium/drivers/noop/Makefile.sources -include $(top_srcdir)/src/gallium/drivers/rbug/Makefile.sources -include $(top_srcdir)/src/gallium/drivers/trace/Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc noinst_LTLIBRARIES = libgallium.la @@ -23,11 +19,7 @@ AM_CXXFLAGS = \ libgallium_la_SOURCES = \ $(C_SOURCES) \ $(NIR_SOURCES) \ - $(GENERATED_SOURCES) \ - $(DDEBUG_SOURCES) \ - $(NOOP_SOURCES) \ - $(RBUG_SOURCES) \ - $(TRACE_SOURCES) + $(GENERATED_SOURCES) if HAVE_LIBDRM diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h index 8556376940..2443bf2146 100644 --- a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h @@ -11,18 +11,44 @@ * one or more debug driver: rbug, trace. */ +#ifdef GALLIUM_DDEBUG #include "ddebug/dd_public.h" +#endif + +#ifdef GALLIUM_TRACE #include "trace/tr_public.h" +#endif + +#ifdef GALLIUM_RBUG #include "rbug/rbug_public.h" +#endif + +#ifdef GALLIUM_NOOP #include "noop/noop_public.h" +#endif +/* + * TODO: Audit the following *screen_create() - all of + * them should return the original screen on failuire. 
+ */ static inline struct pipe_screen * debug_screen_wrap(struct pipe_screen *screen) { +#if defined(GALLIUM_DDEBUG) screen = ddebug_screen_create(screen); +#endif + +#if defined(GALLIUM_RBUG) screen = rbug_screen_create(screen); +#endif + +#if defined(GALLIUM_TRACE) screen = trace_screen_create(screen); +#endif + +#if defined(GALLIUM_NOOP) screen = noop_screen_create(screen); +#endif if (debug_get_bool_option("GALLIUM_TESTS", FALSE)) util_run_tests(screen); diff --git a/src/gallium/drivers/ddebug/Makefile.sources b/src/gallium/drivers/ddebug/Makefile.sources index d43a75ba40..1bd38274df 100644 --- a/src/gallium/drivers/ddebug/Makefile.sources +++ b/src/gallium/drivers/ddebug/Makefile.sources @@ -1,7 +1,7 @@ -DDEBUG_SOURCES := \ - $(top_srcdir)/src/gallium/drivers/ddebug/dd_context.c \ - $(top_srcdir)/src/gallium/drivers/ddebug/dd_draw.c \ - $(top_srcdir)/src/gallium/drivers/ddebug/dd_pipe
Mesa (master): u_blit: (trivial) fix bogus argument order for set_fragment_shader
Module: Mesa Branch: master Commit: def09f8db0ce77fc41f5188418e0b06356ce59b7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=def09f8db0ce77fc41f5188418e0b06356ce59b7 Author: Roland Scheidegger Date: Wed Feb 7 22:02:54 2018 +0100 u_blit: (trivial) fix bogus argument order for set_fragment_shader Amazingly this still worked sometimes, albeit I'm not even sure why... This fixes d7bec6f7a6a2a35c80be939db8532011af1e9b67. --- src/gallium/auxiliary/util/u_blit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 817eeac9f0..de39422e32 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -603,8 +603,8 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_set_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT, 1, &src_sampler_view); /* shaders */ - set_fragment_shader(ctx, src_xrbias, - src_sampler_view->format, + set_fragment_shader(ctx, src_sampler_view->format, + src_xrbias, src_sampler_view->texture->target); set_vertex_shader(ctx); cso_set_tessctrl_shader_handle(ctx->cso, NULL); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): u_simple_shaders: fix mask handling in util_make_fragment_tex_shader_writemask
Module: Mesa Branch: master Commit: afd1e9be1714eb95c3b797becb15bebaad9e6646 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=afd1e9be1714eb95c3b797becb15bebaad9e6646 Author: Roland Scheidegger Date: Wed Feb 7 05:03:42 2018 +0100 u_simple_shaders: fix mask handling in util_make_fragment_tex_shader_writemask The writemask handling was busted, since writing defaults to output meant they got overwritten by the tex sampling anyway. Albeit the affected components were undefined, so maybe with some luck it still would have worked with some drivers - if not could as well kill it... (This would have affected u_blitter but not u_blit since the latter always used xyzw mask.) Reviewed-by: Brian Paul --- src/gallium/auxiliary/util/u_simple_shaders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 967954596b..a301c05762 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -275,7 +275,7 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, if (writemask != TGSI_WRITEMASK_XYZW) { struct ureg_src imm = ureg_imm4f( ureg, 0, 0, 0, 1 ); - ureg_MOV( ureg, out, imm ); + ureg_MOV(ureg, temp, imm); } if (tex_target == TGSI_TEXTURE_BUFFER) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): u_blit,u_simple_shaders: add shader to convert from xrbias format
Module: Mesa Branch: master Commit: d7bec6f7a6a2a35c80be939db8532011af1e9b67 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7bec6f7a6a2a35c80be939db8532011af1e9b67 Author: Roland Scheidegger Date: Wed Feb 7 05:18:17 2018 +0100 u_blit,u_simple_shaders: add shader to convert from xrbias format We need this to handle some oddball dx10 format (DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM). What you can do with this format is very limited, hence we don't want to add it as a gallium format (we could not express the properties of this format as ordinary format properties neither, so like all special formats it would need specific code for handling it in any case). While here, also nuke the array for different shaders for different writemasks, as it was not actually used (always full masks are passed in for generating shaders). Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/util/u_blit.c | 40 ++- src/gallium/auxiliary/util/u_blit.h | 3 +- src/gallium/auxiliary/util/u_simple_shaders.c | 47 +++ src/gallium/auxiliary/util/u_simple_shaders.h | 4 +++ 4 files changed, 78 insertions(+), 16 deletions(-) diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 3f92476f0c..817eeac9f0 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -65,7 +65,7 @@ struct blit_state struct pipe_vertex_element velem[2]; void *vs; - void *fs[PIPE_MAX_TEXTURE_TYPES][TGSI_WRITEMASK_XYZW + 1][3]; + void *fs[PIPE_MAX_TEXTURE_TYPES][4]; struct pipe_resource *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -135,17 +135,15 @@ void util_destroy_blit(struct blit_state *ctx) { struct pipe_context *pipe = ctx->pipe; - unsigned i, j, k; + unsigned i, j; if (ctx->vs) pipe->delete_vs_state(pipe, ctx->vs); for (i = 0; i < ARRAY_SIZE(ctx->fs); i++) { for (j = 0; j < ARRAY_SIZE(ctx->fs[i]); j++) { - for (k = 0; k < ARRAY_SIZE(ctx->fs[i][j]); k++) { -if (ctx->fs[i][j][k]) - pipe->delete_fs_state(pipe, 
ctx->fs[i][j][k]); - } + if (ctx->fs[i][j]) +pipe->delete_fs_state(pipe, ctx->fs[i][j]); } } @@ -159,8 +157,9 @@ util_destroy_blit(struct blit_state *ctx) * Helper function to set the fragment shaders. */ static inline void -set_fragment_shader(struct blit_state *ctx, uint writemask, +set_fragment_shader(struct blit_state *ctx, enum pipe_format format, +boolean src_xrbias, enum pipe_texture_target pipe_tex) { enum tgsi_return_type stype; @@ -177,19 +176,29 @@ set_fragment_shader(struct blit_state *ctx, uint writemask, idx = 2; } - if (!ctx->fs[pipe_tex][writemask][idx]) { + if (src_xrbias) { + assert(stype == TGSI_RETURN_TYPE_FLOAT); + idx = 3; + if (!ctx->fs[pipe_tex][idx]) { + enum tgsi_texture_type tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0); + ctx->fs[pipe_tex][idx] = +util_make_fragment_tex_shader_xrbias(ctx->pipe, tgsi_tex); + } + } + + else if (!ctx->fs[pipe_tex][idx]) { unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0); /* OpenGL does not allow blits from signed to unsigned integer * or vice versa. */ - ctx->fs[pipe_tex][writemask][idx] = + ctx->fs[pipe_tex][idx] = util_make_fragment_tex_shader_writemask(ctx->pipe, tgsi_tex, TGSI_INTERPOLATE_LINEAR, - writemask, + TGSI_WRITEMASK_XYZW, stype, stype, false, false); } - cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][writemask][idx]); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][idx]); } @@ -491,8 +500,8 @@ util_blit_pixels(struct blit_state *ctx, * The sampler view's first_layer indicate the layer to use, but for * cube maps it must point to the first face. Face is passed in src_face. * - * The main advantage over util_blit_pixels is that it allows to specify swizzles in - * pipe_sampler_view::swizzle_?. + * The main advantage over util_blit_pixels is that it allows to specify + * swizzles in pipe_sampler_view::swizzle_?. * * But there is no control over blitting Z and/or stencil. 
*/ @@ -505,7 +514,8 @@ util_blit_pixels_tex(struct blit_state *ctx, struct pipe_surface *dst, int dstX0, int dstY0, int dstX1, int dstY1, - float z, uint filter) + float z, uint filter, + boolean src_xrbias) { boolean normaliz
Mesa (master): r600: don't do stack workarounds for hemlock
Module: Mesa Branch: master Commit: c2f0e0885776f3f0a18b9db08149564d4b98e5b7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c2f0e0885776f3f0a18b9db08149564d4b98e5b7 Author: Roland Scheidegger Date: Tue Jan 30 05:48:27 2018 +0100 r600: don't do stack workarounds for hemlock By the looks of it it seems hemlock is treated separately to cypress, but certainly it won't need the stack workarounds cedar/redwood (and seemingly every other eg chip except cypress/juniper) need. (Discovered by accident.) Acked-by: Alex Deucher --- src/gallium/drivers/r600/sb/sb_bc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h index b35671bf0f..a249395474 100644 --- a/src/gallium/drivers/r600/sb/sb_bc.h +++ b/src/gallium/drivers/r600/sb/sb_bc.h @@ -665,6 +665,7 @@ public: return false; switch (hw_chip) { + case HW_CHIP_HEMLOCK: case HW_CHIP_CYPRESS: case HW_CHIP_JUNIPER: return false; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: restrict formats being supported by target type for formatquery
Module: Mesa Branch: master Commit: 21fe02d1d369d25021d0be7f558063e103e2dce7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=21fe02d1d369d25021d0be7f558063e103e2dce7 Author: Roland Scheidegger Date: Sat Jan 27 01:25:26 2018 +0100 mesa: restrict formats being supported by target type for formatquery The code just considered all formats as being supported if they were either a valid fbo or texture format. This was quite awkward since then the query would return "supported" for e.g. GL_RGB9E5 or compressed formats and target RENDERBUFFER (albeit the driver could still refuse it in theory). However, when then querying for instance the internalformat sizes, it would just return 0 (due to the checks being more strict there). It was also a problem for texture buffer targets, which have a more restricted list of formats which are allowed (and again, it would return supported but then querying sizes would return 0). So only take validation of formats into account which make sense for a given target. Can also toss out some special checks for rgb9e5 later, since we'd never get there if it wasn't supported in the first place. Reviewed-by: Alejandro Piñeiro --- src/mesa/main/formatquery.c | 31 +-- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 1846fbc688..303e7b2f8c 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -558,15 +558,29 @@ _is_internalformat_supported(struct gl_context *ctx, GLenum target, * implementation accepts it for any texture specification commands, and * - unsized or base internal format, if the implementation accepts * it for texture or image specification. +* +* But also: +* "If the particular <target> and <internalformat> combination do not make +* sense, or if a particular type of <target> is not supported by the +* implementation the "unsupported" answer should be given. This is not an +* error. 
*/ GLint buffer[1]; - /* At this point an internalformat is valid if it is valid as a texture or -* as a renderbuffer format. The checks are different because those methods -* return different values when passing non supported internalformats */ - if (_mesa_base_tex_format(ctx, internalformat) < 0 && - _mesa_base_fbo_format(ctx, internalformat) == 0) - return false; + if (target == GL_RENDERBUFFER) { + if (_mesa_base_fbo_format(ctx, internalformat) == 0) { + return false; + } + } else if (target == GL_TEXTURE_BUFFER) { + if (_mesa_validate_texbuffer_format(ctx, internalformat) == + MESA_FORMAT_NONE) { + return false; + } + } else { + if (_mesa_base_tex_format(ctx, internalformat) < 0) { + return false; + } + } /* Let the driver have the final word */ ctx->Driver.QueryInternalFormat(ctx, target, internalformat, @@ -969,10 +983,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, * and glGetRenderbufferParameteriv functions. */ if (pname == GL_INTERNALFORMAT_SHARED_SIZE) { - if (_mesa_has_EXT_texture_shared_exponent(ctx) && - target != GL_TEXTURE_BUFFER && - target != GL_RENDERBUFFER && - texformat == MESA_FORMAT_R9G9B9E5_FLOAT) { + if (texformat == MESA_FORMAT_R9G9B9E5_FLOAT) { buffer[0] = 5; } goto end; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: remove misleading gles checks for formatquery
Module: Mesa Branch: master Commit: 09dc4f9012b12c51972e32db653f507448b29490 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09dc4f9012b12c51972e32db653f507448b29490 Author: Roland Scheidegger Date: Sat Jan 27 01:12:52 2018 +0100 mesa: remove misleading gles checks for formatquery Testing for gles there is just confusing - this is about target being supported, if it was valid at all was already determined earlier (in _legal_parameters). It didn't make sense at all in any case, since it would only have said false there for gles for 2d but not 2d arrays etc. Reviewed-by: Alejandro Piñeiro --- src/mesa/main/formatquery.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 61f798c88f..9174fb6b27 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -392,14 +392,12 @@ _is_target_supported(struct gl_context *ctx, GLenum target) * implementation the "unsupported" answer should be given. * This is not an error." * -* For OpenGL ES, queries can only be used with GL_RENDERBUFFER or MS. +* Note that legality of targets has already been verified. */ switch(target){ case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_3D: - if (!_mesa_is_desktop_gl(ctx)) - return false; break; case GL_TEXTURE_1D_ARRAY: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: skip validation of legality of size/type queries for format queries
Module: Mesa Branch: master Commit: 3c7aa242f5df6ba45acc7338bad088910e4b7330 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c7aa242f5df6ba45acc7338bad088910e4b7330 Author: Roland Scheidegger Date: Sat Jan 27 01:39:35 2018 +0100 mesa: skip validation of legality of size/type queries for format queries The size/type query is always legal (if we made it that far). Removing this causes a difference for GL_TEXTURE_BUFFER - the reason is that these parameters are valid only with GetTexLevelParameter() if gl 3.1 is supported, but not if only ARB_texture_buffer_object is supported. However, while the spec says that these queries return "the same information as querying GetTexLevelParameter" I believe we're not expected to return just zeros here. By definition, these pnames are always valid (unlike for the GetTexLevelParameter() function which would return an error without GL 3.1). The spec is a bit inconsistent there and open to interpretation - while mentioning the "same information as querying GetTexLevelParameter" is returned, it also mentions that 0 is returned for size/type if the target/format is not supported - implying correct results to be returned if it is supported, regardless that GetTexLevelParameter would return an error. (Also, the bit about this returning the same as GetTexLevelParameter also includes querying stencil type, which isn't even possible with GetTexLevelParameter.) This breaks some piglit arb_internalformat_query2 tests (which I believe to be wrong). 
Reviewed-by: Alejandro Piñeiro --- src/mesa/main/formatquery.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 303e7b2f8c..834f8e 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -960,9 +960,6 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, mesa_format texformat; if (target != GL_RENDERBUFFER) { - if (!_mesa_legal_get_tex_level_parameter_target(ctx, target, true)) -goto end; - baseformat = _mesa_base_tex_format(ctx, internalformat); } else { baseformat = _mesa_base_fbo_format(ctx, internalformat); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: (trivial) add TODO comment for default results for internal queries
Module: Mesa Branch: master Commit: 272e7e1bd5c12a3ef36027f0071065b7ce04e9e9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=272e7e1bd5c12a3ef36027f0071065b7ce04e9e9 Author: Roland Scheidegger Date: Tue Jan 30 01:03:49 2018 +0100 mesa: (trivial) add TODO comment for default results for internal queries --- src/mesa/main/formatquery.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 9174fb6b27..1846fbc688 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -700,6 +700,12 @@ _mesa_query_internal_format_default(struct gl_context *ctx, GLenum target, case GL_FRAMEBUFFER_RENDERABLE_LAYERED: case GL_FRAMEBUFFER_BLEND: case GL_FILTER: + /* + * TODO seems a tad optimistic just saying yes to everything here. + * Even for combinations which make no sense... + * And things like TESS_CONTROL_TEXTURE should definitely default to + * NONE if the driver doesn't even support tessellation... + */ params[0] = GL_FULL_SUPPORT; break; case GL_NUM_TILING_TYPES_EXT: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: fix crash with seamless cube filtering with different min/mag filter
Module: Mesa Branch: master Commit: 4fe662c58f6453b3558de479e7c2bfe4158dc26c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4fe662c58f6453b3558de479e7c2bfe4158dc26c Author: Roland Scheidegger Date: Thu Jan 25 04:30:41 2018 +0100 gallivm: fix crash with seamless cube filtering with different min/mag filter We are not allowed to modify the incoming coords values, or things may crash (as we may be inside a llvm conditional and the values may be used in another branch). I recently broke this when fixing an issue with NaNs and seamless cube map filtering, and it causes crashes when doing cubemap filtering if the min and mag filters are different. Add const to the pointers passed in to prevent this mishap in the future. Fixes: a485ad0bcd ("gallivm: fix an issue with NaNs with seamless cube filtering") Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 38 +-- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index ff8cbf604c..8f760f59fe 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -857,7 +857,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, LLVMValueRef mipoffsets, - LLVMValueRef *coords, + const LLVMValueRef *coords, const LLVMValueRef *offsets, LLVMValueRef colors_out[4]) { @@ -1004,7 +1004,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, LLVMValueRef mipoffsets, - LLVMValueRef *coords, + const LLVMValueRef *coords, const LLVMValueRef *offsets, LLVMValueRef colors_out[4]) { @@ -1106,7 +1106,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, struct lp_build_if_state edge_if; LLVMTypeRef int1t; LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2]; - LLVMValueRef coord, 
have_edge, have_corner; + LLVMValueRef coord0, coord1, have_edge, have_corner; LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y; LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp; LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped; @@ -1130,20 +1130,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * other values might be bogus in the end too). * So kill off the NaNs here. */ - coords[0] = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero, - GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - coords[1] = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero, - GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - coord = lp_build_mul(coord_bld, coords[0], flt_width_vec); + coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero, +GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); + coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec); /* instead of clamp, build mask if overflowed */ - coord = lp_build_sub(coord_bld, coord, half); + coord0 = lp_build_sub(coord_bld, coord0, half); /* convert to int, compute lerp weight */ /* not ideal with AVX (and no AVX2) */ - lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart); + lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart); x1 = lp_build_add(ivec_bld, x0, ivec_bld->one); - coord = lp_build_mul(coord_bld, coords[1], flt_height_vec); - coord = lp_build_sub(coord_bld, coord, half); - lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart); + coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero, +GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); + coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec); + coord1 = lp_build_sub(coord_bld, coord1, half); + lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart); y1 = lp_build_add(ivec_bld, y0, ivec_bld->one); fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero); @@ -1747,7 +1747,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, unsigned img_filter, unsigned mip_filter, boolean is_gather, - 
LLVMValueRef *coords, + const L
Mesa (master): draw: fix vsplit code when the (post-bias) index value is -1
Module: Mesa Branch: master Commit: 1f462eaf394517dac98b0c41f09e995f2940fdb8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f462eaf394517dac98b0c41f09e995f2940fdb8 Author: Roland Scheidegger Date: Tue Jan 16 03:01:56 2018 +0100 draw: fix vsplit code when the (post-bias) index value is -1 vsplit_add_cache uses the post-bias index for hashing, but the vsplit_add_cache_uint/ushort/ubyte ones used the pre-bias index, therefore the code for handling the special case (because -1 matches the initialization value of the cache) wasn't actually working. Commit 78a997f72841310620d18daa9015633343d04db1 actually simplified the cache logic somewhat, but it looks like this particular problem carried over (and duplicated to the ushort/ubyte cases, since before only uint needed it). This could lead to the vsplit cache doing the wrong thing, in particular later fetch_info might indicate there are 0 values to fetch. This only really affected edge cases which were bogus to begin with, but it could lead to a crash with the jit vertex shader, since it cannot handle this case correctly (the count loop is always executed at least once and we would not allocate any memory for the shader outputs), so add another assert to catch it there. 
Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 1 + src/gallium/auxiliary/draw/draw_pt_vsplit.c| 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index c6492a18cf..5e0c562256 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -368,6 +368,7 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle, unsigned start_or_maxelt, vid_base; const unsigned *elts; + assert(fetch_info->count > 0); llvm_vert_info.count = fetch_info->count; llvm_vert_info.vertex_size = fpme->vertex_size; llvm_vert_info.stride = fpme->vertex_size; diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c index a68d5bf971..3ff077b760 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c @@ -133,7 +133,7 @@ vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts, VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias); /* unlike the uint case this can only happen with elt_bias */ if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) { - unsigned hash = fetch % MAP_SIZE; + unsigned hash = elt_idx % MAP_SIZE; vsplit->cache.fetches[hash] = 0; vsplit->cache.has_max_fetch = TRUE; } @@ -148,7 +148,7 @@ vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts, VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias); /* unlike the uint case this can only happen with elt_bias */ if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) { - unsigned hash = fetch % MAP_SIZE; + unsigned hash = elt_idx % MAP_SIZE; vsplit->cache.fetches[hash] = 0; vsplit->cache.has_max_fetch = TRUE; } @@ -168,7 +168,7 @@ vsplit_add_cache_uint(struct 
vsplit_frontend *vsplit, const uint *elts, VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias); /* Take care for DRAW_MAX_FETCH_IDX (since cache is initialized to -1). */ if (elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) { - unsigned hash = fetch % MAP_SIZE; + unsigned hash = elt_idx % MAP_SIZE; /* force update - any value will do except DRAW_MAX_FETCH_IDX */ vsplit->cache.fetches[hash] = 0; vsplit->cache.has_max_fetch = TRUE; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: remove VSPLIT_CREATE_IDX macro
Module: Mesa Branch: master Commit: b0413cfd8b84634db4a5bf57d550b21d0d2fa8f7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b0413cfd8b84634db4a5bf57d550b21d0d2fa8f7 Author: Roland Scheidegger Date: Tue Jan 16 17:55:00 2018 +0100 draw: remove VSPLIT_CREATE_IDX macro Just inline the little bit of code. Reviewed-by: Jose Fonseca Reviewed-by: Brian Paul --- src/gallium/auxiliary/draw/draw_pt_vsplit.c | 23 --- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c index 3ff077b760..653deab28c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c @@ -116,21 +116,15 @@ vsplit_get_base_idx(unsigned start, unsigned fetch) return draw_overflow_uadd(start, fetch, MAX_ELT_IDX); } -/* - * The final element index is just element index plus element bias. - */ -#define VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias)\ - unsigned elt_idx; \ - elt_idx = vsplit_get_base_idx(start, fetch);\ - elt_idx = (unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + (int)elt_bias); - static inline void vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts, unsigned start, unsigned fetch, int elt_bias) { struct draw_context *draw = vsplit->draw; - VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias); + unsigned elt_idx; + elt_idx = vsplit_get_base_idx(start, fetch); + elt_idx = (unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + elt_bias); /* unlike the uint case this can only happen with elt_bias */ if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) { unsigned hash = elt_idx % MAP_SIZE; @@ -145,7 +139,9 @@ vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts, unsigned start, unsigned fetch, int elt_bias) { struct draw_context *draw = vsplit->draw; - VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias); + unsigned elt_idx; + elt_idx = vsplit_get_base_idx(start, fetch); + elt_idx = 
(unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + elt_bias); /* unlike the uint case this can only happen with elt_bias */ if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) { unsigned hash = elt_idx % MAP_SIZE; @@ -165,7 +161,12 @@ vsplit_add_cache_uint(struct vsplit_frontend *vsplit, const uint *elts, unsigned start, unsigned fetch, int elt_bias) { struct draw_context *draw = vsplit->draw; - VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias); + unsigned elt_idx; + /* +* The final element index is just element index plus element bias. +*/ + elt_idx = vsplit_get_base_idx(start, fetch); + elt_idx = (unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + elt_bias); /* Take care for DRAW_MAX_FETCH_IDX (since cache is initialized to -1). */ if (elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) { unsigned hash = elt_idx % MAP_SIZE; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): util: fix NORETURN for msvc, add HAVE_FUNC_ATTRIBUTE_NORETURN to c99_compat.h
Module: Mesa Branch: master Commit: 85377dc55c55d1c5536cdf9a86ce67ebb59b7e77 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=85377dc55c55d1c5536cdf9a86ce67ebb59b7e77 Author: Roland Scheidegger Date: Thu Jan 11 01:49:00 2018 +0100 util: fix NORETURN for msvc, add HAVE_FUNC_ATTRIBUTE_NORETURN to c99_compat.h We've seen some problems internally due to macro redefinition. Fix this by adding HAVE_FUNC_ATTRIBUTE_NORETURN to c99_compat.h, and defining it for msvc. And avoid redefinition just in case. Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- include/c99_compat.h | 1 + src/util/macros.h| 12 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/c99_compat.h b/include/c99_compat.h index cb690c6e2a..81621a7fab 100644 --- a/include/c99_compat.h +++ b/include/c99_compat.h @@ -164,6 +164,7 @@ test_c99_compat_h(const void * restrict a, #define HAVE_FUNC_ATTRIBUTE_FORMAT 1 #define HAVE_FUNC_ATTRIBUTE_PACKED 1 #define HAVE_FUNC_ATTRIBUTE_ALIAS 1 +#define HAVE_FUNC_ATTRIBUTE_NORETURN 1 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) /* https://gcc.gnu.org/onlinedocs/gcc-4.3.6/gcc/Other-Builtins.html */ diff --git a/src/util/macros.h b/src/util/macros.h index d6e37053b1..432d513930 100644 --- a/src/util/macros.h +++ b/src/util/macros.h @@ -171,10 +171,14 @@ do { \ #define ATTRIBUTE_RETURNS_NONNULL #endif -#ifdef HAVE_FUNC_ATTRIBUTE_NORETURN -#define NORETURN __attribute__((__noreturn__)) -#else -#define NORETURN +#ifndef NORETURN +# ifdef _MSC_VER +#define NORETURN __declspec(noreturn) +# elif defined HAVE_FUNC_ATTRIBUTE_NORETURN +#define NORETURN __attribute__((__noreturn__)) +# else +#define NORETURN +# endif #endif #ifdef __cplusplus ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: require at least 14 UBOs for GL 4.3
Module: Mesa Branch: master Commit: 734bef372d80a2ebf5677eb4fbd0e939f2b3cfb4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=734bef372d80a2ebf5677eb4fbd0e939f2b3cfb4 Author: Roland Scheidegger Date: Thu Jan 11 02:10:25 2018 +0100 mesa: require at least 14 UBOs for GL 4.3 ARB_ubo requires 12 UBOs (per stage) at least, but this limit has been raised by GL 4.3 to 14, so don't advertize GL 4.3 without it (only checking the vertex stage since all drivers probably have the same limit anyway for other stages). (piglit has minmax tests for that kind of thing, but they go only up to 3.3, so this won't really be noticed.) I think this currently should not affect any driver - r600 until very recently only supported 12 but now advertizes 14 too. Reviewed-by: Brian Paul --- src/mesa/main/version.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 90c5c5f84e..68079f4ebb 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -352,6 +352,7 @@ compute_version(const struct gl_extensions *extensions, extensions->ARB_transform_feedback_instanced); const bool ver_4_3 = (ver_4_2 && consts->GLSLVersion >= 430 && + consts->Program[MESA_SHADER_VERTEX].MaxUniformBlocks >= 14 && extensions->ARB_ES3_compatibility && extensions->ARB_arrays_of_arrays && extensions->ARB_compute_shader && ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: fix sampler indexing with texture buffers sampling
Module: Mesa Branch: master Commit: 762ccf483aa0f5f853e75c886d49c4025cebaf00 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=762ccf483aa0f5f853e75c886d49c4025cebaf00 Author: Roland Scheidegger Date: Tue Jan 2 23:03:44 2018 +0100 r600: fix sampler indexing with texture buffers sampling This fixes the new piglit test. While here also fix up the logic for early exit of setting up driver consts. Tested-by: Konstantin Kharlamov Reviewed-by: Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 2 ++ src/gallium/drivers/r600/r600_state_common.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index bb7cc177ae..716a829273 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -6856,6 +6856,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int src_gpr, r, i; int id = tgsi_tex_get_src_gpr(ctx, 1); + int sampler_index_mode = inst->Src[1].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE src_gpr = tgsi_tex_get_src_gpr(ctx, 0); if (src_requires_loading) { @@ -6887,6 +6888,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; /* SEL_W */ vtx.use_const_fields = 1; + vtx.buffer_index_mode = sampler_index_mode; if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) return r; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index e7fa1bbf57..1d9ff7bd6e 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1380,8 +1380,8 @@ void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type) } if (!samplers->views.dirty_buffer_constants && - (images && !images->dirty_buffer_constants) && - (buffers && !buffers->dirty_buffer_constants)) + !(images && images->dirty_buffer_constants) && + !(buffers && buffers->dirty_buffer_constants)) return; if (images) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): winsys/radeon: fix up default enabled_rb_mask for r600
Module: Mesa Branch: master Commit: f0dd1b36126ceff8726797f40f56defbf5f82e2c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0dd1b36126ceff8726797f40f56defbf5f82e2c Author: Roland Scheidegger Date: Tue Jan 9 02:53:28 2018 +0100 winsys/radeon: fix up default enabled_rb_mask for r600 The logic had two fatal flaws which completely killed the default value. 1) drm will overwrite the value anyway even if the chip can't be handled 2) the default value logic is relying on num_render_backends, which was filled in later. Luckily no one is relying on it, but it's a bit confusing seeing the chip clock printed out there (as hex) with R600_DEBUG=info... (Albeit radeonsi does not appear to fix up the value. If kernels which don't handle this query are still supported, radeonsi will still end up with a broken enabled_rb_mask, I have no idea of the potential results of this there.) Reviewed-by: Dave Airlie --- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index e600199d26..10f2ecc900 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -369,12 +369,6 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) &ws->info.max_shader_clock); ws->info.max_shader_clock /= 1000; -/* Default value. */ -ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends); -/* This fails on non-GCN or older kernels: */ -radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, - &ws->info.enabled_rb_mask); - ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); /* Generation-specific queries. */ @@ -433,6 +427,16 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) &ws->info.r600_gb_backend_map)) ws->info.r600_gb_backend_map_valid = true; +/* Default value. 
*/ +ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends); +/* + * This fails (silently) on non-GCN or older kernels, overwriting the + * default enabled_rb_mask with the result of the last query. +*/ +if (ws->gen >= DRV_SI) +radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, + &ws->info.enabled_rb_mask); + ws->info.has_virtual_memory = false; if (ws->info.drm_minor >= 13) { uint32_t ib_vm_max_size; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: hack up num_render_backends on Juniper to 8
Module: Mesa Branch: master Commit: 76baf997371dc8678cbea51fe5d4651aa59af741 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=76baf997371dc8678cbea51fe5d4651aa59af741 Author: Roland Scheidegger Date: Tue Jan 9 03:28:45 2018 +0100 r600: hack up num_render_backends on Juniper to 8 Juniper really has a maximum of 4 RBEs (16 pixels). However, predication always locks up on my HD 5750, and through experiments it looks like if we're pretending it has a maximum of 8, with 4 disabled, it works correctly. My conclusion would be that there's a bug (likely firmware, not hw) which causes the predication logic to try to read 8 results out of the query buffer instead of just 4, and since of course no one ever writes the upper 4, the status bit is never set and hence it will wait for it forever. Ideally this would be fixed in firmware, but I'd guess chances of that happening are slim. This will double the size of (occlusion) query result buffers, write the status bit for the disabled rbs in these buffers, and will also add 8 results together instead of just 4 when reading them back. The latter is unnecessary, but it's probably not worth bothering - luckily num_render_backends isn't used outside of occlusion queries, so we don't need a separate value for the "real" maximum. Also print out the enabled_rb_mask if it changed from the pre-fixed value (which is already printed out), just in case there are some more problems with chips which have some rbs disabled... This fixes all the lockups with piglit nv_conditional_render tests on my HD 5750 (all pass). 
Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_query.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 8f87c51cca..b4519830cc 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -1818,7 +1818,19 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen) struct r600_resource *buffer; uint32_t *results; unsigned i, mask = 0; - unsigned max_rbs = ctx->screen->info.num_render_backends; + unsigned max_rbs; + + if (ctx->family == CHIP_JUNIPER) { + /* +* Fix for predication lockups - the chip can only ever have +* 4 RBs, however it looks like the predication logic assumes +* there's 8, trying to read results from query buffers never +* written to. By increasing this number we'll write the +* status bit for these as per the normal disabled rb logic. +*/ + ctx->screen->info.num_render_backends = 8; + } + max_rbs = ctx->screen->info.num_render_backends; assert(rscreen->chip_class <= CAYMAN); @@ -1890,8 +1902,13 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen) r600_resource_reference(&buffer, NULL); - if (mask) + if (mask) { + if (rscreen->debug_flags & DBG_INFO && + mask != rscreen->info.enabled_rb_mask) { + printf("enabled_rb_mask (fixed) = 0x%x\n", mask); + } rscreen->info.enabled_rb_mask = mask; + } } #define XFULL(name_, query_type_, type_, result_type_, group_id_) \ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: don't emit tes samplers/views when tes isn't active
Module: Mesa Branch: master Commit: ea227f4322debd68380feaad1de44a2feaf3d2a9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea227f4322debd68380feaad1de44a2feaf3d2a9 Author: Roland Scheidegger Date: Wed Jan 3 03:23:13 2018 +0100 r600: don't emit tes samplers/views when tes isn't active Similar to const buffers. The driver must not emit any tes-related state if tes is disabled, since the hw slots are all shared by VS, therefore it would overwrite them (the mesa state tracker might not do this, but it would be perfectly legal to do so). Nevertheless I think the dirty state tracking logic in the driver is fundamentally flawed when tes is disabled/enabled, since it looks to me like the VS (and TES) state would not get reemitted to the correct slots (if it's not dirty anyway). Unless I'm missing something... Theoretically, the overwrite problem could be solved by using non-overlapping resource slots for TES and VS (since we're not even close to using half the resource slots), but it wouldn't work for constant buffers nor samplers, and for VS would still need to propagate changes to both LS and VS, so probably not a useful idea. Unfortunately there's zero coverage of this with piglit, since all tessellation shader tests are just shader_runner tests, which are unsuitable for testing any kind of state dependency tracking issues (so I can't even quickly hack something up to prove it and fix it...). TCS otoh is just fine - like GS it has its own hw slots. 
Tested-by: Konstantin Kharlamov Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 4 src/gallium/drivers/r600/r600_state_common.c | 15 +++ 2 files changed, 19 insertions(+) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4cc48dfa11..fb1de9cbf4 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2334,6 +2334,8 @@ static void evergreen_emit_tcs_sampler_views(struct r600_context *rctx, struct r static void evergreen_emit_tes_sampler_views(struct r600_context *rctx, struct r600_atom *atom) { + if (!rctx->tes_shader) + return; evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].views, EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0); } @@ -2404,6 +2406,8 @@ static void evergreen_emit_tcs_sampler_states(struct r600_context *rctx, struct static void evergreen_emit_tes_sampler_states(struct r600_context *rctx, struct r600_atom *atom) { + if (!rctx->tes_shader) + return; evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL], 18, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0); } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 7f4d9f3e33..b49b05608d 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1724,6 +1724,21 @@ static bool r600_update_derived_state(struct r600_context *rctx) } } + /* +* XXX: I believe there's some fatal flaw in the dirty state logic when +* enabling/disabling tes. +* VS/ES share all buffer/resource/sampler slots. If TES is enabled, +* it will therefore overwrite the VS slots. 
If it now gets disabled, +* the VS needs to rebind all buffer/resource/sampler slots - not only +* has TES overwritten the corresponding slots, but when the VS was +* operating as LS the things with correpsonding dirty bits got bound +* to LS slots and won't reflect what is dirty as VS stage even if the +* TES didn't overwrite it. The story for re-enabled TES is similar. +* In any case, we're not allowed to submit any TES state when +* TES is disabled (the state tracker may not do this but this looks +* like an optimization to me, not something which can be relied on). +*/ + /* Update clip misc state. */ if (clip_so_current) { r600_update_clip_state(rctx, clip_so_current); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: set up constants needed for txq for buffers and cube maps with tes
Module: Mesa Branch: master Commit: 43292c78b7f6b496cd568005c8fa14b5b1d6375f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=43292c78b7f6b496cd568005c8fa14b5b1d6375f Author: Roland Scheidegger Date: Mon Jan 1 03:04:38 2018 +0100 r600: set up constants needed for txq for buffers and cube maps with tes We only did this for the other stages, but obviously tess eval/ctrl need it too. This fixes the (newly modified) piglit texturing/textureSize test when run with tes stage and bufferSampler. Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_state_common.c | 16 1 file changed, 16 insertions(+) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 1d9ff7bd6e..4429246d31 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1812,6 +1812,22 @@ static bool r600_update_derived_state(struct r600_context *rctx) } } + if (rctx->tes_shader) { + assert(rctx->b.chip_class >= EVERGREEN); + need_buf_const = rctx->tes_shader->current->shader.uses_tex_buffers || + rctx->tes_shader->current->shader.has_txq_cube_array_z_comp; + if (need_buf_const) { + eg_setup_buffer_constants(rctx, PIPE_SHADER_TESS_EVAL); + } + if (rctx->tcs_shader) { + need_buf_const = rctx->tcs_shader->current->shader.uses_tex_buffers || + rctx->tcs_shader->current->shader.has_txq_cube_array_z_comp; + if (need_buf_const) { + eg_setup_buffer_constants(rctx, PIPE_SHADER_TESS_CTRL); + } + } + } + r600_update_driver_const_buffers(rctx, false); if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: fix enabled_rb_mask on eg/cm
Module: Mesa Branch: master Commit: 7c0bc495f1e467562c4b47da1c2821fd323a45b1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c0bc495f1e467562c4b47da1c2821fd323a45b1 Author: Roland Scheidegger Date: Tue Jan 9 01:38:27 2018 +0100 r600: fix enabled_rb_mask on eg/cm For eg/cm, the r600_gb_backend_map will always be 0. This is a bug in the drm kernel driver, as it just never fills the information in (it is now being fixed - the history shows it was being filled in when the query was brand new but got lost shortly thereafter with backend_map fixes). This causes r600_query_hw_prepare_buffer to write the "status bit" (just the highest bit of the occlusion query result) even for active rbes (all but the first). This doesn't make much sense, albeit I suppose it's mostly safe. According to the commit history, it's necessary to set these bits for inactive rbes since otherwise predication will lock up - presumably the hw just is waiting for the status bit to appear, which will never happen with inactive rbes. I'd guess potentially predication could be wrong (due to not waiting for the actual result if the status bit is already there) if this is set for active rbes. Discovered while trying to fix predication lockups on Juniper (needs another patch). Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_query.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 0d22bc5216..8f87c51cca 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -1822,8 +1822,15 @@ void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen) assert(rscreen->chip_class <= CAYMAN); - /* if backend_map query is supported by the kernel */ - if (rscreen->info.r600_gb_backend_map_valid) { + /* +* if backend_map query is supported by the kernel. 
+* Note the kernel drm driver for a long time never filled in the +* associated data on eg/cm, only on r600/r700, hence ignore the valid +* bit there if the map is zero. +* (Albeit some chips with just one active rb can have a valid 0 map.) +*/ + if (rscreen->info.r600_gb_backend_map_valid && + (ctx->chip_class < EVERGREEN || rscreen->info.r600_gb_backend_map != 0)) { unsigned num_tile_pipes = rscreen->info.num_tile_pipes; unsigned backend_map = rscreen->info.r600_gb_backend_map; unsigned item_width, item_mask; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: use GET_BUFFER_RESINFO vtx fetch on eg instead of setting up consts
Module: Mesa Branch: master Commit: c5162fd3c4b55f9a9e7d0ec253bb2be6f55ee777 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c5162fd3c4b55f9a9e7d0ec253bb2be6f55ee777 Author: Roland Scheidegger Date: Tue Jan 2 23:39:34 2018 +0100 r600: use GET_BUFFER_RESINFO vtx fetch on eg instead of setting up consts Contrary to what the comment said, this appears to work just fine on my rv770 (tested with piglit textureSize 140 fs/vs samplerBuffer). Dave Airlie confirmed it working on cayman too. I have no clue though if it's actually preferable to use it (unfortunately we cannot get rid of the tex constants completely, as we still require them for cube map txq). Albeit filling in the format (1 channel or 4?) and the stuff related to mega- or mini-fetch (what the hell is this...) is just a guess based on other usage of vtx fetch instructions... v2: it really needs to be done through texture cache (I botched the testing because sb optimizations turned it automatically into tc, but can't rely on it and isn't happening on tes). 
Tested-by: Konstantin Kharlamov Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 7 ++-- src/gallium/drivers/r600/r600_asm.c | 3 +- src/gallium/drivers/r600/r600_shader.c | 59 ++-- src/gallium/drivers/r600/r600_state_common.c | 39 +++--- 4 files changed, 50 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index f5b8e7115d..f645791a2c 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -653,11 +653,12 @@ static void evergreen_fill_buffer_resource_words(struct r600_context *rctx, S_030008_ENDIAN_SWAP(endian); tex_resource_words[3] = swizzle_res | S_03000C_UNCACHED(params->uncached); /* -* in theory dword 4 is for number of elements, for use with resinfo, -* but it seems to utterly fail to work, the amd gpu shader analyser +* dword 4 is for number of elements, for use with resinfo, +* albeit the amd gpu shader analyser * uses a const buffer to store the element sizes for buffer txq */ - tex_resource_words[4] = 0; + tex_resource_words[4] = params->size / stride; + tex_resource_words[5] = tex_resource_words[6] = 0; tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER); } diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index d6bd561f01..92c2bdf27c 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1510,7 +1510,8 @@ int cm_bytecode_add_cf_end(struct r600_bytecode *bc) /* common to all 3 families */ static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) { - bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | + bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(vtx->op) | + S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); diff --git 
a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 716a829273..f2bc34660f 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -6949,31 +6949,48 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bytecode_alu alu; int r; int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset; + int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP1_MOV; - alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; - if (ctx->bc->chip_class >= EVERGREEN) { - /* with eg each dword is either buf size or number of cubes */ - alu.src[0].sel += id / 4; - alu.src[0].chan = id % 4; - } else { + if (ctx->bc->chip_class < EVERGREEN) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; /* r600 we have them at channel 2 of the second dword */ alu.src[0].sel += (id * 2) + 1; alu.src[0].chan = 1; + alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; +
Mesa (master): r600: increase number of ubos by one to 14
Module: Mesa Branch: master Commit: 0be1dc25cf72da49fc767f2cd6560f738c0449e0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0be1dc25cf72da49fc767f2cd6560f738c0449e0 Author: Roland Scheidegger Date: Mon Jan 1 04:20:41 2018 +0100 r600: increase number of ubos by one to 14 Ideally we'd support 16 (d3d11 requires 15, and mesa subtracts one for non-ubo constants), but that's kind of impossible (it would be only doable if either we'd somehow merge the mesa non-ubo constants with the driver constants, or only use the driver constants with vtx fetch instead of through the kcache mechanism - the latter probably wouldn't be too bad). For now just do as the comment already said, place the gs ring (not really a const buffer in any case) which is only ever referred to through vc fetch clauses at index 16. Throw in a couple asserts for good measure to make sure the hw limit isn't exceeded. Tested-by: Konstantin Kharlamov Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 1 + src/gallium/drivers/r600/r600_asm.c| 1 + src/gallium/drivers/r600/r600_pipe.h | 10 ++ src/gallium/drivers/r600/r600_state.c | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 81b7c4a285..f5b8e7115d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2168,6 +2168,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, va = rbuffer->gpu_address + cb->buffer_offset; if (!gs_ring_buffer) { + assert(buffer_index < R600_MAX_HW_CONST_BUFFERS); radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4, DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags); radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8, diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 69b2d142c1..d6bd561f01 100644 --- 
a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1008,6 +1008,7 @@ static int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc, continue; bank = alu->src[i].kc_bank; + assert(bank < R600_MAX_HW_CONST_BUFFERS); line = (sel-512)>>4; index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index e042edf2b4..cb84bc1998 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -69,11 +69,12 @@ #define R600_MAX_DRAW_CS_DWORDS58 #define R600_MAX_PFP_SYNC_ME_DWORDS16 -#define R600_MAX_USER_CONST_BUFFERS 13 +#define EG_MAX_ATOMIC_BUFFERS 8 + +#define R600_MAX_USER_CONST_BUFFERS 14 #define R600_MAX_DRIVER_CONST_BUFFERS 3 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) - -#define EG_MAX_ATOMIC_BUFFERS 8 +#define R600_MAX_HW_CONST_BUFFERS 16 /* start driver buffers after user buffers */ #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) @@ -84,7 +85,8 @@ #define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) /* * Note GS doesn't use a constant buffer binding, just a resource index, - * so it's fine to have it exist at index 16. + * so it's fine to have it exist at index 16. I.e. it's not actually + * a const buffer, just a buffer resource. 
*/ #define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 253ff57a98..89cf7d2e50 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1712,6 +1712,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, offset = cb->buffer_offset; if (!gs_ring_buffer) { + assert(buffer_index < R600_MAX_HW_CONST_BUFFERS); radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4, DIV_ROUND_UP(cb->buffer_size, 256)); radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: increase number of UBOs to 15
Module: Mesa Branch: master Commit: 523b6c87048ddc5b49be4ca985bf91d8585aef47 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=523b6c87048ddc5b49be4ca985bf91d8585aef47 Author: Roland Scheidegger Date: Wed Jan 3 02:09:01 2018 +0100 r600: increase number of UBOs to 15 With the exception of the default tess levels only ever accessed by the default tcs shader, the LDS_INFO const buffer was only accessed by vtx instructions, and not through kcache. No idea why really, but use this to our advantage by not using a constant buffer slot for it. This just requires us to throw the default tess levels into the "normal" driver const buffer instead. Alternatively, could access those constants via vtx instructions too, but then we couldn't use an ordinary ureg prog accessing them as constants and would have to generate that directly when compiling the default tcs shader. (Another alternative would be to put all lds info into the ordinary driver const buffer, albeit we'd maybe need to increase the fixed size as it can't fit alongside the ucp since vs needs access to the lds info too.) 
Tested-by: Konstantin Kharlamov Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 15 -- src/gallium/drivers/r600/r600_pipe.h | 13 src/gallium/drivers/r600/r600_state_common.c | 31 +--- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index f645791a2c..4cc48dfa11 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2168,8 +2168,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, va = rbuffer->gpu_address + cb->buffer_offset; - if (!gs_ring_buffer) { - assert(buffer_index < R600_MAX_HW_CONST_BUFFERS); + if (buffer_index < R600_MAX_HW_CONST_BUFFERS) { radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4, DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags); radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8, @@ -3880,7 +3879,7 @@ static void evergreen_set_tess_state(struct pipe_context *ctx, memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4); memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2); - rctx->tess_state_dirty = true; + rctx->driver_consts[PIPE_SHADER_TESS_CTRL].tcs_default_levels_dirty = true; } static void evergreen_setup_immed_buffer(struct r600_context *rctx, @@ -4344,7 +4343,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe unsigned input_vertex_size, output_vertex_size; unsigned input_patch_size, pervertex_output_patch_size, output_patch_size; unsigned output_patch0_offset, perpatch_output_offset, lds_size; - uint32_t values[16]; + uint32_t values[8]; unsigned num_waves; unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes; unsigned wave_divisor = (16 * num_pipes); @@ -4364,7 +4363,6 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe if (rctx->lds_alloc != 0 && rctx->last_ls == ls && - !rctx->tess_state_dirty && 
rctx->last_num_tcs_input_cp == num_tcs_input_cp && rctx->last_tcs == tcs) return; @@ -4411,17 +4409,12 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe rctx->lds_alloc = (lds_size | (num_waves << 14)); - memcpy(&values[8], rctx->tess_state, 6 * sizeof(float)); - values[14] = 0; - values[15] = 0; - - rctx->tess_state_dirty = false; rctx->last_ls = ls; rctx->last_tcs = tcs; rctx->last_num_tcs_input_cp = num_tcs_input_cp; constbuf.user_buffer = values; - constbuf.buffer_size = 16 * 4; + constbuf.buffer_size = 8 * 4; rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, R600_LDS_INFO_CONST_BUFFER, &constbuf); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index cb84bc1998..112b5cbb83 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -71,7 +71,7 @@ #define EG_MAX_ATOMIC_BUFFERS 8 -#define R600_MAX_USER_CONST_BUFFERS 14 +#define R600_MAX_USER_CONST_BUFFERS 15 #define R600_MAX_DRIVER_CONST_BUFFERS 3 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) #define R600_MAX_HW_CONST_BUFFERS 16 @@ -80,12 +80,17
Mesa (master): r600: don't emit reloc for ring buffer out into the blue
Module: Mesa Branch: master Commit: 22ba4ebb1877a86c560533f5e162aa84389597e9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=22ba4ebb1877a86c560533f5e162aa84389597e9 Author: Roland Scheidegger Date: Sun Dec 31 19:21:04 2017 +0100 r600: don't emit reloc for ring buffer out into the blue It looks like this reloc belongs to setting the constant reg, which is skipped for gs ring. Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 7 +++ src/gallium/drivers/r600/r600_state.c | 7 +++ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 0da665f634..81b7c4a285 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2172,12 +2172,11 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags); radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8, pkt_flags); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); } - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); - radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); radeon_emit(cs, (buffer_id_base + buffer_index) * 8); radeon_emit(cs, va); /* RESOURCEi_WORD0 */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index cbf860f45f..253ff57a98 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1715,12 +1715,11 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4, DIV_ROUND_UP(cb->buffer_size, 256)); radeon_set_context_reg(cs, 
reg_alu_const_cache + buffer_index * 4, offset >> 8); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); } - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); - radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); radeon_emit(cs, (buffer_id_base + buffer_index) * 7); radeon_emit(cs, offset); /* RESOURCEi_WORD0 */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: don't use vtx offset for load_sample_position
Module: Mesa Branch: master Commit: 6c8d6ce982d3ce9dfde02a59db23b138df26ae55 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6c8d6ce982d3ce9dfde02a59db23b138df26ae55 Author: Roland Scheidegger Date: Mon Jan 1 19:40:56 2018 +0100 r600: don't use vtx offset for load_sample_position The offset looks bogus to me. Albeit in the end it doesn't matter, by the looks of it offsets smaller than 4 get ignored there (not sure of the rules, I suppose either non-dword aligned offsets never work there or the offset must be at least aligned to the size of a single element). Tested-by: Konstantin Kharlamov Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f6ff2055ee..bb7cc177ae 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1284,7 +1284,7 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_ vtx.num_format_all = 2; vtx.format_comp_all = 1; vtx.use_const_fields = 0; - vtx.offset = 1; // first element is size of buffer + vtx.offset = 0; vtx.endian = r600_endian_swap(32); vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: fix textureSize queries with tbos
Module: Mesa Branch: master Commit: 878bc4a5ae6215a5d84c3e3a5c9575ccd1ae27e2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=878bc4a5ae6215a5d84c3e3a5c9575ccd1ae27e2 Author: Roland Scheidegger Date: Sat Dec 23 04:50:13 2017 +0100 r600: fix textureSize queries with tbos piglit doesn't care, but I'm quite confident that the size actually bound as range should be reported and not the base size of the resource (and some quick piglit test hacking confirms this). Also, the array in the constant buffer looks overallocated by a factor of 4. For eg, also decrease the size by another factor of 2 by using the same constant slot for both buffer size (required for txq for TBOs) and the number of layers for cube arrays, as these are mutually exclusive. Could of course use some more logic and only actually do this for the samplers/images/buffers where it's required rather than for all, but ah well... Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 18 ++--- src/gallium/drivers/r600/r600_state_common.c | 39 +--- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c9247e1c28..e28882b2e5 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -6955,9 +6955,9 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offs alu.op = ALU_OP1_MOV; alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; if (ctx->bc->chip_class >= EVERGREEN) { - /* channel 0 or 2 of each word */ - alu.src[0].sel += (id / 2); - alu.src[0].chan = (id % 2) * 2; + /* with eg each dword is either buf size or number of cubes */ + alu.src[0].sel += id / 4; + alu.src[0].chan = id % 4; } else { /* r600 we have them at channel 2 of the second dword */ alu.src[0].sel += (id * 2) + 1; @@ -7615,9 +7615,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; if (ctx->bc->chip_class >= EVERGREEN) { - 
/* channel 1 or 3 of each word */ - alu.src[0].sel += (id / 2); - alu.src[0].chan = ((id % 2) * 2) + 1; + /* with eg each dword is either buf size or number of cubes */ + alu.src[0].sel += id / 4; + alu.src[0].chan = id % 4; } else { /* r600 we have them at channel 2 of the second dword */ alu.src[0].sel += (id * 2) + 1; @@ -8782,9 +8782,9 @@ static int tgsi_resq(struct r600_shader_ctx *ctx) alu.op = ALU_OP1_MOV; alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; - /* channel 1 or 3 of each word */ - alu.src[0].sel += (id / 2); - alu.src[0].chan = ((id % 2) * 2) + 1; + /* with eg each dword is either buf size or number of cubes */ + alu.src[0].sel += id / 4; + alu.src[0].chan = id % 4; alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index e5a5a33367..e7fa1bbf57 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -902,7 +902,6 @@ struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_co unsigned pipe_shader_type) { struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector); - int i; sel->type = pipe_shader_type; sel->tokens = tgsi_dup_tokens(tokens); @@ -1326,7 +1325,7 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty samplers->views.dirty_buffer_constants = FALSE; bits = util_last_bit(samplers->views.enabled_mask); - array_size = bits * 8 * sizeof(uint32_t) * 4; + array_size = bits * 8 * sizeof(uint32_t); constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset); @@ -1349,15 +1348,16 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty } else constants[offset + 4] = 0; - constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + 
constants[offset + 5] = samplers->views.views[i]->base.u.buf.size
Mesa (master): r600: kill off native_integer shader ctx flag
Module: Mesa Branch: master Commit: eafaf136862db1c5c6a63e2127d553a38dcc63f2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eafaf136862db1c5c6a63e2127d553a38dcc63f2 Author: Roland Scheidegger Date: Fri Dec 22 23:31:43 2017 +0100 r600: kill off native_integer shader ctx flag Maybe upon a time it wasn't always true. Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 18 -- 1 file changed, 18 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 2650a33846..c9247e1c28 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -350,7 +350,6 @@ struct r600_shader_ctx { int cs_grid_size_reg; bool cs_block_size_loaded, cs_grid_size_loaded; int fragcoord_input; - int native_integers; int next_ring_offset; int gs_out_ring_offset; int gs_next_vertex; @@ -998,22 +997,6 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { break; /* Already handled from allocate_system_value_inputs */ } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { - if (!ctx->native_integers) { - struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - - alu.op = ALU_OP1_INT_TO_FLT; - alu.src[0].sel = 0; - alu.src[0].chan = 3; - - alu.dst.sel = 0; - alu.dst.chan = 3; - alu.dst.write = 1; - alu.last = 1; - - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } break; } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) break; @@ -3128,7 +3111,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.bc = &shader->bc; ctx.shader = shader; - ctx.native_integers = true; r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, rscreen->has_compressed_msaa_texturing); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: implement accurate corner behavior for textureGather with cube maps
Module: Mesa Branch: master Commit: 1ae48963f7648bb4e98faacfa3dd63906b26a518 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1ae48963f7648bb4e98faacfa3dd63906b26a518 Author: Roland Scheidegger Date: Wed Dec 13 03:33:07 2017 +0100 gallivm: implement accurate corner behavior for textureGather with cube maps The spec says the missing texel (when we wrap around both x and y axis) should be synthesized as the average of the 3 other texels. For bilinear filtering however we instead adjusted the filter weights (because, while the complexity looks similar, there would be 4 times as many color values to fix up than weights). Obviously this could not work for gather (hence accurate corner filtering was disabled with gather). Implement this by just doing it as the spec implies - calculate the 4th texel as the average of the other 3. With gather of course there's only one color to worry about, so it's not all that many instructions neither (albeit surely the whole cube map filtering is hilariously complex). Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 304 ++ 1 file changed, 201 insertions(+), 103 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 6b1509c7cf..ff8cbf604c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1030,20 +1030,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef neighbors[2][2][4]; int chan, texel_index; boolean seamless_cube_filter, accurate_cube_corners; + unsigned chan_swiz = bld->static_texture_state->swizzle_r; seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE || bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && bld->static_sampler_state->seamless_cube_map; - /* -* XXX I don't know how this is really supposed to work with gather. 
From GL -* spec wording (not gather specific) it sounds like the 4th missing texel -* should be an average of the other 3, hence for gather could return this. -* This is however NOT how the code here works, which just fixes up the -* weights used for filtering instead. And of course for gather there is -* no filter to tweak... -*/ - accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter && - !is_gather; + + accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter; lp_build_extract_image_sizes(bld, &bld->int_size_bld, @@ -1382,94 +1375,191 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * as well) here. */ if (accurate_cube_corners) { - LLVMValueRef w00, w01, w10, w11, wx0, wy0; - LLVMValueRef c_weight, c00, c01, c10, c11; - LLVMValueRef have_corner, one_third, tmp; + LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f; + LLVMValueRef have_corner, one_third; - colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); - colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); - colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); - colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); + colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0"); + colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1"); + colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2"); + colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3"); have_corner = LLVMBuildLoad(builder, have_corners, ""); lp_build_if(&corner_if, bld->gallivm, have_corner); - /* - * we can't use standard 2d lerp as we need per-element weight - * in case of corners, so just calculate bilinear result as - * w00*s00 + w01*s01 + w10*s10 + w11*s11. - * (This is actually less work than using 2d lerp, 7 vs. 
9 instructions, - * however calculating the weights needs another 6, so actually probably - * not slower than 2d lerp only for 4 channels as weights only need - * to be calculated once - of course fixing the weights has additional cost.) - */ - wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart); - wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart); - w00 = lp_build_mul(coord_bld, wx0, wy0); - w01 = lp_build_mul(coord_bld, s_
Mesa (master): gallivm: fix an issue with NaNs with seamless cube filtering
Module: Mesa Branch: master Commit: a485ad0bcdcab865e14a54133a271198c86e41ab URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a485ad0bcdcab865e14a54133a271198c86e41ab Author: Roland Scheidegger Date: Wed Dec 13 03:33:21 2017 +0100 gallivm: fix an issue with NaNs with seamless cube filtering Cube texture wrapping is a bit special since the values (post face projection) always are within [0,1], so we took advantage of that and omitted some clamps. However, we can still get NaNs (either because the coords already had NaNs, or the face projection generated them), and in fact we didn't handle them quite safely. I've seen -INT_MAX + 1 being propagated through as the final int coord value, albeit I didn't observe a crash. (Not quite a coincidence, since any stride mul with -INT_MAX or -INT_MAX+1 will turn up as a small positive number - nevertheless, I'd rather not try my luck, I'm not entirely sure it can't really turn up negative either due to seamless coord swapping, plus ifloor of a NaN is not guaranteed to return -INT_MAX by any standard. And we kill off NaNs similarly with ordinary texture wrapping too.) So kill off the NaNs by using the common max against zero method. Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index def731e9d9..6b1509c7cf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -1130,6 +1130,17 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, */ /* should always have normalized coords, and offsets are undefined */ assert(bld->static_sampler_state->normalized_coords); + /* + * The coords should all be between [0,1] however we can have NaNs, + * which will wreak havoc. 
In particular the y1_clamped value below + * can be -INT_MAX (on x86) and be propagated right through (probably + * other values might be bogus in the end too). + * So kill off the NaNs here. + */ + coords[0] = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero, + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); + coords[1] = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero, + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); coord = lp_build_mul(coord_bld, coords[0], flt_width_vec); /* instead of clamp, build mask if overflowed */ coord = lp_build_sub(coord_bld, coord, half); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: fix texture wrapping for texture gather for mirror modes
Module: Mesa Branch: master Commit: 84c363fb09167bc45aeba95423b20bee7293f44a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=84c363fb09167bc45aeba95423b20bee7293f44a Author: Roland Scheidegger Date: Tue Dec 12 04:22:28 2017 +0100 gallivm: fix texture wrapping for texture gather for mirror modes Care must be taken that all coords end up correct, the tests are very sensitive that everything is correctly rounded. This doesn't matter for bilinear filter (since picking a wrong texel with weight zero is ok), and we could also switch the per-sample coords mistakenly. While here, also optimize the coord_mirror helper a bit (we can do the mirroring directly by exploiting float rounding, no need for fixing up odd/even manually). I did not touch the mirror_clamp and mirror_clamp_to_border modes. In contrast to mirror_clamp_to_edge and mirror_repeat these are legacy modes. They are specified against old gl rules, which actually does the mirroring not per sample (so you get swapped order if the coord is in the mirrored section). I think the idea though is that they should follow the respecified mirror_clamp_to_edge rules so the order would be correct. Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 245 +++--- 1 file changed, 171 insertions(+), 74 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index b67a089c47..def731e9d9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -218,34 +218,42 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, /** - * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. + * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode. + * (Note that with pot sizes could do this much more easily post-scale + * with some bit arithmetic.) 
*/ static LLVMValueRef lp_build_coord_mirror(struct lp_build_sample_context *bld, - LLVMValueRef coord) + LLVMValueRef coord, boolean posOnly) { struct lp_build_context *coord_bld = &bld->coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef fract, flr, isOdd; - - lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); - /* kill off NaNs */ - /* XXX: not safe without arch rounding, fract can be anything. */ - fract = lp_build_max_ext(coord_bld, fract, coord_bld->zero, -GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - - /* isOdd = flr & 1 */ - isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, ""); + LLVMValueRef fract; + LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); - /* make coord positive or negative depending on isOdd */ - /* XXX slight overkill masking out sign bit is unnecessary */ - coord = lp_build_set_sign(coord_bld, fract, isOdd); + /* +* We can just use 2*(x - round(0.5*x)) to do all the mirroring, +* it all works out. (The result is in range [-1, 1.0], negative if +* the coord is in the "odd" section, otherwise positive.) +*/ - /* convert isOdd to float */ - isOdd = lp_build_int_to_float(coord_bld, isOdd); + coord = lp_build_mul(coord_bld, coord, half); + fract = lp_build_round(coord_bld, coord); + fract = lp_build_sub(coord_bld, coord, fract); + coord = lp_build_add(coord_bld, fract, fract); - /* add isOdd to coord */ - coord = lp_build_add(coord_bld, coord, isOdd); + if (posOnly) { + /* + * Theoretically it's not quite 100% accurate because the spec says + * that ultimately a scaled coord of -x.0 should map to int coord + * -x + 1 with mirroring, not -x (this does not matter for bilinear + * filtering). + */ + coord = lp_build_abs(coord_bld, coord); + /* kill off NaNs */ + /* XXX: not safe without arch rounding, fract can be anything. 
*/ + coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero, + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); + } return coord; } @@ -362,7 +370,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_add(coord_bld, coord, offset); } - /* clamp to [0, length] */ + /* + * clamp to [0, length] + * + * Unlike some other wrap modes, this should be correct for gather + * too. GL_CLAMP explicitly does this clamp on the coord prior to + * actual wrapping (which is per sample). + */ coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f); coord = lp_build_sub(coord_bld, coord, half); @@ -426,8 +440,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, offset = lp_build_int_to_float(coord_bld, offset);
Mesa (master): r600: set DX10_CLAMP for compute shader too
Module: Mesa Branch: master Commit: 71e630753ebbee82e8f8709da5488296b2c070c8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=71e630753ebbee82e8f8709da5488296b2c070c8 Author: Roland Scheidegger Date: Wed Nov 22 03:11:33 2017 +0100 r600: set DX10_CLAMP for compute shader too I really intended to set this for all shader stages by 3835009796166968750ff46cf209f6d4208cda86 but missed it for compute shaders (because it's in a different source file...). Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_compute.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 6e87539cfe..48c4a9ca45 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -746,8 +746,9 @@ void evergreen_emit_cs_shader(struct r600_context *rctx, radeon_compute_set_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3); radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */ radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */ - S_0288D4_NUM_GPRS(ngpr) - | S_0288D4_STACK_SIZE(nstack)); + S_0288D4_NUM_GPRS(ngpr) | + S_0288D4_DX10_CLAMP(1) | + S_0288D4_STACK_SIZE(nstack)); radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe: fix snorm blending
Module: Mesa Branch: master Commit: b5957cee920cd7a62e4e726538dbbe44c12e33ab URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b5957cee920cd7a62e4e726538dbbe44c12e33ab Author: Roland Scheidegger Date: Sat Nov 18 06:23:35 2017 +0100 llvmpipe: fix snorm blending The blend math gets a bit funky due to inverse blend factors being in range [0,2] rather than [-1,1], our normalized math can't really cover this. src_alpha_saturate blend factor has a similar problem too. (Note that piglit fbo-blending-formats test is mostly useless for anything but unorm formats, since not just all src/dst values are between [0,1], but the tests are crafted in a way that the results are between [0,1] too.) v2: some formatting fixes, and fix a fairly obscure (to debug) issue with alpha-only formats (not related to snorm at all), where blend optimization would think it could simplify the blend equation if the blend factors were complementary, however was using the completely unrelated rgb blend factors instead of the alpha ones... 
Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 50 - src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 ++ src/gallium/drivers/llvmpipe/lp_bld_blend.c | 134 ++-- src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 53 ++ 4 files changed, 191 insertions(+), 53 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index a1edd349f1..321c6e4edf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -541,38 +541,38 @@ lp_build_add(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - if(a == bld->zero) + if (a == bld->zero) return b; - if(b == bld->zero) + if (b == bld->zero) return a; - if(a == bld->undef || b == bld->undef) + if (a == bld->undef || b == bld->undef) return bld->undef; - if(bld->type.norm) { + if (type.norm) { const char *intrinsic = NULL; - if(a == bld->one || b == bld->one) + if (!type.sign && (a == bld->one || b == bld->one)) return bld->one; if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { -if(util_cpu_caps.has_sse2) { - if(type.width == 8) +if (util_cpu_caps.has_sse2) { + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } else if (util_cpu_caps.has_altivec) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs"; } } if (type.width * type.length == 256) { -if(util_cpu_caps.has_avx2) { - if(type.width == 8) +if (util_cpu_caps.has_avx2) { + if (type.width == 8) intrinsic = type.sign ? 
"llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w"; } } @@ -842,38 +842,38 @@ lp_build_sub(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - if(b == bld->zero) + if (b == bld->zero) return a; - if(a == bld->undef || b == bld->undef) + if (a == bld->undef || b == bld->undef) return bld->undef; - if(a == b) + if (a == b) return bld->zero; - if(bld->type.norm) { + if (type.norm) { const char *intrinsic = NULL; - if(b == bld->one) + if (!type.sign && b == bld->one) return bld->zero; if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { if (util_cpu_caps.has_sse2) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } el
Mesa (master): r600: use min_dx10/max_dx10 instead of min/max
Module: Mesa Branch: master Commit: aab0bfc648bf1be50b81a25224970015f1dc78b8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=aab0bfc648bf1be50b81a25224970015f1dc78b8 Author: Roland Scheidegger Date: Thu Nov 9 19:37:54 2017 +0100 r600: use min_dx10/max_dx10 instead of min/max I believe this is the safe thing to do, especially ever since the driver actually generates NaNs for muls too. The ISA docs are not very helpful here, however the dx10 versions will pick a non-nan result over a NaN one (this is also the ieee754 behavior), whereas the non-dx10 ones will pick the NaN (verified by newly changed piglit isinf-and-isnan test). Other "modern" drivers will most likely do the same. This was shown to make some difference for bug 103544, albeit it is not required to fix it. Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 13 +++-- src/gallium/drivers/r600/sb/sb_expr.cpp | 2 ++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 0fa2a1f0d1..805b3b6b3d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -9175,8 +9175,9 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */ + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, @@ -9373,8 +9374,8 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_DP3] = { 
ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, @@ -9596,8 +9597,8 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, - [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, - [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, + [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2}, + [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp index 3dd3a4815b..7a5d62c8e8 100644 --- a/src/gallium/drivers/r600/sb/sb_expr.cpp +++ b/src/gallium/drivers/r600/sb/sb_expr.cpp @@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) { n.bc.src[0].abs == n.bc.src[1].abs) { switch (n.bc.op) { case ALU_OP2_MIN: // (MIN x, x) => (MOV x) + case ALU_OP2_MIN_DX10: case ALU_OP2_MAX: + case ALU_OP2_MAX_DX10: convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs); return fold_alu_op1(n); case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: use ieee version of rsq
Module: Mesa Branch: master Commit: 570d5b79929554a45d8aebd294bbd67969396ba5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=570d5b79929554a45d8aebd294bbd67969396ba5 Author: Roland Scheidegger Date: Thu Nov 9 19:50:41 2017 +0100 r600: use ieee version of rsq Both r600 and evergreen used the clamped version, whereas cayman used the ieee one. I don't think there's a valid reason for this discrepancy, so let's switch to the ieee version for r600 and evergreen too, since we generally want to stick to ieee arithmetic. With this, behavior for both rcp and rsq should now be the same for all of r600, eg, cm, all using ieee versions (albeit note rsq retains the abs behavior for everybody, which may not be a good idea ultimately). Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 547eebac12..b1a164e594 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4865,11 +4865,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - /* XXX: -* For state trackers other than OpenGL, we'll want to use -* _RECIPSQRT_IEEE instead. -*/ - alu.op = ALU_OP1_RECIPSQRT_CLAMPED; + alu.op = ALU_OP1_RECIPSQRT_IEEE; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: use DX10_CLAMP bit in shader setup
Module: Mesa Branch: master Commit: 3835009796166968750ff46cf209f6d4208cda86 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3835009796166968750ff46cf209f6d4208cda86 Author: Roland Scheidegger Date: Thu Nov 9 19:41:29 2017 +0100 r600: use DX10_CLAMP bit in shader setup The docs are not very concise in what this really does, however both Alex Deucher and Nicolai Hähnle suggested this only really affects instructions using the CLAMP output modifier, and I've confirmed that with the newly changed piglit isinf_and_isnan test. So, with this bit set, if an instruction has the CLAMP modifier bit (which clamps to [0,1]) set, then NaNs will be converted to zero, otherwise the result will be NaN. D3D10 would require this, glsl doesn't have modifiers (with mesa clamp(x,0,1) would get converted to such a modifier) coupled with a whatever-floats-your-boat specified NaN behavior, but the clamp behavior should probably always be used (this also matches what a decomposition into min(1.0, max(x, 0.0)) would do, if min/max also adhere to the ieee spec of picking the non-nan result). Some apps may in fact rely on this, as this prevents misrenderings in This War of Mine since using ieee muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use clamped rcp opcode, which would also fix this bug there. radeonsi also seems to set this bit nowadays if I see that right (albeit the llvm amdgpu code comment now says "Make clamp modifier on NaN input returns 0" instead of "Do not clamp NAN to 0" since it was changed, which also looks a bit misleading). v2: set it in all shader stages. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544 Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 6 ++ src/gallium/drivers/r600/r600_state.c | 9 + 2 files changed, 15 insertions(+) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index b02d7eeca6..7c2dfa092d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3244,6 +3244,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */ S_028844_NUM_GPRS(rshader->bc.ngpr) | S_028844_PRIME_CACHE_ON_DRAW(1) | +S_028844_DX10_CLAMP(1) | S_028844_STACK_SIZE(rshader->bc.nstack)); /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ @@ -3264,6 +3265,7 @@ void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES, S_028890_NUM_GPRS(rshader->bc.ngpr) | + S_028890_DX10_CLAMP(1) | S_028890_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES, shader->bo->gpu_address >> 8); @@ -3326,6 +3328,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS, S_028878_NUM_GPRS(rshader->bc.ngpr) | + S_028878_DX10_CLAMP(1) | S_028878_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS, shader->bo->gpu_address >> 8); @@ -3366,6 +3369,7 @@ void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader S_0286C4_VS_EXPORT_COUNT(nparams - 1)); r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS, S_028860_NUM_GPRS(rshader->bc.ngpr) | + S_028860_DX10_CLAMP(1) | S_028860_STACK_SIZE(rshader->bc.nstack)); if (rshader->vs_position_window_space) { r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL, @@ -3400,6 +3404,7 @@ void 
evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader r600_init_command_buffer(cb, 32); r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS, S_0288BC_NUM_GPRS(rshader->bc.ngpr) | + S_0288BC_DX10_CLAMP(1) | S_0288BC_STACK_SIZE(rshader->bc.nstack)); r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS, shader->bo->gpu_address >> 8); @@ -3413,6 +3418,7 @@ void evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader r600_init_command_buffer(cb, 32); r600_store_context_r
Mesa (master): r600: set the number type correctly for float rts in cb setup
Module: Mesa Branch: master Commit: 65123ee62cd66cdffe2c4193a3e28e811d73ff65 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=65123ee62cd66cdffe2c4193a3e28e811d73ff65 Author: Roland Scheidegger Date: Thu Nov 9 19:53:49 2017 +0100 r600: set the number type correctly for float rts in cb setup Float rts were always set as unorm instead of float. Not sure of the consequences, but at least it looks like the blend clamp would have been enabled, which is against the rules (only eg really bothered to even attempt to specify this correctly, r600 always used clamp anyway). Albeit r600 (not r700) setup still looks bugged to me due to never setting BLEND_FLOAT32 which must be set according to docs... Not sure if the hw really cares, no piglit change (on eg/juniper). Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 7 ++- src/gallium/drivers/r600/r600_state.c | 10 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 7c2dfa092d..2cd162629d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1050,7 +1050,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx, } } ntype = V_028C70_NUMBER_UNORM; - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_028C70_NUMBER_SRGB; else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { if (desc->channel[i].normalized) @@ -1062,7 +1062,10 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx, ntype = V_028C70_NUMBER_UNORM; else if (desc->channel[i].pure_integer) ntype = V_028C70_NUMBER_UINT; + } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + ntype = V_028C70_NUMBER_FLOAT; } + pitch = (pitch / 8) - 1; color->pitch = S_028C64_PITCH_TILE_MAX(pitch); @@ -1188,6 +1191,8 @@ static void evergreen_set_color_surface_common(struct 
r600_context *rctx, ntype = V_028C70_NUMBER_UNORM; else if (desc->channel[i].pure_integer) ntype = V_028C70_NUMBER_UINT; + } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + ntype = V_028C70_NUMBER_FLOAT; } if (R600_BIG_ENDIAN) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0e266aef42..3102905537 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -817,7 +817,7 @@ static void r600_init_color_surface(struct r600_context *rctx, unsigned offset; const struct util_format_description *desc; int i; - bool blend_bypass = 0, blend_clamp = 1, do_endian_swap = FALSE; + bool blend_bypass = 0, blend_clamp = 0, do_endian_swap = FALSE; if (rtex->db_compatible && !r600_can_sample_zs(rtex, false)) { r600_init_flushed_depth_texture(&rctx->b.b, surf->base.texture, NULL); @@ -869,6 +869,8 @@ static void r600_init_color_surface(struct r600_context *rctx, ntype = V_0280A0_NUMBER_UNORM; else if (desc->channel[i].pure_integer) ntype = V_0280A0_NUMBER_UINT; + } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { + ntype = V_0280A0_NUMBER_FLOAT; } if (R600_BIG_ENDIAN) @@ -883,6 +885,11 @@ static void r600_init_color_surface(struct r600_context *rctx, endian = r600_colorformat_endian_swap(format, do_endian_swap); + /* blend clamp should be set for all NORM/SRGB types */ + if (ntype == V_0280A0_NUMBER_UNORM || ntype == V_0280A0_NUMBER_SNORM || + ntype == V_0280A0_NUMBER_SRGB) + blend_clamp = 1; + /* set blend bypass according to docs if SINT/UINT or 8/24 COLOR variants */ if (ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT || @@ -917,6 +924,7 @@ static void r600_init_color_surface(struct r600_context *rctx, ntype != V_0280A0_NUMBER_UINT && ntype != V_0280A0_NUMBER_SINT) && G_0280A0_BLEND_CLAMP(color_info) && + /* XXX this condition is always true since BLEND_FLOAT32 is never set (bug?). 
*/ !G_0280A0_BLEND_FLOAT32(color_info)) { color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM); surf->export_16bpc = true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: use ieee version of rcp
Module: Mesa Branch: master Commit: 1c8d57a008861f856a7fad8feaf14ec412a29d3e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c8d57a008861f856a7fad8feaf14ec412a29d3e Author: Roland Scheidegger Date: Thu Nov 9 19:44:23 2017 +0100 r600: use ieee version of rcp r600 used the clamped version for rcp, whereas both evergreen and cayman used the ieee version. I don't know why that discrepancy exists (it does so since day 1) but there does not seem to be a valid reason for this, so make it consistent. This seems now safer than before the previous commit (using the dx10 clamp bit). Note that rsq still uses clamped version (as before even though the table may have suggested otherwise for evergreen) for r600/eg, but not for cayman. Will be changed separately for better regression tracking... Reviewed-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 805b3b6b3d..547eebac12 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -9161,11 +9161,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, - /* XXX: -* For state trackers other than OpenGL, we'll want to use -* _RECIP_IEEE instead. 
-*/ - [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, + [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, @@ -9366,7 +9362,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, - [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, + [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): docs: Fix GL_MESA_program_debug enums
Module: Mesa Branch: master Commit: dd38a4ee0d0b6b7addb341fe327c245bf64903e5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dd38a4ee0d0b6b7addb341fe327c245bf64903e5 Author: Roland Scheidegger Date: Tue Nov 7 01:43:51 2017 +0100 docs: Fix GL_MESA_program_debug enums 13b303ff9265b89bdd9100e32f905e9cdadfad81 added the actual enums but didn't remove the already existing ones. (And also duplicated the "fragment" names instead of using the "vertex" names.) Fixes: 13b303ff9265b89bdd91 "docs: Update the list of used MESA GL enums." Reviewed-by: Eric Engestrom Reviewed-by: Brian Paul --- docs/specs/enums.txt | 26 -- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/docs/specs/enums.txt b/docs/specs/enums.txt index 4b0485f349..eb4aa396c5 100644 --- a/docs/specs/enums.txt +++ b/docs/specs/enums.txt @@ -46,14 +46,14 @@ GL_MESA_shader_debug.spec: (obsolete) GL_DEBUG_ASSERT_MESA 0x875B GL_MESA_program_debug: (obsolete) - GL_FRAGMENT_PROGRAM_CALLBACK_MESA 0x - GL_VERTEX_PROGRAM_CALLBACK_MESA0x - GL_FRAGMENT_PROGRAM_POSITION_MESA 0x - GL_VERTEX_PROGRAM_POSITION_MESA0x - GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA 0x - GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA 0x - GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA 0x - GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA 0x +GL_FRAGMENT_PROGRAM_POSITION_MESA 0x8BB0 +GL_FRAGMENT_PROGRAM_CALLBACK_MESA 0x8BB1 +GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA 0x8BB2 +GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA 0x8BB3 +GL_VERTEX_PROGRAM_POSITION_MESA 0x8BB4 +GL_VERTEX_PROGRAM_CALLBACK_MESA 0x8BB5 +GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA0x8BB6 +GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA0x8BB7 GL_MESAX_texture_stack: GL_TEXTURE_1D_STACK_MESAX0x8759 @@ -63,16 +63,6 @@ GL_MESAX_texture_stack: GL_TEXTURE_1D_STACK_BINDING_MESAX0x875D GL_TEXTURE_2D_STACK_BINDING_MESAX0x875E -GL_MESA_program_debug - GL_FRAGMENT_PROGRAM_POSITION_MESA 0x8BB0 - GL_FRAGMENT_PROGRAM_CALLBACK_MESA 0x8BB1 - GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA 0x8BB2 - GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA 0x8BB3 - 
GL_FRAGMENT_PROGRAM_POSITION_MESA 0x8BB4 - GL_FRAGMENT_PROGRAM_CALLBACK_MESA 0x8BB5 - GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA 0x8BB6 - GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA 0x8BB7 - GL_MESA_tile_raster_order GL_TILE_RASTER_ORDER_FIXED_MESA 0x8BB8 GL_TILE_RASTER_ORDER_INCREASING_X_MESA 0x8BB9 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: don't cull tris with zero area
Module: Mesa Branch: master Commit: 3e4fd2d4b185dac55a481384f8ce3a8c93d78f87 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e4fd2d4b185dac55a481384f8ce3a8c93d78f87 Author: Roland Scheidegger Date: Thu Oct 26 21:23:27 2017 +0200 draw: don't cull tris with zero area Culling tris with zero area seems like a great idea, but apparently with fill mode line (and point) we're supposed to draw them, at least some tests for some other state tracker complained otherwise. Such tris also always seem to be back facing (not sure if this can be inferred from anything, since in a mathematical sense it cannot really be determined), so make sure to account for this when filling in the face information. (For solid tris, this is of course unnecessary, drivers will throw the tris away later in any case.) Reviewed-by: Brian Paul --- src/gallium/auxiliary/draw/draw_pipe_cull.c | 10 ++ src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 5 ++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 3e8e458959..318d743dbb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -181,6 +181,16 @@ static void cull_tri( struct draw_stage *stage, /* triangle is not culled, pass to next stage */ stage->next->tri( stage->next, header ); } + } else { + /* + * With zero area, this is back facing (because the spec says + * it's front facing if sign is positive?). + * Some apis apparently do not allow us to cull zero area tris + * here, in case of fill mode line (which is rather lame). 
+ */ + if ((PIPE_FACE_BACK & cull_stage(stage)->cull_face) == 0) { +stage->next->tri( stage->next, header ); + } } } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index c465c7526f..f39db0e6a0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -63,10 +63,9 @@ inject_front_face_info(struct draw_stage *stage, struct prim_header *header) { struct unfilled_stage *unfilled = unfilled_stage(stage); - unsigned ccw = header->det < 0.0; boolean is_front_face = ( - (stage->draw->rasterizer->front_ccw && ccw) || - (!stage->draw->rasterizer->front_ccw && !ccw)); + (stage->draw->rasterizer->front_ccw && header->det < 0.0f) || + (!stage->draw->rasterizer->front_ccw && header->det > 0.0f)); int slot = unfilled->face_slot; unsigned i; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium/util: remove some block alignment assertions
Module: Mesa Branch: master Commit: 20c77ae6390451a74e2463f02c49bd7fec3dd29c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=20c77ae6390451a74e2463f02c49bd7fec3dd29c Author: Roland Scheidegger Date: Wed Oct 25 02:39:20 2017 +0200 gallium/util: remove some block alignment assertions These assertions were revisited a couple of times in the past, and they still weren't quite right. The problem I was seeing (with some other state tracker) was a copy between two 512x512 s3tc textures, but from mip level 0 to mip level 8. Therefore, the destination has only size 2x2 (not a full block), so the box width/height was only 2, causing the assertion to trigger for src alignment. As far as I can tell, such a copy is completely legal, and because a correct assertion would get ridiculously complicated just get rid of it for good. Reviewed-by: Brian Paul --- src/gallium/auxiliary/util/u_surface.c | 8 1 file changed, 8 deletions(-) diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 5abf96625e..0a79a25a43 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -324,16 +324,8 @@ util_resource_copy_region(struct pipe_context *pipe, /* check that region boxes are block aligned */ assert(src_box.x % src_bw == 0); assert(src_box.y % src_bh == 0); - assert(src_box.width % src_bw == 0 || - src_box.x + src_box.width == u_minify(src->width0, src_level)); - assert(src_box.height % src_bh == 0 || - src_box.y + src_box.height == u_minify(src->height0, src_level)); assert(dst_box.x % dst_bw == 0); assert(dst_box.y % dst_bh == 0); - assert(dst_box.width % dst_bw == 0 || - dst_box.x + dst_box.width == u_minify(dst->width0, dst_level)); - assert(dst_box.height % dst_bh == 0 || - dst_box.y + dst_box.height == u_minify(dst->height0, dst_level)); /* check that region boxes are not out of bounds */ assert(src_box.x + src_box.width <= u_minify(src->width0, src_level)); ___ mesa-commit mailing list 
mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): tgsi: fix tgsi_util_get_inst_usage_mask
Module: Mesa Branch: master Commit: 77b8392858815625ee7909cf9e866043dab9d074 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=77b8392858815625ee7909cf9e866043dab9d074 Author: Roland Scheidegger Date: Wed Oct 18 23:13:58 2017 +0200 tgsi: fix tgsi_util_get_inst_usage_mask The logic for handling shadow coords was completely broken. Fixes be3ab867bd444594f9d9e0f8e59d305d15769afd. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103265 Reviewed-by: Marek Olšák --- src/gallium/auxiliary/tgsi/tgsi_util.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index be8bcdf123..cfce59093c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -292,17 +292,17 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_TXL2: case TGSI_OPCODE_LODQ: case TGSI_OPCODE_TG4: { - unsigned dim_layer_shadow = + unsigned dim_layer = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); - unsigned dim_layer, dim; + unsigned dim_layer_shadow, dim; - /* Remove shadow. */ + /* Add shadow. */ if (tgsi_is_shadow_target(inst->Texture.Texture)) { - dim_layer = dim_layer_shadow - 1; + dim_layer_shadow = dim_layer + 1; if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D) -dim_layer = 1; +dim_layer_shadow = 3; } else { - dim_layer = dim_layer_shadow; + dim_layer_shadow = dim_layer; } /* Remove layer. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe: handle shader sample mask output
Module: Mesa Branch: master Commit: 3d0deed12ab3982cc183189f39c0df2793c2d94a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d0deed12ab3982cc183189f39c0df2793c2d94a Author: Roland Scheidegger Date: Tue Oct 17 21:55:03 2017 +0200 llvmpipe: handle shader sample mask output This probably isn't all that useful for GL, but there are apis where sample_mask is a valid output even without msaa. Just discard the pixel if the sample_mask doesn't include the bit for sample 0. Reviewed-by: Brian Paul --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 05984b346e..9223ce63e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -84,6 +84,7 @@ #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_pack.h" #include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_quad.h" @@ -347,7 +348,8 @@ generate_fs_loop(struct gallivm_state *gallivm, if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) { if (key->alpha.enabled || key->blend.alpha_to_coverage || - shader->info.base.uses_kill) { + shader->info.base.uses_kill || + shader->info.base.writes_samplemask) { /* With alpha test and kill, can do the depth test early * and hopefully eliminate some quads. 
But need to do a * special deferred depth write once the final mask value @@ -516,6 +518,25 @@ generate_fs_loop(struct gallivm_state *gallivm, } } + if (shader->info.base.writes_samplemask) { + int smaski = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_SAMPLEMASK, + 0); + LLVMValueRef smask; + struct lp_build_context smask_bld; + lp_build_context_init(&smask_bld, gallivm, int_type); + + assert(smaski >= 0); + smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask"); + /* + * Pixel is alive according to the first sample in the mask. + */ + smask = LLVMBuildBitCast(builder, smask, smask_bld.vec_type, ""); + smask = lp_build_and(&smask_bld, smask, smask_bld.one); + smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, smask, smask_bld.zero); + lp_build_mask_update(&mask, smask); + } + /* Late Z test */ if (depth_mode & LATE_DEPTH_TEST) { int pos0 = find_output_by_semantic(&shader->info.base, @@ -2818,7 +2839,8 @@ generate_variant(struct llvmpipe_context *lp, !key->alpha.enabled && !key->blend.alpha_to_coverage && !key->depth.enabled && - !shader->info.base.uses_kill + !shader->info.base.uses_kill && + !shader->info.base.writes_samplemask ? TRUE : FALSE; if ((shader->info.base.num_tokens <= 1) && ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: don't use pabs intrinsic with llvm version >= 6
Module: Mesa Branch: master Commit: 52b73caaf40e79c90a105ec6d349abb3398e3c6b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=52b73caaf40e79c90a105ec6d349abb3398e3c6b Author: Roland Scheidegger Date: Sat Oct 7 00:52:58 2017 +0200 gallivm: don't use pabs intrinsic with llvm version >= 6 The intrinsic is gone, causing shader compilation to crash. While here, also change the fallback code to match what llvm's auto-updater of these intrinsics would do (except that there will still be zext/trunc instructions in there), which should ensure that the sequence gets recognized and fused back into a pabs in the end (I didn't test this, and it's possible even the old sequence would get recognized, but I don't see a reason why we shouldn't use the same sequence in any case). Tested-by: Vinson Lee --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 13 - 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 04f86bef28..cf1958b3b6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1799,7 +1799,7 @@ lp_build_abs(struct lp_build_context *bld, } } - if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { + if(type.width*type.length == 128 && util_cpu_caps.has_ssse3 && HAVE_LLVM < 0x0600) { switch(type.width) { case 8: return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); @@ -1809,7 +1809,7 @@ lp_build_abs(struct lp_build_context *bld, return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); } } - else if (type.width*type.length == 256 && util_cpu_caps.has_avx2) { + else if (type.width*type.length == 256 && util_cpu_caps.has_avx2 && HAVE_LLVM < 0x0600) { switch(type.width) { case 8: return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", vec_type, a); @@ -1819,14 +1819,9 @@ lp_build_abs(struct lp_build_context *bld, return 
lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.d", vec_type, a); } } - else if (type.width*type.length == 256 && util_cpu_caps.has_ssse3 && -(gallivm_debug & GALLIVM_DEBUG_PERF) && -(type.width == 8 || type.width == 16 || type.width == 32)) { - debug_printf("%s: inefficient code, should split vectors manually\n", - __FUNCTION__); - } - return lp_build_max(bld, a, LLVMBuildNeg(builder, a, "")); + return lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero), + a, LLVMBuildNeg(builder, a, "")); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm/ppc64le: adjust VSX code generation control.
Module: Mesa Branch: master Commit: e93f056a4e5babde516c9ef53ae3547f68f1b824 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e93f056a4e5babde516c9ef53ae3547f68f1b824 Author: Ben Crocker Date: Thu Sep 28 14:09:12 2017 -0400 gallivm/ppc64le: adjust VSX code generation control. In lp_build_create_jit_compiler_for_module(), advance the minimum version of LLVM for VSX code generation to 4.0; this is the minimum revision at which several known VSX code generation bugs are fixed: https://llvm.org/bugs/show_bug.cgi?id=25503 (fixed in 3.8.1) https://llvm.org/bugs/show_bug.cgi?id=26775 (fixed in 3.8.1) https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0) An llc performance bug introduced in LLVM 4.0, https://llvm.org/bugs/show_bug.cgi?id=34647 is still pending as of LLVM 5.0, but only has a pronounced effect on one of the Piglit tests: ext_transform_feedback-max-varyings. All changes tested via Piglit. Cc: "17.2" Signed-off-by: Ben Crocker Acked-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 37 ++- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 60d88269e5..d988910a7e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -630,23 +630,46 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #if defined(PIPE_ARCH_PPC) MAttrs.push_back(util_cpu_caps.has_altivec ? 
"+altivec" : "-altivec"); #if (HAVE_LLVM >= 0x0304) -#if (HAVE_LLVM <= 0x0307) || (HAVE_LLVM == 0x0308 && MESA_LLVM_VERSION_PATCH == 0) +#if (HAVE_LLVM < 0x0400) /* * Make sure VSX instructions are disabled -* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7 +* See LLVM bugs: +* https://llvm.org/bugs/show_bug.cgi?id=25503#c7 (fixed in 3.8.1) +* https://llvm.org/bugs/show_bug.cgi?id=26775 (fixed in 3.8.1) +* https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0) +* https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0) */ if (util_cpu_caps.has_altivec) { MAttrs.push_back("-vsx"); } #else /* -* However, bug 25503 is fixed, by the same fix that fixed -* bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1): -* Make sure VSX instructions are ENABLED -* See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=26775 +* Bug 25503 is fixed, by the same fix that fixed +* bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1). +* BZ 33531 actually comprises more than one bug, all of +* which are fixed in LLVM 4.0. +* +* With LLVM 4.0 or higher: +* Make sure VSX instructions are ENABLED, unless +* a) the entire -mattr option is overridden via GALLIVM_MATTRS, or +* b) VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0. */ if (util_cpu_caps.has_altivec) { - MAttrs.push_back("+vsx"); + char *env_mattrs = getenv("GALLIVM_MATTRS"); + if (env_mattrs) { + MAttrs.push_back(env_mattrs); + } + else { + boolean enable_vsx = true; + char *env_vsx = getenv("GALLIVM_VSX"); + if (env_vsx && env_vsx[0] == '0') { +enable_vsx = false; + } + if (enable_vsx) +MAttrs.push_back("+vsx"); + else +MAttrs.push_back("-vsx"); + } } #endif #endif ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: allow additional llc options
Module: Mesa Branch: master Commit: 5c75f0c8bb876c1f4c85cda5ed10a4d632d24f56 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c75f0c8bb876c1f4c85cda5ed10a4d632d24f56 Author: Ben Crocker Date: Thu Sep 28 14:09:11 2017 -0400 gallivm: allow additional llc options In init_native_targets, allow the passing of additional options to the LLC compiler via new GALLIVM_LLC_OPTIONS environmental control. This option is available only #ifdef DEBUG, initially. At top, add #include for LLVMParseCommandLineOptions() declaration. v2: Fix compile error with old llvm versions (sroland) Cc: "17.2" Signed-off-by: Ben Crocker Acked-by: Nicolai Hähnle Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 23 +++ 1 file changed, 23 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 342cb386d6..60d88269e5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -49,6 +49,9 @@ #endif #include +#if HAVE_LLVM >= 0x0306 +#include +#endif #include #include #include @@ -122,6 +125,26 @@ static void init_native_targets() llvm::InitializeNativeTargetAsmPrinter(); llvm::InitializeNativeTargetDisassembler(); +#if DEBUG && HAVE_LLVM >= 0x0306 + { + char *env_llc_options = getenv("GALLIVM_LLC_OPTIONS"); + if (env_llc_options) { + char *option; + char *options[64] = {(char *) "llc"}; // Warning without cast + int n; + for (n = 0, option = strtok(env_llc_options, " "); option; n++, option = strtok(NULL, " ")) { +options[n + 1] = option; + } + if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) { +debug_printf("llc additional options (%d):\n", n); +for (int i = 1; i <= n; i++) + debug_printf("\t%s\n", options[i]); +debug_printf("\n"); + } + LLVMParseCommandLineOptions(n + 1, options, NULL); + } + } +#endif } extern "C" void ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: fix typo in debug_printf message
Module: Mesa Branch: master Commit: 3a9feb4db8ad1e87a70c761987798b7575d522aa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a9feb4db8ad1e87a70c761987798b7575d522aa Author: Ben Crocker Date: Thu Sep 28 14:09:10 2017 -0400 gallivm: fix typo in debug_printf message In gallivm_compile_module, fix a typo in the debug_printf("Invoke as \"llc ..." message. Cc: "17.2" Signed-off-by: Ben Crocker Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 9f1ade68c4..c456a97eb6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -606,7 +606,7 @@ gallivm_compile_module(struct gallivm_state *gallivm) LLVMWriteBitcodeToFile(gallivm->module, filename); debug_printf("%s written\n", filename); debug_printf("Invoke as \"llc %s%s -o - %s\"\n", - (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option] " : "", + (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option>] " : "", "[-mattr=<-mattr option(s)>]", filename); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm/ppc64le: allow environmental control of Altivec code generation
Module: Mesa Branch: master Commit: 1359af930ee5baf8444b0acc3d55b1e5e1a3879e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1359af930ee5baf8444b0acc3d55b1e5e1a3879e Author: Ben Crocker Date: Thu Sep 28 14:09:13 2017 -0400 gallivm/ppc64le: allow environmental control of Altivec code generation In check_os_altivec_support(), allow control of Altivec (first PPC vector instruction set) code generation via a new environmental control, GALLIVM_ALTIVEC, which is expected to take on a value of 1 or 0. The default is to enable Altivec code generation. This environmental control of Altivec code generation is initially available only #ifdef DEBUG. Cc: "17.2" Signed-off-by: Ben Crocker Acked-by: Roland Scheidegger --- src/gallium/auxiliary/util/u_cpu_detect.c | 32 +++ 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index 4e71041bc9..6a59f271a8 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -132,16 +132,32 @@ check_os_altivec_support(void) if (setjmp(__lv_powerpc_jmpbuf)) { signal(SIGILL, SIG_DFL); } else { - __lv_powerpc_canjump = 1; + boolean enable_altivec = TRUE;/* Default: enable if available, and if not overridden */ +#ifdef DEBUG + /* Disabling Altivec code generation is not the same as disabling VSX code generation, + * which can be done simply by passing -mattr=-vsx to the LLVM compiler; cf. + * lp_build_create_jit_compiler_for_module(). + * If you want to disable Altivec code generation, the best place to do it is here. 
+ */ + char *env_control = getenv("GALLIVM_ALTIVEC");/* 1=enable (default); 0=disable */ + if (env_control && env_control[0] == '0') { + enable_altivec = FALSE; + } +#endif + if (enable_altivec) { + __lv_powerpc_canjump = 1; - __asm __volatile - ("mtspr 256, %0\n\t" - "vand %%v0, %%v0, %%v0" - : - : "r" (-1)); + __asm __volatile +("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); - signal(SIGILL, SIG_DFL); - util_cpu_caps.has_altivec = 1; + signal(SIGILL, SIG_DFL); + util_cpu_caps.has_altivec = 1; + } else { + util_cpu_caps.has_altivec = 0; + } } #endif /* !PIPE_OS_APPLE */ } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: add new LOD opcode
Module: Mesa Branch: master Commit: 740a1618c34c095f85d4929e11ef107d560f7450 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=740a1618c34c095f85d4929e11ef107d560f7450 Author: Roland Scheidegger Date: Thu Sep 28 03:45:04 2017 +0200 gallium: add new LOD opcode The operation performed is all the same as LODQ, but with the usual differences between dx10 and GL texture opcodes, that is separate resource and sampler indices (plus result swizzling, and setting z/w channels to zero). Reviewed-by: Jose Fonseca Acked-by: Nicolai Hähnle --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 14 src/gallium/auxiliary/tgsi/tgsi_exec.c | 48 ++--- src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h | 1 + src/gallium/docs/source/tgsi.rst| 12 +++ src/gallium/include/pipe/p_shader_tokens.h | 4 ++- 5 files changed, 74 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index e5d0293b8f..de18f629cd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -3284,6 +3284,18 @@ sviewinfo_emit( emit_size_query(bld, emit_data->inst, emit_data->output, TRUE); } +static void +lod_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + + emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, + FALSE, LP_SAMPLER_OP_LODQ, emit_data->output); +} + static LLVMValueRef mask_vec(struct lp_build_tgsi_context *bld_base) { @@ -3898,6 +3910,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit; bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit; bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; + bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit; + if (gs_iface) { /* There's no specific value for 
this because it should always diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 9c019a311d..afed96c9b1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2351,15 +2351,22 @@ static void exec_lodq(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - uint unit; + uint resource_unit, sampler_unit; int dim; int i; union tgsi_exec_channel coords[4]; const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; union tgsi_exec_channel r[2]; - unit = fetch_sampler_unit(mach, inst, 1); - dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + resource_unit = fetch_sampler_unit(mach, inst, 1); + if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { + uint target = mach->SamplerViews[resource_unit].Resource; + dim = tgsi_util_get_texture_coord_dim(target); + sampler_unit = fetch_sampler_unit(mach, inst, 2); + } else { + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + sampler_unit = resource_unit; + } assert(dim <= ARRAY_SIZE(coords)); /* fetch coordinates */ for (i = 0; i < dim; i++) { @@ -2369,7 +2376,7 @@ exec_lodq(struct tgsi_exec_machine *mach, for (i = dim; i < ARRAY_SIZE(coords); i++) { args[i] = &ZeroVec; } - mach->Sampler->query_lod(mach->Sampler, unit, unit, + mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit, args[0]->f, args[1]->f, args[2]->f, @@ -2386,6 +2393,35 @@ exec_lodq(struct tgsi_exec_machine *mach, store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); } + if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { + unsigned char swizzles[4]; + unsigned chan; + swizzles[0] = inst->Src[1].Register.SwizzleX; + swizzles[1] = inst->Src[1].Register.SwizzleY; + swizzles[2] = inst->Src[1].Register.SwizzleZ; + swizzles[3] = inst->Src[1].Register.SwizzleW; + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { +if (swizzles[chan] >= 2) { 
+ store_dest(mach, &ZeroVec, + &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); +} else { + store_dest(mach, &r[swizzles[chan]], + &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); +} + } + } + } else { + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest
Mesa (master): llvmpipe, gallivm: implement lod queries (LODQ opcode)
Module: Mesa Branch: master Commit: 886626960bca51bdfc0880e3830c0a95ea143c4c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=886626960bca51bdfc0880e3830c0a95ea143c4c Author: Roland Scheidegger Date: Mon Sep 18 04:52:26 2017 +0200 llvmpipe, gallivm: implement lod queries (LODQ opcode) This uses all the existing code to calculate lod values for mip linear filtering. Though we'll have to disable the simplifications (if we know some parts of the lod calculation won't actually matter for filtering purposes due to mip clamps etc.). For better or worse, we'll also disable lod calculation hacks (mostly should make a difference for cube maps) always - the issue with per-pixel lod being difficult is mostly because we then have different mipmaps needed for the actual texel fetch, which isn't a problem with lodq. We still use approximation for the log2 - for that reason I believe the float part of the lod is only accurate to about 4-5 bits (and one bit less with 1d textures actually) which is hopefully good enough (though d3d10 technically requires 6 bits - could use quadratic interpolation instead of linear to get 8 bits or so). Since lodq requires unclamped lod, we also have to move some sampler key calculations to texture sampling code - even if we know we're going to access mipmap 0 we still have to calculate lod and apply lod_bias for lodq. Passes piglit ARB_texture_query_lod tests (after having fixed the test). 
Reviewed-by: Jose Fonseca --- docs/features.txt | 2 +- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 40 --- src/gallium/auxiliary/gallivm/lp_bld_sample.h | 10 +- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 137 +++--- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 13 ++ src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- 6 files changed, 145 insertions(+), 59 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index fe412f6607..c186dc70da 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -131,7 +131,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi GL_ARB_texture_buffer_object_rgb32DONE (i965/gen6+, llvmpipe, softpipe, swr) GL_ARB_texture_cube_map_array DONE (i965/gen6+, nv50, llvmpipe, softpipe) GL_ARB_texture_gather DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr) - GL_ARB_texture_query_lod DONE (i965, nv50, softpipe) + GL_ARB_texture_query_lod DONE (i965, nv50, llvmpipe, softpipe) GL_ARB_transform_feedback2DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr) GL_ARB_transform_feedback3DONE (i965/gen7+, llvmpipe, softpipe, swr) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index a1dc61d40f..db3d9d65c9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -156,19 +156,19 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state, state->wrap_r= sampler->wrap_r; state->min_img_filter= sampler->min_img_filter; state->mag_img_filter= sampler->mag_img_filter; + state->min_mip_filter= sampler->min_mip_filter; state->seamless_cube_map = sampler->seamless_cube_map; if (sampler->max_lod > 0.0f) { - state->min_mip_filter = sampler->min_mip_filter; - } else { - state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + state->max_lod_pos = 1; + } + + if (sampler->lod_bias != 0.0f) { + state->lod_bias_non_zero = 1; } if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE || state->min_img_filter != 
state->mag_img_filter) { - if (sampler->lod_bias != 0.0f) { - state->lod_bias_non_zero = 1; - } /* If min_lod == max_lod we can greatly simplify mipmap selection. * This is a case that occurs during automatic mipmap generation. @@ -234,7 +234,7 @@ lp_build_rho(struct lp_build_sample_context *bld, unsigned length = coord_bld->type.length; unsigned num_quads = length / 4; boolean rho_per_quad = rho_bld->type.length != length; - boolean no_rho_opt = (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && (dims > 1); + boolean no_rho_opt = bld->no_rho_approx && (dims > 1); unsigned i; LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); LLVMValueRef rho_xvec, rho_yvec; @@ -694,6 +694,7 @@ lp_build_ilog2_sqrt(struct lp_build_context *bld, */ void lp_build_lod_selector(struct lp_build_sample_context *bld, + boolean is_lodq, unsigned texture_unit, unsigne
Mesa (master): gallivm: fix gather implementation a bit
Module: Mesa Branch: master Commit: dcf2feadc336a1d81bf1b03d0b9c6dd68ea61441 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dcf2feadc336a1d81bf1b03d0b9c6dd68ea61441 Author: Roland Scheidegger Date: Sat Sep 9 02:58:21 2017 +0200 gallivm: fix gather implementation a bit gather is defined in terms of bilinear filtering, just without the filtering part. However, there are actually some subtle differences required in our implementation, because we use some tricks to simplify coord wrapping for the two coords per direction. For bilinear filtering, we don't care if we end up with an incorrect texel, as long as the filter weight is 0.0 for it. Likewise, the order of the texels doesn't actually matter (as long as they still have the correct filter weight). But for gather, these tricks lead to incorrect results. Fix this for CLAMP_TO_EDGE, and add some comments to the other wrap functions which look broken (the 3 mirror_clamp plus mirror_repeat) (too complex to fix right now, and no one really seems to care...). 
Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 58 +++ 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index cb4660e424..1539849b2d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -299,6 +299,7 @@ lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld, */ static void lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, +boolean is_gather, LLVMValueRef coord, LLVMValueRef length, LLVMValueRef length_f, @@ -388,13 +389,29 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* clamp to length max */ coord = lp_build_min_ext(coord_bld, coord, length_f, GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - /* subtract 0.5 */ - coord = lp_build_sub(coord_bld, coord, half); - /* clamp to [0, length - 0.5] */ - coord = lp_build_max(coord_bld, coord, coord_bld->zero); - /* convert to int, compute lerp weight */ - lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); - coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + if (!is_gather) { +/* subtract 0.5 */ +coord = lp_build_sub(coord_bld, coord, half); +/* clamp to [0, length - 0.5] */ +coord = lp_build_max(coord_bld, coord, coord_bld->zero); +/* convert to int, compute lerp weight */ +lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); +coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + } else { +/* + * The non-gather path will end up with coords 0, 1 if coord was + * smaller than 0.5 (with corresponding weight 0.0 so it doesn't + * really matter what the second coord is). But for gather, we + * really need to end up with coords 0, 0. 
+ */ +coord = lp_build_max(coord_bld, coord, coord_bld->zero); +coord0 = lp_build_sub(coord_bld, coord, half); +coord1 = lp_build_add(coord_bld, coord, half); +/* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5] */ +coord0 = lp_build_itrunc(coord_bld, coord0); +coord1 = lp_build_itrunc(coord_bld, coord1); +weight = coord_bld->undef; + } /* coord1 = min(coord1, length-1) */ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); break; @@ -424,6 +441,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, coord = lp_build_add(coord_bld, coord, offset); } /* compute mirror function */ + /* + * XXX: This looks incorrect wrt gather. Due to wrap specification, + * it is possible the first coord ends up larger than the second one. + * However, with our simplifications the coordinates will be swapped + * in this case. (Albeit some other api tests don't like it even + * with this fixed...) + */ coord = lp_build_coord_mirror(bld, coord); /* scale coord to length */ @@ -474,6 +498,20 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, offset = lp_build_int_to_float(coord_bld, offset); coord = lp_build_add(coord_bld, coord, offset); } + /* + * XXX: This looks incorrect wrt gather. Due to wrap specification, + * the first and second texel actually end up with "different order" +
Mesa (master): llvmpipe, draw: improve shader cache debugging
Module: Mesa Branch: master Commit: 57a341b0a94d37e2aee5380703d171c422d8550e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=57a341b0a94d37e2aee5380703d171c422d8550e Author: Roland Scheidegger Date: Fri Sep 8 02:59:11 2017 +0200 llvmpipe, draw: improve shader cache debugging With GALLIVM_DEBUG=perf set, output the relevant stats for shader cache usage whenever we have to evict shader variants. Also add some output when shaders are deleted (but not with the perf setting to keep this one less noisy). While here, also don't delete that many shaders when we have to evict. For fs, there's potentially some cost if we have to evict due to the required flush, however certainly shader recompiles have a high cost too so I don't think evicting one quarter of the cache size makes sense (and, if we're evicting based on IR count, we probably typically evict only very few or just one shader too). For vs, I'm not sure it even makes sense to evict more than one shader at a time, but keep the logic the same for now. 
Reviewed-by: Jose Fonseca Reviewed-by: Brian Paul --- src/gallium/auxiliary/draw/draw_llvm.c | 10 .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 55 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 25 ++ 3 files changed, 59 insertions(+), 31 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 203572010f..8de29ea1fd 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -2156,6 +2156,11 @@ draw_llvm_destroy_variant(struct draw_llvm_variant *variant) { struct draw_llvm *llvm = variant->llvm; + if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { + debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n", +variant->shader->variants_cached, llvm->nr_variants); + } + gallivm_destroy(variant->gallivm); remove_from_list(&variant->list_item_local); @@ -2418,6 +2423,11 @@ draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant) { struct draw_llvm *llvm = variant->llvm; + if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { + debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n", +variant->shader->variants_cached, llvm->nr_gs_variants); + } + gallivm_destroy(variant->gallivm); remove_from_list(&variant->list_item_local); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 0277cbfc83..c6492a18cf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -37,6 +37,7 @@ #include "draw/draw_vs.h" #include "draw/draw_llvm.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_debug.h" struct llvm_middle_end { @@ -71,6 +72,7 @@ static void llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme) { struct draw_context *draw = fpme->draw; + struct draw_llvm *llvm = fpme->llvm; struct draw_geometry_shader *gs = 
draw->gs.geometry_shader; struct draw_gs_llvm_variant_key *key; struct draw_gs_llvm_variant *variant = NULL; @@ -79,7 +81,7 @@ llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme) char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE]; unsigned i; - key = draw_gs_llvm_make_variant_key(fpme->llvm, store); + key = draw_gs_llvm_make_variant_key(llvm, store); /* Search shader's list of variants for the key */ li = first_elem(&shader->variants); @@ -93,38 +95,42 @@ llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme) if (variant) { /* found the variant, move to head of global list (for LRU) */ - move_to_head(&fpme->llvm->gs_variants_list, - &variant->list_item_global); + move_to_head(&llvm->gs_variants_list, &variant->list_item_global); } else { /* Need to create new variant */ /* First check if we've created too many variants. If so, free - * 25% of the LRU to avoid using too much memory. + * 3.125% of the LRU to avoid using too much memory. */ - if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) { + if (llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) { + if (gallivm_debug & GALLIVM_DEBUG_PERF) { +debug_printf("Evicting GS: %u gs variants,\t%u total variants\n", + shader->variants_cached, llvm->nr_gs_variants); + } + /* * XXX: should we flush here ? */ - for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { +
Mesa (master): llvmpipe: enable PIPE_CAP_QUERY_PIPELINE_STATISTICS
Module: Mesa Branch: master Commit: 772f475351d63067f8fd0251e2fe6a33aedf1f56 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=772f475351d63067f8fd0251e2fe6a33aedf1f56 Author: Roland Scheidegger Date: Fri Sep 8 02:23:05 2017 +0200 llvmpipe: enable PIPE_CAP_QUERY_PIPELINE_STATISTICS This was implemented since forever, but not enabled. It passes all piglit tests except one, arb_pipeline_statistics_query-frag. The reason is that the test (for drawing a 10x10 rect) expects between 100 and 150 pixel shader invocations. But since llvmpipe counts this with 4x4 granularity (and due to the rect being 2 tris) we end up with 224 invocations. I believe however what llvmpipe is doing violates neither the spirit nor the letter of the spec (our fragment shader granularity really is 4x4 pixels, albeit we will bail out early on 2x2 or 4x2 (the latter if AVX is available) granularity), the spec allows to count additional invocations due to implementation reasons. Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- docs/features.txt| 2 +- src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 0435ce61ff..fe412f6607 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -225,7 +225,7 @@ GL 4.6, GLSL 4.60 GL_ARB_gl_spirv in progress (Nicolai Hähnle, Ian Romanick) GL_ARB_indirect_parametersDONE (nvc0, radeonsi) - GL_ARB_pipeline_statistics_query DONE (i965, nvc0, radeonsi, softpipe, swr) + GL_ARB_pipeline_statistics_query DONE (i965, nvc0, radeonsi, llvmpipe, softpipe, swr) GL_ARB_polygon_offset_clamp DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, swr) GL_ARB_shader_atomic_counter_ops DONE (i965/gen7+, nvc0, radeonsi, softpipe) GL_ARB_shader_draw_parameters DONE (i965, nvc0, radeonsi) diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 32a405088f..dba7ae3d01 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ 
b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -132,7 +132,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_QUERY_TIMESTAMP: return 1; case PIPE_CAP_QUERY_PIPELINE_STATISTICS: - return 0; + return 1; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe, draw: increase shader cache limits
Module: Mesa Branch: master Commit: de6810d9be9d1e6426881774458f8a6f3bed17ee URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=de6810d9be9d1e6426881774458f8a6f3bed17ee Author: Roland Scheidegger Date: Tue Sep 5 00:17:31 2017 +0200 llvmpipe, draw: increase shader cache limits We're not particularly concerned with memory usage, if the tradeoff is shader recompiles. And it's common for apps to have a lot of shaders nowadays (and, since our shaders include a LOT of context state of course we may create quite a bit more shaders even). So quadruple the amount of shaders draw will cache (from 128 to 512). For llvmpipe (fs shaders) quadruple the number of instructions, keep the number of variants the same for now (only with very simple, non-texturing shaders the variant limit could really be reached), and simplify the definition, it's probably easier to just have one different definition per branch... Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/drivers/llvmpipe/lp_limits.h | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 030bb2cece..06ad7372a7 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -103,7 +103,7 @@ struct vertex_header { /* maximum number of shader variants we can cache */ -#define DRAW_MAX_SHADER_VARIANTS 128 +#define DRAW_MAX_SHADER_VARIANTS 512 /** * Private context for the drawing module. diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h index 5294ced3c4..c2808162c7 100644 --- a/src/gallium/drivers/llvmpipe/lp_limits.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -78,10 +78,8 @@ /** * Max number of instructions (for all fragment shaders combined per context) * that will be kept around (counted in terms of llvm ir). 
- * Note: the definition looks odd, but there's branches which use a different - * number of max shader variants. */ -#define LP_MAX_SHADER_INSTRUCTIONS MAX2(256*1024, 512*LP_MAX_SHADER_VARIANTS) +#define LP_MAX_SHADER_INSTRUCTIONS (2048 * LP_MAX_SHADER_VARIANTS) /** * Max number of setup variants that will be kept around. ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe, tgsi: hook up dx10 gather4 opcode
Module: Mesa Branch: master Commit: 6d9d6071ee961acc82543b321764a0ffec8cd39a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d9d6071ee961acc82543b321764a0ffec8cd39a Author: Roland Scheidegger Date: Tue Sep 5 17:59:37 2017 +0200 llvmpipe, tgsi: hook up dx10 gather4 opcode Trivial. We already support tg4 for legacy tex opcodes, so the actual texture sampling code already handles it. (Just like TG4, we don't handle additional capabilities and always sample red channel.) Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 28 ++--- src/gallium/auxiliary/tgsi/tgsi_exec.c | 5 - 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index b7f1140135..f16c579f38 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -2232,6 +2232,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, enum lp_build_tex_modifier modifier, boolean compare, +enum lp_sampler_op_type sample_type, LLVMValueRef *texel) { struct gallivm_state *gallivm = bld->bld_base.base.gallivm; @@ -2245,7 +2246,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld, unsigned num_offsets, num_derivs, i; unsigned layer_coord = 0; - unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT; + unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT; memset(&params, 0, sizeof(params)); @@ -3186,7 +3187,7 @@ sample_emit( struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, - FALSE, emit_data->output); + FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); } static void @@ -3198,7 +3199,7 @@ sample_b_emit( struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, - FALSE, emit_data->output); + FALSE, 
LP_SAMPLER_OP_TEXTURE, emit_data->output); } static void @@ -3210,7 +3211,7 @@ sample_c_emit( struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, - TRUE, emit_data->output); + TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); } static void @@ -3222,7 +3223,7 @@ sample_c_lz_emit( struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO, - TRUE, emit_data->output); + TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); } static void @@ -3234,7 +3235,7 @@ sample_d_emit( struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, - FALSE, emit_data->output); + FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); } static void @@ -3246,7 +3247,19 @@ sample_l_emit( struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, - FALSE, emit_data->output); + FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); +} + +static void +gather4_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + + emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, + FALSE, LP_SAMPLER_OP_GATHER, emit_data->output); } static void @@ -3871,6 +3884,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit; bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit; bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit; + bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit; bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; if (gs_iface) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 
c58ea6ad09..1264df0c62 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2631,6 +2631,9 @@ exec_sample(struct tgsi_exec_machine *mach, lod = &c1; control = TGSI_SAMPLER_LOD_EXPLICIT; } + else if (modifier == TEX_MODIFIER_GATHER) { + control = TGSI_SAMPLER_GATHER; + } else { assert(modifier == TEX_MODIFIER_LEVEL_ZERO); control = TGSI_SAMPLER_LOD_ZERO; @@ -5687,7 +5690,
Mesa (master): st/mesa: fix view template initialization in try_pbo_readpixels
Module: Mesa Branch: master Commit: 2b2c61f0df5c18355b65772d21be36339ba5e1d9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2b2c61f0df5c18355b65772d21be36339ba5e1d9 Author: Roland Scheidegger Date: Fri Sep 1 01:48:42 2017 +0200 st/mesa: fix view template initialization in try_pbo_readpixels I think this is what the code was meant to do, albeit as far as I can tell the redundant initialization some analyzers complain about should work as well just fine (only the first layer will be used, if the view contains one or more layers doesn't really matter). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102467 Reviewed-by: Brian Paul Reviewed-by: Marek Olšák Cc: mesa-sta...@lists.freedesktop.org --- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 0bcf2eb4fd..84dd2d548e 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -175,7 +175,7 @@ try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb, if (view_target != PIPE_TEXTURE_3D) { templ.u.tex.first_layer = surface->u.tex.first_layer; - templ.u.tex.last_layer = templ.u.tex.last_layer; + templ.u.tex.last_layer = templ.u.tex.first_layer; } else { addr.constants.layer_offset = surface->u.tex.first_layer; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): util: only use SCHED_IDLE in pthread_setschedparam() when it's defined
Module: Mesa Branch: master Commit: c92fe8a8c50968a6ac37cbecdd54208f0eea246c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c92fe8a8c50968a6ac37cbecdd54208f0eea246c Author: Roland Scheidegger Date: Sat Aug 26 17:08:07 2017 +0200 util: only use SCHED_IDLE in pthread_setschedparam() when it's defined Fixes build error when it's not. Reviewed-by: Jose Fonseca --- src/util/u_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/u_queue.c b/src/util/u_queue.c index 49361c3dad..449da7dc9a 100644 --- a/src/util/u_queue.c +++ b/src/util/u_queue.c @@ -246,7 +246,7 @@ util_queue_init(struct util_queue *queue, } if (flags & UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY) { - #if defined(__linux__) + #if defined(__linux__) && defined(SCHED_IDLE) struct sched_param sched_param = {0}; /* The nice() function can only set a maximum of 19. ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load
Module: Mesa Branch: master Commit: 57c8ead0cd08e6aaf88a389f7ce528c4f0face65 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=57c8ead0cd08e6aaf88a389f7ce528c4f0face65 Author: Ben Crocker Date: Wed Aug 23 16:32:43 2017 -0400 llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load Fix loading of a 3x16 vector as a single 48-bit load on big-endian systems (PPC64, S390). Roland Scheidegger's commit e827d9175675aaa6cfc0b981e2a80685fb7b3a74 plus Ray Strode's patch reduce pre-Roland Piglit failures from ~4000 to ~2000. This patch fixes three of the four regressions observed by Ray: - draw-vertices - draw-vertices-half-float - draw-vertices-half-float_gles2 One regression remains: - draw-vertices-2101010 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100613 Cc: "17.2" "17.1" Signed-off-by: Ben Crocker Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_gather.c | 30 +-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c index ccd03765c7..7d11dcd3b6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c @@ -234,13 +234,39 @@ lp_build_gather_elem_vec(struct gallivm_state *gallivm, */ res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, ""); - if (vector_justify) { #ifdef PIPE_ARCH_BIG_ENDIAN + if (vector_justify) { res = LLVMBuildShl(gallivm->builder, res, LLVMConstInt(dst_elem_type, dst_type.width - src_width, 0), ""); -#endif } + if (src_width == 48) { +/* Load 3x16 bit vector. + * The sequence of loads on big-endian hardware proceeds as follows. + * 16-bit fields are denoted by X, Y, Z, and 0. In memory, the sequence + * of three fields appears in the order X, Y, Z. 
+ * + * Load 32-bit word: 0.0.X.Y + * Load 16-bit halfword: 0.0.0.Z + * Rotate left: 0.X.Y.0 + * Bitwise OR: 0.X.Y.Z + * + * The order in which we need the fields in the result is 0.Z.Y.X, + * the same as on little-endian; permute 16-bit fields accordingly + * within 64-bit register: + */ +LLVMValueRef shuffles[4] = { + lp_build_const_int32(gallivm, 2), + lp_build_const_int32(gallivm, 1), + lp_build_const_int32(gallivm, 0), + lp_build_const_int32(gallivm, 3), +}; +res = LLVMBuildBitCast(gallivm->builder, res, + lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), ""); +res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), ""); +res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, ""); + } +#endif } } return res; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: correct channel shift logic on big endian
Module: Mesa Branch: master Commit: 75cb6e36178b9474bbb59b76cbbcce2a67bf88d2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=75cb6e36178b9474bbb59b76cbbcce2a67bf88d2 Author: Ray Strode Date: Wed Aug 23 16:32:42 2017 -0400 gallivm: correct channel shift logic on big endian lp_build_fetch_rgba_soa fetches a texel from a texture. Part of that process involves first gathering the element together from memory into a packed format, and then breaking out the individual color channels into separate, parallel arrays. The code fails to account for endianness when reading the packed values. This commit attempts to correct the problem by reversing the order the packed values are read on big endian systems. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100613 Cc: "17.2" "17.1" Signed-off-by: Ray Strode Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 98eb694c1f..22c19b10db 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, for (i = 0; i < format_desc->nr_channels; i++) { struct util_format_channel_description chan_desc = format_desc->channel[i]; unsigned blockbits = type.width; -unsigned vec_nr = chan_desc.shift / type.width; +unsigned vec_nr; + +#ifdef PIPE_ARCH_BIG_ENDIAN +vec_nr = (format_desc->block.bits - (chan_desc.shift + chan_desc.size)) / type.width; +#else +vec_nr = chan_desc.shift / type.width; +#endif chan_desc.shift %= type.width; output[i] = lp_build_extract_soa_chan(&bld, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): softpipe: enable PIPE_CAP_QUERY_SO_OVERFLOW
Module: Mesa Branch: master Commit: 26d46b94b4f03a8a5203539949e19124e3cdefad URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=26d46b94b4f03a8a5203539949e19124e3cdefad Author: Roland Scheidegger Date: Tue Aug 15 17:52:41 2017 +0200 softpipe: enable PIPE_CAP_QUERY_SO_OVERFLOW The driver was supposed to support this since way before the GL spec for it existed, albeit it was apparently broken, so fix and enable it. Reviewed-by: Jose Fonseca --- docs/features.txt| 2 +- src/gallium/drivers/softpipe/sp_query.c | 7 ++- src/gallium/drivers/softpipe/sp_screen.c | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index ac7645d069..ace46692b5 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -232,7 +232,7 @@ GL 4.6, GLSL 4.60 GL_ARB_shader_group_vote DONE (i965, nvc0, radeonsi) GL_ARB_spirv_extensions in progress (Nicolai Hähnle, Ian Romanick) GL_ARB_texture_filter_anisotropic not started - GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+, radeonsi) + GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+, radeonsi, softpipe) GL_KHR_no_error started (Timothy Arceri) These are the extensions cherry-picked to make GLES 3.1 diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index bec0116a56..63f6c4be04 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -63,6 +63,7 @@ softpipe_create_query(struct pipe_context *pipe, type == PIPE_QUERY_TIME_ELAPSED || type == PIPE_QUERY_SO_STATISTICS || type == PIPE_QUERY_SO_OVERFLOW_PREDICATE || + type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || type == PIPE_QUERY_PRIMITIVES_EMITTED || type == PIPE_QUERY_PRIMITIVES_GENERATED || type == PIPE_QUERY_PIPELINE_STATISTICS || @@ -102,7 +103,9 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed; break; case 
PIPE_QUERY_SO_OVERFLOW_PREDICATE: - sq->end = FALSE; + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written; + sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed; break; case PIPE_QUERY_PRIMITIVES_EMITTED: sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written; @@ -153,6 +156,7 @@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) sq->end = os_time_get_nano(); break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written - sq->so.num_primitives_written; sq->so.primitives_storage_needed = @@ -230,6 +234,7 @@ softpipe_get_query_result(struct pipe_context *pipe, vresult->b = TRUE; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: vresult->b = sq->end != 0; break; case PIPE_QUERY_TIMESTAMP_DISJOINT: { diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 0feef2189a..2988095eec 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -220,6 +220,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 31; case PIPE_CAP_DRAW_INDIRECT: return 1; + case PIPE_CAP_QUERY_SO_OVERFLOW: + return 1; case PIPE_CAP_VENDOR_ID: return 0xFFFFFFFF; @@ -307,7 +309,6 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_POST_DEPTH_COVERAGE: case PIPE_CAP_BINDLESS_TEXTURE: case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: - case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_MEMOBJ: return 0; case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe: enable PIPE_CAP_QUERY_SO_OVERFLOW
Module: Mesa Branch: master Commit: 3e9623145760883b431c0902b198d71d003ef7a0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e9623145760883b431c0902b198d71d003ef7a0 Author: Roland Scheidegger Date: Tue Aug 15 17:53:49 2017 +0200 llvmpipe: enable PIPE_CAP_QUERY_SO_OVERFLOW The driver supported this since way before the GL spec for it existed. Just need to support both the per-stream and for all streams variants (which are identical due to only supporting 1 stream). Passes piglit arb_transform_feedback_overflow_query-basic. Reviewed-by: Jose Fonseca --- docs/features.txt| 2 +- src/gallium/drivers/llvmpipe/lp_query.c | 3 +++ src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index ace46692b5..6f57ec26fd 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -232,7 +232,7 @@ GL 4.6, GLSL 4.60 GL_ARB_shader_group_vote DONE (i965, nvc0, radeonsi) GL_ARB_spirv_extensions in progress (Nicolai Hähnle, Ian Romanick) GL_ARB_texture_filter_anisotropic not started - GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+, radeonsi, softpipe) + GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+, radeonsi, llvmpipe, softpipe) GL_KHR_no_error started (Timothy Arceri) These are the extensions cherry-picked to make GLES 3.1 diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index d5ed6561b8..6f8ce94e5d 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -155,6 +155,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, *result = pq->num_primitives_written; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: vresult->b = pq->num_primitives_generated > pq->num_primitives_written; break; case PIPE_QUERY_SO_STATISTICS: { @@ -215,6 +216,7 @@ llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) pq->num_primitives_generated = 
llvmpipe->so_stats.primitives_storage_needed; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written; pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed; break; @@ -264,6 +266,7 @@ llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written; pq->num_primitives_generated = diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 6c64133b90..32a405088f 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -270,6 +270,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DOUBLES: case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: + case PIPE_CAP_QUERY_SO_OVERFLOW: return 1; case PIPE_CAP_VENDOR_ID: @@ -357,7 +358,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_POST_DEPTH_COVERAGE: case PIPE_CAP_BINDLESS_TEXTURE: case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: - case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_MEMOBJ: return 0; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: handle call attributes for llvm < 4.0 in lp_add_function_attr
Module: Mesa Branch: master Commit: dbde58dd311a77c08d316362f9365b4c0b6852fe URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dbde58dd311a77c08d316362f9365b4c0b6852fe Author: Roland Scheidegger Date: Fri Jul 21 20:27:43 2017 +0200 gallivm: handle call attributes for llvm < 4.0 in lp_add_function_attr We had some caller using LLVMAddInstrAttributes, which couldn't be converted to lp_add_function_attr, because attributes were only handled for functions in this case, so fix this. For llvm >= 4.0, this already works correctly. (radeonsi seems to avoid setting call site attributes prior to llvm 4.0, the patch then citing it doesn't work when calling intrinsics. But at least for calling external functions we always used that, albeit only for actual call attributes, not call parameter attributes, though some quick test shows llvm seems to handle that as well. The attribute index is sort of iffy though, since attribute 0 of the call is the actual function, attribute 1 corresponds to the first parameter of the called function.) (Verified with GALLIVM_DEBUG=dumpbc plus llvm-dis that the correct attributes are shown for calls, both for llvm 4.0 and 3.3.) 
Reviewed-by: Jose Fonseca Reviewed-by: Brian Paul --- src/gallium/auxiliary/gallivm/lp_bld_intr.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c index 19f98bb781..b92455593f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -168,10 +168,14 @@ lp_add_function_attr(LLVMValueRef function_or_call, #if HAVE_LLVM < 0x0400 LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr); - if (attr_idx == -1) { - LLVMAddFunctionAttr(function_or_call, llvm_attr); + if (LLVMIsAFunction(function_or_call)) { + if (attr_idx == -1) { + LLVMAddFunctionAttr(function_or_call, llvm_attr); + } else { + LLVMAddAttribute(LLVMGetParam(function_or_call, attr_idx - 1), llvm_attr); + } } else { - LLVMAddAttribute(LLVMGetParam(function_or_call, attr_idx - 1), llvm_attr); + LLVMAddInstrAttribute(function_or_call, attr_idx, llvm_attr); } #else ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: handle more TGSI_SEMANTIC_COLOR indices
Module: Mesa Branch: master Commit: 4db72852a16fc4a2a559255f9965e1d02e4f2b9c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4db72852a16fc4a2a559255f9965e1d02e4f2b9c Author: Roland Scheidegger Date: Sat Jul 8 00:14:35 2017 +0200 draw: handle more TGSI_SEMANTIC_COLOR indices It could only handle indices 0/1, otherwise what happened was bad (accessing array out of bounds, no crash but kind of random). This is enough for the gl state tracker (primary/secondary color) but not enough for some other state trackers (d3d9 has no limits on the number of color interpolants). The complexity with color semantics are all due to the front/back mapping (2 outputs in the vs map to one input in the fs) so this isn't extended to indices > 1 - d3d9 has no use for back colors, therefore this isn't needed and still only 2 back colors can be handled correctly. Reviewed-by: Brian Paul --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 17 ++--- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 11 +++ src/gallium/auxiliary/draw/draw_pipe_twoside.c | 9 ++--- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index cf2b41738b..4cfa54b2e1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -771,8 +771,9 @@ find_interp(const struct draw_fragment_shader *fs, int *indexed_interp, int interp; /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode * from the array we've filled before. 
*/ - if (semantic_name == TGSI_SEMANTIC_COLOR || - semantic_name == TGSI_SEMANTIC_BCOLOR) { + if ((semantic_name == TGSI_SEMANTIC_COLOR || +semantic_name == TGSI_SEMANTIC_BCOLOR) && + semantic_index < 2) { interp = indexed_interp[semantic_index]; } else if (semantic_name == TGSI_SEMANTIC_POSITION || semantic_name == TGSI_SEMANTIC_CLIPVERTEX) { @@ -851,7 +852,8 @@ clip_init_state(struct draw_stage *stage) if (fs) { for (i = 0; i < fs->info.num_inputs; i++) { - if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR && + fs->info.input_semantic_index[i] < 2) { if (fs->info.input_interpolate[i] != TGSI_INTERPOLATE_COLOR) indexed_interp[fs->info.input_semantic_index[i]] = fs->info.input_interpolate[i]; } @@ -881,6 +883,15 @@ clip_init_state(struct draw_stage *stage) clipper->perspect_attribs[clipper->num_perspect_attribs] = i; clipper->num_perspect_attribs++; break; + case TGSI_INTERPOLATE_COLOR: + if (draw->rasterizer->flatshade) { +clipper->const_attribs[clipper->num_const_attribs] = i; +clipper->num_const_attribs++; + } else { +clipper->perspect_attribs[clipper->num_perspect_attribs] = i; +clipper->num_perspect_attribs++; + } + break; default: assert(interp == -1); break; diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index cd285e6f97..2830435b99 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -170,8 +170,9 @@ find_interp(const struct draw_fragment_shader *fs, int *indexed_interp, int interp; /* If it's gl_{Front,Back}{,Secondary}Color, pick up the mode * from the array we've filled before. 
*/ - if (semantic_name == TGSI_SEMANTIC_COLOR || - semantic_name == TGSI_SEMANTIC_BCOLOR) { + if ((semantic_name == TGSI_SEMANTIC_COLOR || +semantic_name == TGSI_SEMANTIC_BCOLOR) && + semantic_index < 2) { interp = indexed_interp[semantic_index]; } else { /* Otherwise, search in the FS inputs, with a decent default @@ -216,7 +217,8 @@ static void flatshade_init_state( struct draw_stage *stage ) if (fs) { for (i = 0; i < fs->info.num_inputs; i++) { - if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR && + fs->info.input_semantic_index[i] < 2) { if (fs->info.input_interpolate[i] != TGSI_INTERPOLATE_COLOR) indexed_interp[fs->info.input_semantic_index[i]] = fs->info.input_interpolate[i]; } @@ -236,7 +238,8 @@ static void flatshade_init_state( struct draw_stage *stage ) info->output_semantic_index[i]); /* If it's flat, add it to the flat vector. */ - if (interp == TGSI_INTERPOLATE_CONSTANT) { + if (interp == TGSI_INTERPOLATE_CONSTANT || +
Mesa (master): llvmpipe: initialize default fb correctly in setup
Module: Mesa Branch: master Commit: 8bfe451ed30918244618608871423b2a72cf9767 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8bfe451ed30918244618608871423b2a72cf9767 Author: Roland Scheidegger Date: Fri Jun 23 19:40:18 2017 +0200 llvmpipe: initialize default fb correctly in setup If lp_setup_bind_framebuffer() is never called, then setup fb x1/y1 was not correctly initialized. This can happen if there's never a fb set - both cso and llvmpipe would consider setting this with no cbufs and no zsbuf a redundant change and therefore it would never get set. We rely on this setup fb rect being initialized correctly for the tri intersect tests, throwing away tris which don't intersect. Not initializing it meant we'd then say it intersected, and we'd try to bin that despite that we have no actual tiles to bin it to, leading to assertion failures (pretty harmless since tile 0/0 always exists nevertheless as tiles are statically allocated, albeit that should change at some point). (Note probably not an issue with gl state tracker) Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_setup.c | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 38d91385cf..32387ab553 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -1347,6 +1347,10 @@ lp_setup_create( struct pipe_context *pipe, setup->dirty = ~0; + /* Initialize empty default fb correctly, so the rect is empty */ + setup->framebuffer.x1 = -1; + setup->framebuffer.y1 = -1; + return setup; no_scenes: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe: fill in debug vertex info for tri rasterization
Module: Mesa Branch: master Commit: 672d245ffe85e85afe6ddd36868c145bb528c79b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=672d245ffe85e85afe6ddd36868c145bb528c79b Author: Roland Scheidegger Date: Fri Jun 23 04:57:57 2017 +0200 llvmpipe: fill in debug vertex info for tri rasterization This is pretty useful for debugging rasterization issues, so turn it on based on DEBUG (the actual existence of the fields is also conditionalized on DEBUG, lines fill it out the same too). Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index a7a5d05c32..324e93841f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -358,7 +358,7 @@ do_triangle_ccw(struct lp_setup_context *setup, if (!tri) return FALSE; -#if 0 +#ifdef DEBUG tri->v[0][0] = v0[0][0]; tri->v[1][0] = v1[0][0]; tri->v[2][0] = v2[0][0]; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe:fix using 32bit rasterization mistakenly, causing overflows
Module: Mesa Branch: master Commit: c7688d2de5bb0861965e6e7b76a396ab6eec253f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c7688d2de5bb0861965e6e7b76a396ab6eec253f Author: Roland Scheidegger Date: Fri Jun 23 19:35:50 2017 +0200 llvmpipe:fix using 32bit rasterization mistakenly, causing overflows We use the bounding box (triangle extents) to figure out if 32bit rasterization could potentially overflow. However, we used the bounding box which already got rounded up to 0 for negative coords for this, which is incorrect, leading to overflows and hence bogus rendering in some of our private use. It might be possible to simplify this somehow (we're now using 3 different boxes for binning) but I don't quite see how. Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_setup_context.h | 11 --- src/gallium/drivers/llvmpipe/lp_setup_line.c| 20 ++-- src/gallium/drivers/llvmpipe/lp_setup_point.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 41 - 4 files changed, 43 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 9714691270..4b55fd922c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -215,10 +215,11 @@ lp_setup_alloc_triangle(struct lp_scene *scene, unsigned *tri_size); boolean -lp_setup_bin_triangle( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - const struct u_rect *bbox, - int nr_planes, - unsigned scissor_index ); +lp_setup_bin_triangle(struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + const struct u_rect *bboxorig, + const struct u_rect *bbox, + int nr_planes, + unsigned scissor_index); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 018130c319..d0bac5efb9 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ 
b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -288,7 +288,9 @@ try_setup_line( struct lp_setup_context *setup, struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); - struct u_rect bbox; + const struct u_rect *scissor; + struct u_rect bbox, bboxpos; + boolean s_planes[4]; unsigned tri_bytes; int x[4]; int y[4]; @@ -579,10 +581,12 @@ try_setup_line( struct lp_setup_context *setup, return TRUE; } + bboxpos = bbox; + /* Can safely discard negative regions: */ - bbox.x0 = MAX2(bbox.x0, 0); - bbox.y0 = MAX2(bbox.y0, 0); + bboxpos.x0 = MAX2(bboxpos.x0, 0); + bboxpos.y0 = MAX2(bboxpos.y0, 0); nr_planes = 4; /* @@ -591,8 +595,8 @@ try_setup_line( struct lp_setup_context *setup, */ if (setup->scissor_test) { /* why not just use draw_regions */ - boolean s_planes[4]; - scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]); + scissor = &setup->scissors[viewport_index]; + scissor_planes_needed(s_planes, &bboxpos, scissor); nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; } @@ -718,11 +722,7 @@ try_setup_line( struct lp_setup_context *setup, * (easier to evaluate) to ordinary planes.) 
*/ if (nr_planes > 4) { - /* why not just use draw_regions */ - const struct u_rect *scissor = &setup->scissors[viewport_index]; struct lp_rast_plane *plane_s = &plane[4]; - boolean s_planes[4]; - scissor_planes_needed(s_planes, &bbox, scissor); if (s_planes[0]) { plane_s->dcdx = -1 << 8; @@ -755,7 +755,7 @@ try_setup_line( struct lp_setup_context *setup, assert(plane_s == &plane[nr_planes]); } - return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index); + return lp_setup_bin_triangle(setup, line, &bbox, &bboxpos, nr_planes, viewport_index); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index ddb6f0e73b..8cb6b83f91 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -513,7 +513,7 @@ try_setup_point( struct lp_setup_context *setup, plane[3].eo = 0; } - return lp_setup_bin_triangle(setup, point, &bbox, nr_planes, viewport_index); + return lp_setup_bin_triangle(setup, point, &bbox, &bbox, nr_planes, viewport_index); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 324e93841f..39755d6b58 100644 --- a/src/gallium/drive
Mesa (master): gallium: fixed modulo zero crashes in tgsi interpreter (v2)
Module: Mesa Branch: master Commit: f3c0bbe18ac65d22b2630f89fc1628bfe79695d4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3c0bbe18ac65d22b2630f89fc1628bfe79695d4 Author: Marius Gräfe Date: Fri Jun 9 15:39:00 2017 +0200 gallium: fixed modulo zero crashes in tgsi interpreter (v2) softpipe throws integer division by zero exceptions on windows when using % with integers in a geometry shader. v2: Made error results consistent with existing div/mod zero handling in tgsi. 64 bit signed integer division by zero returns zero like in micro_idiv, unsigned returns ~0u like in micro_udiv. Modulo operations always set all result bits to one (like in micro_umod). Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 40 +- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c41954cbf7..97c75e999c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -846,40 +846,40 @@ static void micro_u64div(union tgsi_double_channel *dst, const union tgsi_double_channel *src) { - dst->u64[0] = src[0].u64[0] / src[1].u64[0]; - dst->u64[1] = src[0].u64[1] / src[1].u64[1]; - dst->u64[2] = src[0].u64[2] / src[1].u64[2]; - dst->u64[3] = src[0].u64[3] / src[1].u64[3]; + dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull; + dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull; + dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull; + dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull; } static void micro_i64div(union tgsi_double_channel *dst, const union tgsi_double_channel *src) { - dst->i64[0] = src[0].i64[0] / src[1].i64[0]; - dst->i64[1] = src[0].i64[1] / src[1].i64[1]; - dst->i64[2] = src[0].i64[2] / src[1].i64[2]; - dst->i64[3] = src[0].i64[3] / src[1].i64[3]; + dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0; + dst->i64[1] = src[1].i64[1] ? 
src[0].i64[1] / src[1].i64[1] : 0; + dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0; + dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0; } static void micro_u64mod(union tgsi_double_channel *dst, const union tgsi_double_channel *src) { - dst->u64[0] = src[0].u64[0] % src[1].u64[0]; - dst->u64[1] = src[0].u64[1] % src[1].u64[1]; - dst->u64[2] = src[0].u64[2] % src[1].u64[2]; - dst->u64[3] = src[0].u64[3] % src[1].u64[3]; + dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull; + dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull; + dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull; + dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull; } static void micro_i64mod(union tgsi_double_channel *dst, const union tgsi_double_channel *src) { - dst->i64[0] = src[0].i64[0] % src[1].i64[0]; - dst->i64[1] = src[0].i64[1] % src[1].i64[1]; - dst->i64[2] = src[0].i64[2] % src[1].i64[2]; - dst->i64[3] = src[0].i64[3] % src[1].i64[3]; + dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll; + dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll; + dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll; + dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll; } static void @@ -4653,10 +4653,10 @@ micro_mod(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1) { - dst->i[0] = src0->i[0] % src1->i[0]; - dst->i[1] = src0->i[1] % src1->i[1]; - dst->i[2] = src0->i[2] % src1->i[2]; - dst->i[3] = src0->i[3] % src1->i[3]; + dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0; + dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0; + dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0; + dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0; } static void ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe: add LP_NEW_GS flag for updating vertex info
Module: Mesa Branch: master Commit: d2724fe5bddb1ca9cb61c79ddfe78a09b92eebc5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d2724fe5bddb1ca9cb61c79ddfe78a09b92eebc5 Author: Roland Scheidegger Date: Sat May 27 04:34:14 2017 +0200 llvmpipe: add LP_NEW_GS flag for updating vertex info The vertex information we compute here is really dependent on the last stage before FS. It just happened to work most of the time because new GS tend to come with new VS and/or FS... (The LP_NEW_GS flag was previously set but never used.) Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index fa9d4fb2fd..3e75d44dac 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -194,6 +194,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) /* This needs LP_NEW_RASTERIZER because of draw_prepare_shader_outputs(). */ if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | + LP_NEW_GS | LP_NEW_VS)) compute_vertex_info(llvmpipe); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallivm: (trivial) remove duplicated line
Module: Mesa Branch: master Commit: e1f9e9bafdb6da44c6bd6be8414913e481f8b031 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1f9e9bafdb6da44c6bd6be8414913e481f8b031 Author: Roland Scheidegger Date: Thu Mar 16 04:01:41 2017 +0100 gallivm: (trivial) remove duplicated line pointed out by clang (stored value never read) --- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index cd17040..98eb694 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -505,7 +505,6 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, * First, figure out fetch order. */ fetch_width = util_next_power_of_two(format_desc->block.bits); - num_gather = fetch_width / type.width; /* * fp64 are treated like fp32 except we fetch twice wide values * (as we shuffle after trunc). The shuffles for that work out ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): draw: (trivial) remove an unnecessary lp_build_alloca()
Module: Mesa Branch: master Commit: 9d104dfd55afa4477fcc4037b992a8c99ac97431 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9d104dfd55afa4477fcc4037b992a8c99ac97431 Author: Roland Scheidegger Date: Thu Mar 16 03:59:52 2017 +0100 draw: (trivial) remove an unnecessary lp_build_alloca() pointed out by clang (stored value never read) --- src/gallium/auxiliary/draw/draw_llvm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 104965b..bb08f66 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1670,8 +1670,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, ""); } - fetch_max = lp_build_alloca(gallivm, int32_type, "fetch_max"); - have_elts = LLVMBuildICmp(builder, LLVMIntNE, LLVMConstPointerNull(arg_types[10]), fetch_elts, ""); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium/util: (trivial) fix util_clear_render_target
Module: Mesa Branch: master Commit: c3a94d9195bff3a870d5a78dd53bd69c26eb23af URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3a94d9195bff3a870d5a78dd53bd69c26eb23af Author: Roland Scheidegger Date: Fri Feb 24 18:40:40 2017 +0100 gallium/util: (trivial) fix util_clear_render_target the format of the rt can be different than the one of the texture, so must propagate the format explicitly to the helper. Broken since 3f9c5d62441eba38e8b1592aba965ed5db6fd89b (but unused by st/mesa). --- src/gallium/auxiliary/util/u_surface.c | 15 --- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index f2a471d..5abf966 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -423,6 +423,7 @@ util_clear_color_texture_helper(struct pipe_transfer *dst_trans, static void util_clear_color_texture(struct pipe_context *pipe, struct pipe_resource *texture, + enum pipe_format format, const union pipe_color_union *color, unsigned level, unsigned dstx, unsigned dsty, unsigned dstz, @@ -430,7 +431,6 @@ util_clear_color_texture(struct pipe_context *pipe, { struct pipe_transfer *dst_trans; ubyte *dst_map; - enum pipe_format format = texture->format; dst_map = pipe_transfer_map_3d(pipe, texture, @@ -491,16 +491,16 @@ util_clear_render_target(struct pipe_context *pipe, dx, 0, w, 1, &dst_trans); if (dst_map) { - util_clear_color_texture_helper(dst_trans, dst_map, dst->format, color, - width, height, 1); + util_clear_color_texture_helper(dst_trans, dst_map, dst->format, + color, width, height, 1); pipe->transfer_unmap(pipe, dst_trans); } } else { unsigned depth = dst->u.tex.last_layer - dst->u.tex.first_layer + 1; - util_clear_color_texture(pipe, dst->texture, color, dst->u.tex.level, - dstx, dsty, dst->u.tex.first_layer, - width, height, depth); + util_clear_color_texture(pipe, dst->texture, dst->format, color, + dst->u.tex.level, dstx, dsty, + 
dst->u.tex.first_layer, width, height, depth); } } @@ -674,7 +674,8 @@ util_clear_texture(struct pipe_context *pipe, else desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1); - util_clear_color_texture(pipe, tex, &color, level, box->x, box->y, box->z, + util_clear_color_texture(pipe, tex, tex->format, &color, level, + box->x, box->y, box->z, box->width, box->height, box->depth); } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: implement util_clear_texture
Module: Mesa Branch: master Commit: 3f9c5d62441eba38e8b1592aba965ed5db6fd89b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3f9c5d62441eba38e8b1592aba965ed5db6fd89b Author: Lars Hamre Date: Wed Feb 22 10:56:41 2017 -0500 gallium: implement util_clear_texture v3: have util_clear_texture mirror the pipe function (Roland Scheidegger) v2: rework util clear functions such that they operate on a resource instead of a surface (Roland Scheidegger) Creates a util_clear_texture function for implementing the GL_ARB_clear_texture in softpipe and llvmpipe. Signed-off-by: Lars Hamre Reviewed-by: Roland Scheidegger Reviewed-by: Edward O'Callaghan --- src/gallium/auxiliary/util/u_surface.c | 386 - src/gallium/auxiliary/util/u_surface.h | 7 + 2 files changed, 248 insertions(+), 145 deletions(-) diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index a9ed006..f2a471d 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -388,6 +388,66 @@ no_src_map: ; } +static void +util_clear_color_texture_helper(struct pipe_transfer *dst_trans, +ubyte *dst_map, +enum pipe_format format, +const union pipe_color_union *color, +unsigned width, unsigned height, unsigned depth) +{ + union util_color uc; + + assert(dst_trans->stride > 0); + + if (util_format_is_pure_integer(format)) { + /* + * We expect int/uint clear values here, though some APIs + * might disagree (but in any case util_pack_color() + * couldn't handle it)... 
+ */ + if (util_format_is_pure_sint(format)) { + util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1); + } else { + assert(util_format_is_pure_uint(format)); + util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1); + } + } else { + util_pack_color(color->f, format, &uc); + } + + util_fill_box(dst_map, format, + dst_trans->stride, dst_trans->layer_stride, + 0, 0, 0, width, height, depth, &uc); +} + +static void +util_clear_color_texture(struct pipe_context *pipe, + struct pipe_resource *texture, + const union pipe_color_union *color, + unsigned level, + unsigned dstx, unsigned dsty, unsigned dstz, + unsigned width, unsigned height, unsigned depth) +{ + struct pipe_transfer *dst_trans; + ubyte *dst_map; + enum pipe_format format = texture->format; + + dst_map = pipe_transfer_map_3d(pipe, + texture, + level, + PIPE_TRANSFER_WRITE, + dstx, dsty, dstz, + width, height, depth, + &dst_trans); + if (!dst_map) + return; + + if (dst_trans->stride > 0) { + util_clear_color_texture_helper(dst_trans, dst_map, format, color, + width, height, depth); + } + pipe->transfer_unmap(pipe, dst_trans); +} #define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) @@ -410,8 +470,6 @@ util_clear_render_target(struct pipe_context *pipe, { struct pipe_transfer *dst_trans; ubyte *dst_map; - union util_color uc; - unsigned max_layer; assert(dst->texture); if (!dst->texture) @@ -426,56 +484,202 @@ util_clear_render_target(struct pipe_context *pipe, unsigned pixstride = util_format_get_blocksize(dst->format); dx = (dst->u.buf.first_element + dstx) * pixstride; w = width * pixstride; - max_layer = 0; dst_map = pipe_transfer_map(pipe, dst->texture, 0, 0, PIPE_TRANSFER_WRITE, dx, 0, w, 1, &dst_trans); + if (dst_map) { + util_clear_color_texture_helper(dst_trans, dst_map, dst->format, color, + width, height, 1); + pipe->transfer_unmap(pipe, dst_trans); + } } else { - max_layer = dst->u.tex.last_layer - dst->u.tex.first_layer; - dst_map = pipe_transfer_map_3d(pipe, - dst->texture, - 
dst->u.tex.level, - PIPE_TRANSFER_WRITE, - dstx, dsty, dst->u.tex.first_layer, - width, height, max_layer + 1, &dst_trans); + unsigned depth = dst->u.tex.
Mesa (master): docs: update features.txt for GL_ARB_clear_texture with llvmpipe and softpipe
Module: Mesa Branch: master Commit: caf4252a01dc95fbc47e8a50988040f67396b278 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=caf4252a01dc95fbc47e8a50988040f67396b278 Author: Lars Hamre Date: Wed Feb 22 10:56:44 2017 -0500 docs: update features.txt for GL_ARB_clear_texture with llvmpipe and softpipe Signed-off-by: Lars Hamre Reviewed-by: Roland Scheidegger Reviewed-by: Edward O'Callaghan --- docs/features.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 346ba28..d9528e9 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -192,7 +192,7 @@ GL 4.4, GLSL 4.40 -- all DONE: i965/gen8+, nvc0, radeonsi GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers) GL_ARB_buffer_storage DONE (i965, nv50, r600) - GL_ARB_clear_texture DONE (i965, nv50, r600) + GL_ARB_clear_texture DONE (i965, nv50, r600, llvmpipe, softpipe) GL_ARB_enhanced_layouts DONE (i965, nv50, llvmpipe, softpipe) - compile-time constant expressions DONE - explicit byte offsets for blocksDONE ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe: enable clear_texture with util_clear_texture
Module: Mesa Branch: master Commit: 12f2058b47c51f1357b622e77c703d5eb05bce50 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=12f2058b47c51f1357b622e77c703d5eb05bce50 Author: Lars Hamre Date: Wed Feb 22 10:56:42 2017 -0500 llvmpipe: enable clear_texture with util_clear_texture Passes all corresponding piglit tests. Signed-off-by: Lars Hamre Reviewed-by: Roland Scheidegger Reviewed-by: Edward O'Callaghan --- src/gallium/drivers/llvmpipe/lp_screen.c | 3 ++- src/gallium/drivers/llvmpipe/lp_surface.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 76a30a6..2633b0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -307,6 +307,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: return 1; + case PIPE_CAP_CLEAR_TEXTURE: + return 1; case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: @@ -315,7 +317,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: - case PIPE_CAP_CLEAR_TEXTURE: case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_TGSI_PACK_HALF_FLOAT: case PIPE_CAP_MULTI_DRAW_INDIRECT: diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 784db7f..953b26e 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -231,7 +231,8 @@ llvmpipe_init_surface_functions(struct llvmpipe_context *lp) lp->pipe.clear_depth_stencil = llvmpipe_clear_depth_stencil; lp->pipe.create_surface = llvmpipe_create_surface; lp->pipe.surface_destroy = llvmpipe_surface_destroy; - /* These two are not actually functions dealing with surfaces */ + /* These are not actually 
functions dealing with surfaces */ + lp->pipe.clear_texture = util_clear_texture; lp->pipe.resource_copy_region = lp_resource_copy; lp->pipe.blit = lp_blit; lp->pipe.flush_resource = lp_flush_resource; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit