VERSION | 2 debian/changelog | 2 src/compiler/Makefile.sources | 1 src/compiler/glsl/ast_to_hir.cpp | 17 src/compiler/glsl/builtin_variables.cpp | 13 src/compiler/glsl/linker.cpp | 20 - src/compiler/nir/nir.h | 2 src/compiler/nir/nir_lower_alu_to_scalar.c | 2 src/compiler/nir/nir_opt_algebraic.py | 2 src/compiler/nir/nir_propagate_invariant.c | 196 ++++++++++ src/gallium/auxiliary/gallivm/lp_bld_arit.c | 6 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 34 + src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h | 5 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 4 src/gallium/drivers/radeonsi/si_pm4.h | 2 src/gallium/drivers/radeonsi/si_state.c | 13 src/intel/vulkan/anv_cmd_buffer.c | 2 src/intel/vulkan/anv_device.c | 2 src/intel/vulkan/anv_meta_clear.c | 1 src/intel/vulkan/anv_pipeline.c | 5 src/intel/vulkan/anv_private.h | 4 src/intel/vulkan/gen7_pipeline.c | 1 src/intel/vulkan/gen8_cmd_buffer.c | 41 +- src/intel/vulkan/gen8_pipeline.c | 6 src/intel/vulkan/genX_cmd_buffer.c | 16 src/mesa/drivers/dri/i965/brw_context.c | 2 src/mesa/drivers/dri/i965/brw_fs.cpp | 5 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 53 ++ src/mesa/drivers/dri/i965/brw_tcs.c | 6 src/mesa/main/image.c | 8 src/mesa/main/mtypes.h | 2 src/mesa/main/pipelineobj.c | 17 src/mesa/program/prog_statevars.c | 19 src/mesa/program/prog_statevars.h | 2 src/mesa/state_tracker/st_cb_compute.c | 3 src/mesa/state_tracker/st_cb_copyimage.c | 3 src/mesa/state_tracker/st_cb_texture.c | 9 src/mesa/state_tracker/st_gen_mipmap.c | 3 38 files changed, 454 insertions(+), 77 deletions(-)
New commits: commit 5ee64a01c106975169727603b64129e8ed460003 Author: Timo Aaltonen <[email protected]> Date: Wed Jun 22 15:07:20 2016 +0300 bump version diff --git a/debian/changelog b/debian/changelog index b5ce92a..ed2141c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -mesa (12.0.0~rc3-1) UNRELEASED; urgency=medium +mesa (12.0.0~rc4-1) UNRELEASED; urgency=medium * New upstream release candidate. * symbols: Updated. commit 5e0b11cb6dbeab7ca6a1ba2edca56701cdfde96c Author: Emil Velikov <[email protected]> Date: Tue Jun 21 13:32:04 2016 +0100 Update version to 12.0.0-rc4 Signed-off-by: Emil Velikov <[email protected]> diff --git a/VERSION b/VERSION index 6ef7d92..076ead7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -12.0.0-rc3 +12.0.0-rc4 commit 6306930c3f0cd790e9d8995468559453b6c75b2c Author: Nicolai Hähnle <[email protected]> Date: Fri Jun 10 15:59:58 2016 +0200 st/mesa: flush bitmap cache before CopyImageSubData Found by inspection. Cc: 11.2 12.0 <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit f9ddd52317caf14a21ec7c040fd4bb944f9842e4) diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c index 617e470..8afb861 100644 --- a/src/mesa/state_tracker/st_cb_copyimage.c +++ b/src/mesa/state_tracker/st_cb_copyimage.c @@ -23,6 +23,7 @@ */ #include "state_tracker/st_context.h" +#include "state_tracker/st_cb_bitmap.h" #include "state_tracker/st_cb_copyimage.h" #include "state_tracker/st_cb_fbo.h" #include "state_tracker/st_texture.h" @@ -547,6 +548,8 @@ st_CopyImageSubData(struct gl_context *ctx, struct pipe_box box; int src_level, dst_level; + st_flush_bitmap_cache(st); + if (src_image) { struct st_texture_image *src = st_texture_image(src_image); src_res = src->pt; commit 76377387c2d44d0fbae21763386ac86ffb54c635 Author: Nicolai Hähnle <[email protected]> Date: Thu Jun 9 12:22:31 2016 +0200 st/mesa: flush bitmap cache before texture functions As far as I can tell, 
a sequence of glBitmap followed by texture functions that refer to a texture bound as the framebuffer is well within what should be allowed. Found by inspection. Cc: 11.2 12.0 <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit e7fff3cfe156e13198107e5e76a77fb79ed02173) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index cfe9c4b..cf1c351 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -48,6 +48,7 @@ #include "state_tracker/st_debug.h" #include "state_tracker/st_context.h" +#include "state_tracker/st_cb_bitmap.h" #include "state_tracker/st_cb_fbo.h" #include "state_tracker/st_cb_flush.h" #include "state_tracker/st_cb_texture.h" @@ -1716,6 +1717,8 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, unsigned dstz = texImage->Face + texImage->TexObject->MinLayer; unsigned dst_level = 0; + st_flush_bitmap_cache(st); + if (stObj->pt == stImage->pt) dst_level = texImage->TexObject->MinLevel + texImage->Level; @@ -2181,6 +2184,8 @@ st_GetTexSubImage(struct gl_context * ctx, assert(!_mesa_is_format_etc2(texImage->TexFormat) && texImage->TexFormat != MESA_FORMAT_ETC1_RGB8); + st_flush_bitmap_cache(st); + if (!st->prefer_blit_based_texture_transfer && !_mesa_is_format_compressed(texImage->TexFormat)) { /* Try to avoid the fallback if we're doing texture decompression here */ @@ -2644,6 +2649,8 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims, unsigned bind; GLint srcY0, srcY1; + st_flush_bitmap_cache(st); + assert(!_mesa_is_format_etc2(texImage->TexFormat) && texImage->TexFormat != MESA_FORMAT_ETC1_RGB8); @@ -3166,6 +3173,8 @@ st_ClearTexSubImage(struct gl_context *ctx, if (!pt) return; + st_flush_bitmap_cache(st); + u_box_3d(xoffset, yoffset, zoffset + texImage->Face, width, height, depth, &box); if (texImage->TexObject->Immutable) { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c 
index a14bbfa..adf02e7 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -40,6 +40,7 @@ #include "st_context.h" #include "st_texture.h" #include "st_gen_mipmap.h" +#include "st_cb_bitmap.h" #include "st_cb_texture.h" @@ -96,6 +97,8 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, if (lastLevel == 0) return; + st_flush_bitmap_cache(st); + /* The texture isn't in a "complete" state yet so set the expected * lastLevel here, since it won't get done in st_finalize_texture(). */ commit 6775b169cdffecd373d57847e5d71db3fe39409a Author: Nicolai Hähnle <[email protected]> Date: Thu Jun 9 12:12:34 2016 +0200 st/mesa: flush bitmap cache before compute dispatch In the unlikely case that a program uses glBitmap to render to a framebuffer whose texture is bound in a compute shader. Found by inspection. Cc: 11.2 12.0 <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit c542b7e43d3a504456518c9f407e21c4e7e5fa88) diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c index bfc6d96..063d750 100644 --- a/src/mesa/state_tracker/st_cb_compute.c +++ b/src/mesa/state_tracker/st_cb_compute.c @@ -28,6 +28,7 @@ #include "main/state.h" #include "st_atom.h" #include "st_context.h" +#include "st_cb_bitmap.h" #include "st_cb_bufferobjects.h" #include "st_cb_compute.h" @@ -44,6 +45,8 @@ static void st_dispatch_compute_common(struct gl_context *ctx, struct pipe_context *pipe = st->pipe; struct pipe_grid_info info = { 0 }; + st_flush_bitmap_cache(st); + if (ctx->NewState) _mesa_update_state(ctx); commit a0235eb0f716e05c290cad66292b703c2178af91 Author: Kenneth Graunke <[email protected]> Date: Wed Jun 8 16:09:02 2016 -0700 i965: Fix multiplication of immediates on Cherryview/Broxton. Cherryview and Broxton don't support DW x DW multiplication. We have piles of code to handle this, but apparently weren't retyping in the immediate case. 
For example, tests/spec/arb_tessellation_shader/execution/dvec3-vs-tcs-tes makes the simulator angry about instructions such as: mul(8) r18<1>:D r10.0<8;8,1>:D 0x00000003:D Just retype to W or UW. It should be safe on all platforms. Cc: "12.0" <[email protected]> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462 Reviewed-by: Matt Turner <[email protected]> Reviewed-by: Jordan Justen <[email protected]> Signed-off-by: Kenneth Graunke <[email protected]> (cherry picked from commit cd89c834a8b3b4e5f5874c8e1f90c9b01d541181) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0347b0a..8337774 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3564,7 +3564,10 @@ fs_visitor::lower_integer_multiplication() ibld.MOV(imm, inst->src[1]); ibld.MUL(inst->dst, imm, inst->src[0]); } else { - ibld.MUL(inst->dst, inst->src[0], inst->src[1]); + const bool ud = (inst->src[1].type == BRW_REGISTER_TYPE_UD); + ibld.MUL(inst->dst, inst->src[0], + ud ? 
brw_imm_uw(inst->src[1].ud) + : brw_imm_w(inst->src[1].d)); } } else { /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot commit 09a098bdeb89baacd6bbadc4180daf9c2ffaa840 Author: Jason Ekstrand <[email protected]> Date: Tue Jun 14 08:40:49 2016 -0700 anv: Add proper support for depth clamping Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit eb6764c4a73006eee32e19e3afc6eab100a2ce16) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index f864248..97300c3 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -380,7 +380,7 @@ void anv_GetPhysicalDeviceFeatures( .logicOp = true, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, - .depthClamp = false, + .depthClamp = true, .depthBiasClamp = false, .fillModeNonSolid = true, .depthBounds = false, diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index fe750c8..7ec0608 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -173,6 +173,7 @@ create_pipeline(struct anv_device *device, .cullMode = VK_CULL_MODE_NONE, .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, .depthBiasEnable = false, + .depthClampEnable = true, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index e41f623..32594f7 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1165,6 +1165,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->batch.relocs = &pipeline->batch_relocs; copy_non_dynamic_state(pipeline, pCreateInfo); + pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState && + pCreateInfo->pRasterizationState->depthClampEnable; pipeline->use_repclear = extra && extra->use_repclear; diff 
--git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f5500c5..052ced4 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1360,7 +1360,8 @@ VkResult anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer, + bool depth_clamp_enable); void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); @@ -1485,6 +1486,8 @@ struct anv_pipeline { uint32_t cs_right_mask; + bool depth_clamp_enable; + struct { uint32_t sf[7]; uint32_t depth_stencil_state[3]; diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index f069db9..dd34d71 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -196,6 +196,7 @@ genX(graphics_pipeline_create)( clip.ClipEnable = !(extra && extra->use_rectlist), clip.APIMode = APIMODE_OGL, clip.ViewportXYClipTestEnable = true, + clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable, clip.ClipMode = CLIPMODE_NORMAL, clip.TriangleStripListProvokingVertexSelect = 0, diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 395d0da..e22b4e2 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -77,7 +77,8 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) } void -gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer) +gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer, + bool depth_clamp_enable) { uint32_t count = cmd_buffer->state.dynamic.viewport.count; const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports; @@ -88,8 +89,8 @@ 
gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer) const VkViewport *vp = &viewports[i]; struct GENX(CC_VIEWPORT) cc_viewport = { - .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth, + .MinimumDepth = depth_clamp_enable ? vp->minDepth : 0.0f, + .MaximumDepth = depth_clamp_enable ? vp->maxDepth : 1.0f, }; GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 54585c3..2a96be0 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -85,11 +85,11 @@ emit_rs_state(struct anv_pipeline *pipeline, .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .ScissorRectangleEnable = !(extra && extra->use_rectlist), #if GEN_GEN == 8 - .ViewportZClipTestEnable = true, + .ViewportZClipTestEnable = !pipeline->depth_clamp_enable, #else /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ - .ViewportZFarClipTestEnable = true, - .ViewportZNearClipTestEnable = true, + .ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable, + .ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable, #endif .GlobalDepthOffsetEnableSolid = info->depthBiasEnable, .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 58f5e0b..3d628df 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -525,9 +525,13 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (dirty) gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) { + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) gen8_cmd_buffer_emit_viewport(cmd_buffer); - gen8_cmd_buffer_emit_depth_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | + ANV_CMD_DIRTY_PIPELINE)) { + 
gen8_cmd_buffer_emit_depth_viewport(cmd_buffer, + pipeline->depth_clamp_enable); } if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) commit f3c8dde2e4cac98ab190c0378e20424f0b59d9ef Author: Jason Ekstrand <[email protected]> Date: Tue Jun 14 08:15:34 2016 -0700 anv/cmd_buffer: Split emit_viewport in two Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit 8a46b505cb2c7255ad430b56c1ce0dfa9c13c559) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index cd3588a..f5500c5 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1360,6 +1360,7 @@ VkResult anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index df4036a..395d0da 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -40,8 +40,6 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports; struct anv_state sf_clip_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); for (uint32_t i = 0; i < count; i++) { const VkViewport *vp = &viewports[i]; @@ -65,29 +63,45 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) .YMaxViewPort = vp->y + vp->height - 1, }; + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + } + + if (!cmd_buffer->device->info.has_llc) + 
anv_state_clflush(sf_clip_state); + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { + clip.SFClipViewportPointer = sf_clip_state.offset; + } +} + +void +gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t count = cmd_buffer->state.dynamic.viewport.count; + const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports; + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + struct GENX(CC_VIEWPORT) cc_viewport = { .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth + .MaximumDepth = vp->maxDepth, }; - GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, - &sf_clip_viewport); GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); } - if (!cmd_buffer->device->info.has_llc) { - anv_state_clflush(sf_clip_state); + if (!cmd_buffer->device->info.has_llc) anv_state_clflush(cc_state); - } anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { cc.CCViewportPointer = cc_state.offset; } - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { - clip.SFClipViewportPointer = sf_clip_state.offset; - } } #endif diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index c62bed4..58f5e0b 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -525,8 +525,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (dirty) gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) { gen8_cmd_buffer_emit_viewport(cmd_buffer); + gen8_cmd_buffer_emit_depth_viewport(cmd_buffer); + } if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) 
gen7_cmd_buffer_emit_scissor(cmd_buffer); commit 3fddb9fd46a6066d8ecf0bd19a370acbbbc05b2b Author: Jason Ekstrand <[email protected]> Date: Mon Jun 13 17:09:37 2016 -0700 anv/cmd_buffer: Set depth/stencil extent based on the image It used to be based on the framebuffer which isn't quite right. Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit 20e95a746df34923eb4aac5e7f1ab6d722432d89) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index d9acf58..c62bed4 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1024,11 +1024,11 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) db.DepthBufferObjectControlState = GENX(MOCS), db.SurfacePitch = image->depth_surface.isl.row_pitch - 1; - db.Height = fb->height - 1; - db.Width = fb->width - 1; - db.LOD = 0; - db.Depth = 1 - 1; - db.MinimumArrayElement = 0; + db.Height = image->extent.height - 1; + db.Width = image->extent.width - 1; + db.LOD = iview->base_mip; + db.Depth = image->array_size - 1; /* FIXME: 3-D */ + db.MinimumArrayElement = iview->base_layer; #if GEN_GEN >= 8 db.SurfaceQPitch = commit f614a1f4d88d02f429f29a4e95596e4a40ba7cce Author: Jason Ekstrand <[email protected]> Date: Wed Jun 15 14:30:33 2016 -0700 anv/cmd_buffer: Don't crash if push constants are provided for missing stages Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit b65f2e4163c9180e6a022c0afec018b08e4c5aa5) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 5be5f3e..24c18fe 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -1038,7 +1038,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.pipeline->prog_data[stage]; /* If we don't 
actually have any push constants, bail. */ - if (data == NULL || prog_data->nr_params == 0) + if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) return (struct anv_state) { .offset = 0 }; struct anv_state state = commit f4bc7218d59d55825c4ab2b76e6134827f10d401 Author: Jason Ekstrand <[email protected]> Date: Thu Jun 16 10:57:39 2016 -0700 anv/pipeline: Do invariance propagation on SPIR-V shaders Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit e6c2fe451962e364f30f689dc48c34e2b6161b25) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 60b7c6b..e41f623 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -165,6 +165,9 @@ anv_shader_compile_to_nir(struct anv_device *device, nir_remove_dead_variables(nir, nir_var_system_value); nir_validate_shader(nir); + nir_propagate_invariant(nir); + nir_validate_shader(nir); + nir_lower_io_to_temporaries(entry_point->shader, entry_point, true, false); nir_lower_system_values(nir); commit 77f241bd37e7d0a76a0ac9223bc4cebba322994c Author: Jason Ekstrand <[email protected]> Date: Mon Jun 13 14:41:05 2016 -0700 nir/alu_to_scalar: Respect the exact ALU operation qualifier Just setting builder->exact isn't sufficient because that only applies to instructions that are built with the builder but instructions created manually and only inserted using the builder are left alone. 
Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit bec07b729242f6a2dcf5a12ce75bf8b07ea658e0) diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index b491791..4f72cf7 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -56,6 +56,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op, nir_alu_src_copy(&chan->src[1], &instr->src[1], chan); chan->src[1].swizzle[0] = chan->src[1].swizzle[i]; } + chan->exact = instr->exact; nir_builder_instr_insert(builder, &chan->instr); @@ -229,6 +230,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size); lower->dest.saturate = instr->dest.saturate; comps[chan] = &lower->dest.dest.ssa; + lower->exact = instr->exact; nir_builder_instr_insert(b, &lower->instr); } commit deedb368de7dc50f7196af440c338dcf6a361564 Author: Jason Ekstrand <[email protected]> Date: Mon Jun 13 12:47:19 2016 -0700 nir: Add a pass for propagating invariant decorations This pass is similar to propagate_invariance in the GLSL compiler. The real "output" of this pass is that any algebraic operations which are eventually consumed by an invariant variable get marked as "exact". 
Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit 202751fbb7e3d35c1aa84f325f862245dab67f6c) diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 09a756b..bbd5d14 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -231,6 +231,7 @@ NIR_FILES = \ nir/nir_phi_builder.c \ nir/nir_phi_builder.h \ nir/nir_print.c \ + nir/nir_propagate_invariant.c \ nir/nir_remove_dead_variables.c \ nir/nir_repair_ssa.c \ nir/nir_search.c \ diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 20f6520..9816ed6 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2290,6 +2290,8 @@ bool nir_lower_returns(nir_shader *shader); bool nir_inline_functions(nir_shader *shader); +bool nir_propagate_invariant(nir_shader *shader); + void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); void nir_lower_var_copies(nir_shader *shader); diff --git a/src/compiler/nir/nir_propagate_invariant.c b/src/compiler/nir/nir_propagate_invariant.c new file mode 100644 index 0000000..7b5bd6c --- /dev/null +++ b/src/compiler/nir/nir_propagate_invariant.c @@ -0,0 +1,196 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "nir.h" + +static void +add_src(nir_src *src, struct set *invariants) +{ + if (src->is_ssa) { + _mesa_set_add(invariants, src->ssa); + } else { + _mesa_set_add(invariants, src->reg.reg); + } +} + +static bool +add_src_cb(nir_src *src, void *state) +{ + add_src(src, state); + return true; +} + +static bool +dest_is_invariant(nir_dest *dest, struct set *invariants) +{ + if (dest->is_ssa) { + return _mesa_set_search(invariants, &dest->ssa); + } else { + return _mesa_set_search(invariants, dest->reg.reg); + } +} + +static void +add_cf_node(nir_cf_node *cf, struct set *invariants) +{ + if (cf->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(cf); + add_src(&if_stmt->condition, invariants); + } + + if (cf->parent) + add_cf_node(cf->parent, invariants); +} + +static void +add_var(nir_variable *var, struct set *invariants) +{ + _mesa_set_add(invariants, var); +} + +static bool +var_is_invariant(nir_variable *var, struct set * invariants) +{ + return var->data.invariant || _mesa_set_search(invariants, var); +} + +static void +propagate_invariant_instr(nir_instr *instr, struct set *invariants) +{ + switch (instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (!dest_is_invariant(&alu->dest.dest, invariants)) + break; + + alu->exact = true; + nir_foreach_src(instr, add_src_cb, invariants); + break; + } + + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (dest_is_invariant(&tex->dest, 
invariants)) + nir_foreach_src(instr, add_src_cb, invariants); + break; + } + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_copy_var: + /* If the destination is invariant then so is the source */ + if (var_is_invariant(intrin->variables[0]->var, invariants)) + add_var(intrin->variables[1]->var, invariants); + break; + + case nir_intrinsic_load_var: + if (dest_is_invariant(&intrin->dest, invariants)) + add_var(intrin->variables[0]->var, invariants); + break; + + case nir_intrinsic_store_var: + if (var_is_invariant(intrin->variables[0]->var, invariants)) + add_src(&intrin->src[0], invariants); + break; + + default: + /* Nothing to do */ + break; + } + } + + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + case nir_instr_type_load_const: + break; /* Nothing to do */ + + case nir_instr_type_phi: { + nir_phi_instr *phi = nir_instr_as_phi(instr); + if (!dest_is_invariant(&phi->dest, invariants)) + break; + + nir_foreach_phi_src(src, phi) { + add_src(&src->src, invariants); + add_cf_node(&src->pred->cf_node, invariants); + } + break; + } + + case nir_instr_type_call: + unreachable("This pass must be run after function inlining"); + + case nir_instr_type_parallel_copy: + default: + unreachable("Cannot have this instruction type"); + } +} + +static bool +propagate_invariant_impl(nir_function_impl *impl, struct set *invariants) +{ + bool progress = false; + + while (true) { + uint32_t prev_entries = invariants->entries; + + nir_foreach_block_reverse(block, impl) { + nir_foreach_instr_reverse(instr, block) + propagate_invariant_instr(instr, invariants); + } + + /* Keep running until we make no more progress. 
*/ + if (invariants->entries > prev_entries) { + progress = true; + continue; + } else { + break; + } + } + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs); + } + + return progress; +} + +bool +nir_propagate_invariant(nir_shader *shader) +{ + /* Hash set of invariant things */ + struct set *invariants = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + bool progress = false; + nir_foreach_function(function, shader) { + if (function->impl && propagate_invariant_impl(function->impl, invariants)) + progress = true; + } + + _mesa_set_destroy(invariants, NULL); + + return progress; +} commit bac23b13eb75a7bacdec439eb4c239a8dedb24e7 Author: Jason Ekstrand <[email protected]> Date: Sat Jun 18 12:30:36 2016 -0700 nir/algebraic: Remove imprecise flog2 optimizations While mathematically correct, these two optimizations result in an expression with substantially lower precision than the original. For any positive finite floating-point value, log2(x) is well-defined and finite. More precisely, it is in the range [-150, 150] so any sum of logarithms log2(a) + log2(b) is also well-defined and finite as long as a and b are both positive and finite. However, if a and b are either very small or very large, their product may get flushed to infinity or zero causing log2(a * b) to be nowhere close to log2(a) + log2(b). This imprecision was causing incorrect rendering in The Talos Principle because part of its HDR rendering process involves doing 8 texture operations, clamping the result to [0, 65000], taking a dot-product with a constant, and then taking the log2. This is done 6 or 8 times and summed to produce the final result which is written to a red texture. In cases where you have a region of the screen that is very dark, it can end up getting a result value of -inf which is not what is intended.
Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Matt Turner <[email protected]> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96425 Cc: "11.1 11.2 12.0" <[email protected]> (cherry picked from commit 68e308d85355079ad93bd4e16cba164784740fdf) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f8db2b6..a7a541a 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -224,8 +224,6 @@ optimizations = [ (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))), (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), - (('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), - (('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), (('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), # Division and reciprocal (('~fdiv', 1.0, a), ('frcp', a)), commit b03b256e921c3f7cebfcf9efd5bdf7b403b9c961 Author: Nicolai Hähnle <[email protected]> Date: Fri Jun 17 10:48:53 2016 +0200 radeonsi: fix calculation of valid RB mask per SE The old calculation treated too many RBs as disabled. 
Cc: 11.0 11.1 11.2 12.0 <[email protected]> Reviewed-by: Alex Deucher <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit c95175581e983642dc4b23d059e6eaff5b79d2db) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 47af9c8..96da179 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3478,10 +3478,15 @@ si_write_harvested_raster_configs(struct si_context *sctx, unsigned se_mask[4]; unsigned se; - se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; - se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; - se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; - se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; + se_mask[0] = ((1 << rb_per_se) - 1); + se_mask[1] = (se_mask[0] << rb_per_se); + se_mask[2] = (se_mask[1] << rb_per_se); + se_mask[3] = (se_mask[2] << rb_per_se); + + se_mask[0] &= rb_mask; + se_mask[1] &= rb_mask; + se_mask[2] &= rb_mask; + se_mask[3] &= rb_mask; assert(num_se == 1 || num_se == 2 || num_se == 4); assert(sh_per_se == 1 || sh_per_se == 2); commit 52ae654569cb1e0e42f12b492fdd62d7cf20dc12 Author: Nicolai Hähnle <[email protected]> Date: Fri Jun 17 10:30:44 2016 +0200 radeonsi: raise SI_PM4_MAX_DW The old limit, introduced in commit afa752d3f03ac6697581ff5d324e8ac0512ef513, was exceeded by 4 SE configurations which hit si_write_harvested_raster_configs. Cc: 11.1 11.2 12.0 <[email protected]> Reviewed-by: Alex Deucher <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit 6c2e63698290d3ea868eefcc3e4dd51dc1e16c64) diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 309a596..35fa6c3 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -29,7 +29,7 @@ #include "radeon/radeon_winsys.h" -#define SI_PM4_MAX_DW 160

