VERSION | 2 src/compiler/Makefile.sources | 1 src/compiler/glsl/ast_to_hir.cpp | 17 src/compiler/glsl/builtin_variables.cpp | 13 src/compiler/glsl/linker.cpp | 20 - src/compiler/nir/nir.h | 2 src/compiler/nir/nir_lower_alu_to_scalar.c | 2 src/compiler/nir/nir_opt_algebraic.py | 2 src/compiler/nir/nir_propagate_invariant.c | 196 ++++++++++ src/gallium/auxiliary/gallivm/lp_bld_arit.c | 6 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 34 + src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h | 5 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 4 src/gallium/drivers/radeonsi/si_pm4.h | 2 src/gallium/drivers/radeonsi/si_state.c | 13 src/intel/vulkan/anv_cmd_buffer.c | 2 src/intel/vulkan/anv_device.c | 2 src/intel/vulkan/anv_meta_clear.c | 1 src/intel/vulkan/anv_pipeline.c | 5 src/intel/vulkan/anv_private.h | 4 src/intel/vulkan/gen7_pipeline.c | 1 src/intel/vulkan/gen8_cmd_buffer.c | 41 +- src/intel/vulkan/gen8_pipeline.c | 6 src/intel/vulkan/genX_cmd_buffer.c | 16 src/mesa/drivers/dri/i965/brw_context.c | 2 src/mesa/drivers/dri/i965/brw_fs.cpp | 5 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 53 ++ src/mesa/drivers/dri/i965/brw_tcs.c | 6 src/mesa/main/image.c | 8 src/mesa/main/mtypes.h | 2 src/mesa/main/pipelineobj.c | 17 src/mesa/program/prog_statevars.c | 19 src/mesa/program/prog_statevars.h | 2 src/mesa/state_tracker/st_cb_compute.c | 3 src/mesa/state_tracker/st_cb_copyimage.c | 3 src/mesa/state_tracker/st_cb_texture.c | 9 src/mesa/state_tracker/st_gen_mipmap.c | 3 37 files changed, 453 insertions(+), 76 deletions(-)
New commits: commit 5e0b11cb6dbeab7ca6a1ba2edca56701cdfde96c Author: Emil Velikov <[email protected]> Date: Tue Jun 21 13:32:04 2016 +0100 Update version to 12.0.0-rc4 Signed-off-by: Emil Velikov <[email protected]> diff --git a/VERSION b/VERSION index 6ef7d92..076ead7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -12.0.0-rc3 +12.0.0-rc4 commit 6306930c3f0cd790e9d8995468559453b6c75b2c Author: Nicolai Hähnle <[email protected]> Date: Fri Jun 10 15:59:58 2016 +0200 st/mesa: flush bitmap cache before CopyImageSubData Found by inspection. Cc: 11.2 12.0 <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit f9ddd52317caf14a21ec7c040fd4bb944f9842e4) diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c index 617e470..8afb861 100644 --- a/src/mesa/state_tracker/st_cb_copyimage.c +++ b/src/mesa/state_tracker/st_cb_copyimage.c @@ -23,6 +23,7 @@ */ #include "state_tracker/st_context.h" +#include "state_tracker/st_cb_bitmap.h" #include "state_tracker/st_cb_copyimage.h" #include "state_tracker/st_cb_fbo.h" #include "state_tracker/st_texture.h" @@ -547,6 +548,8 @@ st_CopyImageSubData(struct gl_context *ctx, struct pipe_box box; int src_level, dst_level; + st_flush_bitmap_cache(st); + if (src_image) { struct st_texture_image *src = st_texture_image(src_image); src_res = src->pt; commit 76377387c2d44d0fbae21763386ac86ffb54c635 Author: Nicolai Hähnle <[email protected]> Date: Thu Jun 9 12:22:31 2016 +0200 st/mesa: flush bitmap cache before texture functions As far as I can tell, a sequence of glBitmap followed by texture functions that refer to a texture bound as the framebuffer is well within what should be allowed. Found by inspection. 
Cc: 11.2 12.0 <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit e7fff3cfe156e13198107e5e76a77fb79ed02173) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index cfe9c4b..cf1c351 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -48,6 +48,7 @@ #include "state_tracker/st_debug.h" #include "state_tracker/st_context.h" +#include "state_tracker/st_cb_bitmap.h" #include "state_tracker/st_cb_fbo.h" #include "state_tracker/st_cb_flush.h" #include "state_tracker/st_cb_texture.h" @@ -1716,6 +1717,8 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, unsigned dstz = texImage->Face + texImage->TexObject->MinLayer; unsigned dst_level = 0; + st_flush_bitmap_cache(st); + if (stObj->pt == stImage->pt) dst_level = texImage->TexObject->MinLevel + texImage->Level; @@ -2181,6 +2184,8 @@ st_GetTexSubImage(struct gl_context * ctx, assert(!_mesa_is_format_etc2(texImage->TexFormat) && texImage->TexFormat != MESA_FORMAT_ETC1_RGB8); + st_flush_bitmap_cache(st); + if (!st->prefer_blit_based_texture_transfer && !_mesa_is_format_compressed(texImage->TexFormat)) { /* Try to avoid the fallback if we're doing texture decompression here */ @@ -2644,6 +2649,8 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims, unsigned bind; GLint srcY0, srcY1; + st_flush_bitmap_cache(st); + assert(!_mesa_is_format_etc2(texImage->TexFormat) && texImage->TexFormat != MESA_FORMAT_ETC1_RGB8); @@ -3166,6 +3173,8 @@ st_ClearTexSubImage(struct gl_context *ctx, if (!pt) return; + st_flush_bitmap_cache(st); + u_box_3d(xoffset, yoffset, zoffset + texImage->Face, width, height, depth, &box); if (texImage->TexObject->Immutable) { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index a14bbfa..adf02e7 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -40,6 +40,7 @@ #include "st_context.h" 
#include "st_texture.h" #include "st_gen_mipmap.h" +#include "st_cb_bitmap.h" #include "st_cb_texture.h" @@ -96,6 +97,8 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, if (lastLevel == 0) return; + st_flush_bitmap_cache(st); + /* The texture isn't in a "complete" state yet so set the expected * lastLevel here, since it won't get done in st_finalize_texture(). */ commit 6775b169cdffecd373d57847e5d71db3fe39409a Author: Nicolai Hähnle <[email protected]> Date: Thu Jun 9 12:12:34 2016 +0200 st/mesa: flush bitmap cache before compute dispatch In the unlikely case that a program uses glBitmap to render to a framebuffer whose texture is bound in a compute shader. Found by inspection. Cc: 11.2 12.0 <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit c542b7e43d3a504456518c9f407e21c4e7e5fa88) diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c index bfc6d96..063d750 100644 --- a/src/mesa/state_tracker/st_cb_compute.c +++ b/src/mesa/state_tracker/st_cb_compute.c @@ -28,6 +28,7 @@ #include "main/state.h" #include "st_atom.h" #include "st_context.h" +#include "st_cb_bitmap.h" #include "st_cb_bufferobjects.h" #include "st_cb_compute.h" @@ -44,6 +45,8 @@ static void st_dispatch_compute_common(struct gl_context *ctx, struct pipe_context *pipe = st->pipe; struct pipe_grid_info info = { 0 }; + st_flush_bitmap_cache(st); + if (ctx->NewState) _mesa_update_state(ctx); commit a0235eb0f716e05c290cad66292b703c2178af91 Author: Kenneth Graunke <[email protected]> Date: Wed Jun 8 16:09:02 2016 -0700 i965: Fix multiplication of immediates on Cherryview/Broxton. Cherryview and Broxton don't support DW x DW multiplication. We have piles of code to handle this, but apparently weren't retyping in the immediate case. 
For example, tests/spec/arb_tessellation_shader/execution/dvec3-vs-tcs-tes makes the simulator angry about instructions such as: mul(8) r18<1>:D r10.0<8;8,1>:D 0x00000003:D Just retype to W or UW. It should be safe on all platforms. Cc: "12.0" <[email protected]> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462 Reviewed-by: Matt Turner <[email protected]> Reviewed-by: Jordan Justen <[email protected]> Signed-off-by: Kenneth Graunke <[email protected]> (cherry picked from commit cd89c834a8b3b4e5f5874c8e1f90c9b01d541181) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0347b0a..8337774 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3564,7 +3564,10 @@ fs_visitor::lower_integer_multiplication() ibld.MOV(imm, inst->src[1]); ibld.MUL(inst->dst, imm, inst->src[0]); } else { - ibld.MUL(inst->dst, inst->src[0], inst->src[1]); + const bool ud = (inst->src[1].type == BRW_REGISTER_TYPE_UD); + ibld.MUL(inst->dst, inst->src[0], + ud ? 
brw_imm_uw(inst->src[1].ud) + : brw_imm_w(inst->src[1].d)); } } else { /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot commit 09a098bdeb89baacd6bbadc4180daf9c2ffaa840 Author: Jason Ekstrand <[email protected]> Date: Tue Jun 14 08:40:49 2016 -0700 anv: Add proper support for depth clamping Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit eb6764c4a73006eee32e19e3afc6eab100a2ce16) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index f864248..97300c3 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -380,7 +380,7 @@ void anv_GetPhysicalDeviceFeatures( .logicOp = true, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, - .depthClamp = false, + .depthClamp = true, .depthBiasClamp = false, .fillModeNonSolid = true, .depthBounds = false, diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index fe750c8..7ec0608 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -173,6 +173,7 @@ create_pipeline(struct anv_device *device, .cullMode = VK_CULL_MODE_NONE, .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, .depthBiasEnable = false, + .depthClampEnable = true, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index e41f623..32594f7 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1165,6 +1165,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->batch.relocs = &pipeline->batch_relocs; copy_non_dynamic_state(pipeline, pCreateInfo); + pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState && + pCreateInfo->pRasterizationState->depthClampEnable; pipeline->use_repclear = extra && extra->use_repclear; diff 
--git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index f5500c5..052ced4 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1360,7 +1360,8 @@ VkResult anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer, + bool depth_clamp_enable); void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); @@ -1485,6 +1486,8 @@ struct anv_pipeline { uint32_t cs_right_mask; + bool depth_clamp_enable; + struct { uint32_t sf[7]; uint32_t depth_stencil_state[3]; diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index f069db9..dd34d71 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -196,6 +196,7 @@ genX(graphics_pipeline_create)( clip.ClipEnable = !(extra && extra->use_rectlist), clip.APIMode = APIMODE_OGL, clip.ViewportXYClipTestEnable = true, + clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable, clip.ClipMode = CLIPMODE_NORMAL, clip.TriangleStripListProvokingVertexSelect = 0, diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 395d0da..e22b4e2 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -77,7 +77,8 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) } void -gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer) +gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer, + bool depth_clamp_enable) { uint32_t count = cmd_buffer->state.dynamic.viewport.count; const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports; @@ -88,8 +89,8 @@ 
gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer) const VkViewport *vp = &viewports[i]; struct GENX(CC_VIEWPORT) cc_viewport = { - .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth, + .MinimumDepth = depth_clamp_enable ? vp->minDepth : 0.0f, + .MaximumDepth = depth_clamp_enable ? vp->maxDepth : 1.0f, }; GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 54585c3..2a96be0 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -85,11 +85,11 @@ emit_rs_state(struct anv_pipeline *pipeline, .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], .ScissorRectangleEnable = !(extra && extra->use_rectlist), #if GEN_GEN == 8 - .ViewportZClipTestEnable = true, + .ViewportZClipTestEnable = !pipeline->depth_clamp_enable, #else /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ - .ViewportZFarClipTestEnable = true, - .ViewportZNearClipTestEnable = true, + .ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable, + .ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable, #endif .GlobalDepthOffsetEnableSolid = info->depthBiasEnable, .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 58f5e0b..3d628df 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -525,9 +525,13 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (dirty) gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) { + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) gen8_cmd_buffer_emit_viewport(cmd_buffer); - gen8_cmd_buffer_emit_depth_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | + ANV_CMD_DIRTY_PIPELINE)) { + 
gen8_cmd_buffer_emit_depth_viewport(cmd_buffer, + pipeline->depth_clamp_enable); } if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) commit f3c8dde2e4cac98ab190c0378e20424f0b59d9ef Author: Jason Ekstrand <[email protected]> Date: Tue Jun 14 08:15:34 2016 -0700 anv/cmd_buffer: Split emit_viewport in two Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit 8a46b505cb2c7255ad430b56c1ce0dfa9c13c559) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index cd3588a..f5500c5 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1360,6 +1360,7 @@ VkResult anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer); void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index df4036a..395d0da 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -40,8 +40,6 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports; struct anv_state sf_clip_state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); for (uint32_t i = 0; i < count; i++) { const VkViewport *vp = &viewports[i]; @@ -65,29 +63,45 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) .YMaxViewPort = vp->y + vp->height - 1, }; + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + } + + if (!cmd_buffer->device->info.has_llc) + 
anv_state_clflush(sf_clip_state); + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { + clip.SFClipViewportPointer = sf_clip_state.offset; + } +} + +void +gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t count = cmd_buffer->state.dynamic.viewport.count; + const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports; + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + struct GENX(CC_VIEWPORT) cc_viewport = { .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth + .MaximumDepth = vp->maxDepth, }; - GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, - &sf_clip_viewport); GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); } - if (!cmd_buffer->device->info.has_llc) { - anv_state_clflush(sf_clip_state); + if (!cmd_buffer->device->info.has_llc) anv_state_clflush(cc_state); - } anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { cc.CCViewportPointer = cc_state.offset; } - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { - clip.SFClipViewportPointer = sf_clip_state.offset; - } } #endif diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index c62bed4..58f5e0b 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -525,8 +525,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) if (dirty) gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) { gen8_cmd_buffer_emit_viewport(cmd_buffer); + gen8_cmd_buffer_emit_depth_viewport(cmd_buffer); + } if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) 
gen7_cmd_buffer_emit_scissor(cmd_buffer); commit 3fddb9fd46a6066d8ecf0bd19a370acbbbc05b2b Author: Jason Ekstrand <[email protected]> Date: Mon Jun 13 17:09:37 2016 -0700 anv/cmd_buffer: Set depth/stencil extent based on the image It used to be based on the framebuffer which isn't quite right. Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit 20e95a746df34923eb4aac5e7f1ab6d722432d89) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index d9acf58..c62bed4 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1024,11 +1024,11 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) db.DepthBufferObjectControlState = GENX(MOCS), db.SurfacePitch = image->depth_surface.isl.row_pitch - 1; - db.Height = fb->height - 1; - db.Width = fb->width - 1; - db.LOD = 0; - db.Depth = 1 - 1; - db.MinimumArrayElement = 0; + db.Height = image->extent.height - 1; + db.Width = image->extent.width - 1; + db.LOD = iview->base_mip; + db.Depth = image->array_size - 1; /* FIXME: 3-D */ + db.MinimumArrayElement = iview->base_layer; #if GEN_GEN >= 8 db.SurfaceQPitch = commit f614a1f4d88d02f429f29a4e95596e4a40ba7cce Author: Jason Ekstrand <[email protected]> Date: Wed Jun 15 14:30:33 2016 -0700 anv/cmd_buffer: Don't crash if push constants are provided for missing stages Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit b65f2e4163c9180e6a022c0afec018b08e4c5aa5) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 5be5f3e..24c18fe 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -1038,7 +1038,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.pipeline->prog_data[stage]; /* If we don't 
actually have any push constants, bail. */ - if (data == NULL || prog_data->nr_params == 0) + if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) return (struct anv_state) { .offset = 0 }; struct anv_state state = commit f4bc7218d59d55825c4ab2b76e6134827f10d401 Author: Jason Ekstrand <[email protected]> Date: Thu Jun 16 10:57:39 2016 -0700 anv/pipeline: Do invariance propagation on SPIR-V shaders Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit e6c2fe451962e364f30f689dc48c34e2b6161b25) diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 60b7c6b..e41f623 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -165,6 +165,9 @@ anv_shader_compile_to_nir(struct anv_device *device, nir_remove_dead_variables(nir, nir_var_system_value); nir_validate_shader(nir); + nir_propagate_invariant(nir); + nir_validate_shader(nir); + nir_lower_io_to_temporaries(entry_point->shader, entry_point, true, false); nir_lower_system_values(nir); commit 77f241bd37e7d0a76a0ac9223bc4cebba322994c Author: Jason Ekstrand <[email protected]> Date: Mon Jun 13 14:41:05 2016 -0700 nir/alu_to_scalar: Respect the exact ALU operation qualifier Just setting builder->exact isn't sufficient because that only applies to instructions that are built with the builder but instructions created manually and only inserted using the builder are left alone. 
Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit bec07b729242f6a2dcf5a12ce75bf8b07ea658e0) diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index b491791..4f72cf7 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -56,6 +56,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op, nir_alu_src_copy(&chan->src[1], &instr->src[1], chan); chan->src[1].swizzle[0] = chan->src[1].swizzle[i]; } + chan->exact = instr->exact; nir_builder_instr_insert(builder, &chan->instr); @@ -229,6 +230,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size); lower->dest.saturate = instr->dest.saturate; comps[chan] = &lower->dest.dest.ssa; + lower->exact = instr->exact; nir_builder_instr_insert(b, &lower->instr); } commit deedb368de7dc50f7196af440c338dcf6a361564 Author: Jason Ekstrand <[email protected]> Date: Mon Jun 13 12:47:19 2016 -0700 nir: Add a pass for propagating invariant decorations This pass is similar to propagate_invariance in the GLSL compiler. The real "output" of this pass is that any algebraic operations which are eventually consumed by an invariant variable get marked as "exact". 
Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]> Cc: "12.0" <[email protected]> (cherry picked from commit 202751fbb7e3d35c1aa84f325f862245dab67f6c) diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 09a756b..bbd5d14 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -231,6 +231,7 @@ NIR_FILES = \ nir/nir_phi_builder.c \ nir/nir_phi_builder.h \ nir/nir_print.c \ + nir/nir_propagate_invariant.c \ nir/nir_remove_dead_variables.c \ nir/nir_repair_ssa.c \ nir/nir_search.c \ diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 20f6520..9816ed6 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2290,6 +2290,8 @@ bool nir_lower_returns(nir_shader *shader); bool nir_inline_functions(nir_shader *shader); +bool nir_propagate_invariant(nir_shader *shader); + void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); void nir_lower_var_copies(nir_shader *shader); diff --git a/src/compiler/nir/nir_propagate_invariant.c b/src/compiler/nir/nir_propagate_invariant.c new file mode 100644 index 0000000..7b5bd6c --- /dev/null +++ b/src/compiler/nir/nir_propagate_invariant.c @@ -0,0 +1,196 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "nir.h" + +static void +add_src(nir_src *src, struct set *invariants) +{ + if (src->is_ssa) { + _mesa_set_add(invariants, src->ssa); + } else { + _mesa_set_add(invariants, src->reg.reg); + } +} + +static bool +add_src_cb(nir_src *src, void *state) +{ + add_src(src, state); + return true; +} + +static bool +dest_is_invariant(nir_dest *dest, struct set *invariants) +{ + if (dest->is_ssa) { + return _mesa_set_search(invariants, &dest->ssa); + } else { + return _mesa_set_search(invariants, dest->reg.reg); + } +} + +static void +add_cf_node(nir_cf_node *cf, struct set *invariants) +{ + if (cf->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(cf); + add_src(&if_stmt->condition, invariants); + } + + if (cf->parent) + add_cf_node(cf->parent, invariants); +} + +static void +add_var(nir_variable *var, struct set *invariants) +{ + _mesa_set_add(invariants, var); +} + +static bool +var_is_invariant(nir_variable *var, struct set * invariants) +{ + return var->data.invariant || _mesa_set_search(invariants, var); +} + +static void +propagate_invariant_instr(nir_instr *instr, struct set *invariants) +{ + switch (instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (!dest_is_invariant(&alu->dest.dest, invariants)) + break; + + alu->exact = true; + nir_foreach_src(instr, add_src_cb, invariants); + break; + } + + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (dest_is_invariant(&tex->dest, 
invariants)) + nir_foreach_src(instr, add_src_cb, invariants); + break; + } + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_copy_var: + /* If the destination is invariant then so is the source */ + if (var_is_invariant(intrin->variables[0]->var, invariants)) + add_var(intrin->variables[1]->var, invariants); + break; + + case nir_intrinsic_load_var: + if (dest_is_invariant(&intrin->dest, invariants)) + add_var(intrin->variables[0]->var, invariants); + break; + + case nir_intrinsic_store_var: + if (var_is_invariant(intrin->variables[0]->var, invariants)) + add_src(&intrin->src[0], invariants); + break; + + default: + /* Nothing to do */ + break; + } + } + + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + case nir_instr_type_load_const: + break; /* Nothing to do */ + + case nir_instr_type_phi: { + nir_phi_instr *phi = nir_instr_as_phi(instr); + if (!dest_is_invariant(&phi->dest, invariants)) + break; + + nir_foreach_phi_src(src, phi) { + add_src(&src->src, invariants); + add_cf_node(&src->pred->cf_node, invariants); + } + break; + } + + case nir_instr_type_call: + unreachable("This pass must be run after function inlining"); + + case nir_instr_type_parallel_copy: + default: + unreachable("Cannot have this instruction type"); + } +} + +static bool +propagate_invariant_impl(nir_function_impl *impl, struct set *invariants) +{ + bool progress = false; + + while (true) { + uint32_t prev_entries = invariants->entries; + + nir_foreach_block_reverse(block, impl) { + nir_foreach_instr_reverse(instr, block) + propagate_invariant_instr(instr, invariants); + } + + /* Keep running until we make no more progress. 
*/ + if (invariants->entries > prev_entries) { + progress = true; + continue; + } else { + break; + } + } + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs); + } + + return progress; +} + +bool +nir_propagate_invariant(nir_shader *shader) +{ + /* Hash set of invariant things */ + struct set *invariants = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + bool progress = false; + nir_foreach_function(function, shader) { + if (function->impl && propagate_invariant_impl(function->impl, invariants)) + progress = true; + } + + _mesa_set_destroy(invariants, NULL); + + return progress; +} commit bac23b13eb75a7bacdec439eb4c239a8dedb24e7 Author: Jason Ekstrand <[email protected]> Date: Sat Jun 18 12:30:36 2016 -0700 nir/algebraic: Remove imprecise flog2 optimizations While mathematically correct, these two optimizations result in an expression with substantially lower precision than the original. For any positive finite floating-point value, log2(x) is well-defined and finite. More precisely, it is in the range [-150, 150] so any sum of logarithms log2(a) + log2(b) is also well-defined and finite as long as a and b are both positive and finite. However, if a and b are either very small or very large, their product may get flushed to infinity or zero causing log2(a * b) to be nowhere close to log2(a) + log2(b). This imprecision was causing incorrect rendering in Talos Principle because part of its HDR rendering process involves doing 8 texture operations, clamping the result to [0, 65000], taking a dot-product with a constant, and then taking the log2. This is done 6 or 8 times and summed to produce the final result which is written to a red texture. In cases where you have a region of the screen that is very dark, it can end up getting a result value of -inf which is not what is intended.
Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Matt Turner <[email protected]> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96425 Cc: "11.1 11.2 12.0" <[email protected]> (cherry picked from commit 68e308d85355079ad93bd4e16cba164784740fdf) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f8db2b6..a7a541a 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -224,8 +224,6 @@ optimizations = [ (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))), (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), - (('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), - (('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), (('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), # Division and reciprocal (('~fdiv', 1.0, a), ('frcp', a)), commit b03b256e921c3f7cebfcf9efd5bdf7b403b9c961 Author: Nicolai Hähnle <[email protected]> Date: Fri Jun 17 10:48:53 2016 +0200 radeonsi: fix calculation of valid RB mask per SE The old calculation treated too many RBs as disabled. 
Cc: 11.0 11.1 11.2 12.0 <[email protected]> Reviewed-by: Alex Deucher <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit c95175581e983642dc4b23d059e6eaff5b79d2db) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 47af9c8..96da179 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3478,10 +3478,15 @@ si_write_harvested_raster_configs(struct si_context *sctx, unsigned se_mask[4]; unsigned se; - se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; - se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; - se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; - se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; + se_mask[0] = ((1 << rb_per_se) - 1); + se_mask[1] = (se_mask[0] << rb_per_se); + se_mask[2] = (se_mask[1] << rb_per_se); + se_mask[3] = (se_mask[2] << rb_per_se); + + se_mask[0] &= rb_mask; + se_mask[1] &= rb_mask; + se_mask[2] &= rb_mask; + se_mask[3] &= rb_mask; assert(num_se == 1 || num_se == 2 || num_se == 4); assert(sh_per_se == 1 || sh_per_se == 2); commit 52ae654569cb1e0e42f12b492fdd62d7cf20dc12 Author: Nicolai Hähnle <[email protected]> Date: Fri Jun 17 10:30:44 2016 +0200 radeonsi: raise SI_PM4_MAX_DW The old limit, introduced in commit afa752d3f03ac6697581ff5d324e8ac0512ef513, was exceeded by 4 SE configurations which hit si_write_harvested_raster_configs. 
Cc: 11.1 11.2 12.0 <[email protected]> Reviewed-by: Alex Deucher <[email protected]> Reviewed-by: Marek Olšák <[email protected]> (cherry picked from commit 6c2e63698290d3ea868eefcc3e4dd51dc1e16c64) diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 309a596..35fa6c3 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -29,7 +29,7 @@ #include "radeon/radeon_winsys.h" -#define SI_PM4_MAX_DW 160 +#define SI_PM4_MAX_DW 176 #define SI_PM4_MAX_BO 1 // forward defines commit f675339b224b90cac161840716fcc348926f88c7 Author: Roland Scheidegger <[email protected]> Date: Sun Jun 19 03:56:11 2016 +0200 gallivm: don't use integer min/max sse intrinsics with llvm >= 3.9 Apparently, these are deprecated. There's some AutoUpgrade feature which is supposed to promote these to cmp/select, which apparently doesn't work with jit code. It is possible it's not actually even meant to work (see the bug filed against llvm which couldn't provide an answer either) but in any case this is meant to be only temporary unless the intrinsics are really illegal. So, just use the fallback code (which should be cmp/select, we're actually doing cmp/sext/trunc/select, but in any case llvm 3.9 manages

