mesa: Changes to 'upstream-experimental'

Timo Aaltonen Wed, 22 Jun 2016 07:40:07 -0700

 VERSION                                                       |    2 
 src/compiler/Makefile.sources                                 |    1 
 src/compiler/glsl/ast_to_hir.cpp                              |   17 
 src/compiler/glsl/builtin_variables.cpp                       |   13 
 src/compiler/glsl/linker.cpp                                  |   20 -
 src/compiler/nir/nir.h                                        |    2 
 src/compiler/nir/nir_lower_alu_to_scalar.c                    |    2 
 src/compiler/nir/nir_opt_algebraic.py                         |    2 
 src/compiler/nir/nir_propagate_invariant.c                    |  196 ++++++++++
 src/gallium/auxiliary/gallivm/lp_bld_arit.c                   |    6 
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp |   34 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h   |    5 
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c                   |    4 
 src/gallium/drivers/radeonsi/si_pm4.h                         |    2 
 src/gallium/drivers/radeonsi/si_state.c                       |   13 
 src/intel/vulkan/anv_cmd_buffer.c                             |    2 
 src/intel/vulkan/anv_device.c                                 |    2 
 src/intel/vulkan/anv_meta_clear.c                             |    1 
 src/intel/vulkan/anv_pipeline.c                               |    5 
 src/intel/vulkan/anv_private.h                                |    4 
 src/intel/vulkan/gen7_pipeline.c                              |    1 
 src/intel/vulkan/gen8_cmd_buffer.c                            |   41 +-
 src/intel/vulkan/gen8_pipeline.c                              |    6 
 src/intel/vulkan/genX_cmd_buffer.c                            |   16 
 src/mesa/drivers/dri/i965/brw_context.c                       |    2 
 src/mesa/drivers/dri/i965/brw_fs.cpp                          |    5 
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp                      |   53 ++
 src/mesa/drivers/dri/i965/brw_tcs.c                           |    6 
 src/mesa/main/image.c                                         |    8 
 src/mesa/main/mtypes.h                                        |    2 
 src/mesa/main/pipelineobj.c                                   |   17 
 src/mesa/program/prog_statevars.c                             |   19 
 src/mesa/program/prog_statevars.h                             |    2 
 src/mesa/state_tracker/st_cb_compute.c                        |    3 
 src/mesa/state_tracker/st_cb_copyimage.c                      |    3 
 src/mesa/state_tracker/st_cb_texture.c                        |    9 
 src/mesa/state_tracker/st_gen_mipmap.c                        |    3 
 37 files changed, 453 insertions(+), 76 deletions(-)


New commits:
commit 5e0b11cb6dbeab7ca6a1ba2edca56701cdfde96c
Author: Emil Velikov <[email protected]>
Date:   Tue Jun 21 13:32:04 2016 +0100

    Update version to 12.0.0-rc4
    
    Signed-off-by: Emil Velikov <[email protected]>

diff --git a/VERSION b/VERSION
index 6ef7d92..076ead7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-12.0.0-rc3
+12.0.0-rc4

commit 6306930c3f0cd790e9d8995468559453b6c75b2c
Author: Nicolai Hähnle <[email protected]>
Date:   Fri Jun 10 15:59:58 2016 +0200

    st/mesa: flush bitmap cache before CopyImageSubData
    
    Found by inspection.
    
    Cc: 11.2 12.0 <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit f9ddd52317caf14a21ec7c040fd4bb944f9842e4)

diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c
index 617e470..8afb861 100644
--- a/src/mesa/state_tracker/st_cb_copyimage.c
+++ b/src/mesa/state_tracker/st_cb_copyimage.c
@@ -23,6 +23,7 @@
  */
 
 #include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_bitmap.h"
 #include "state_tracker/st_cb_copyimage.h"
 #include "state_tracker/st_cb_fbo.h"
 #include "state_tracker/st_texture.h"
@@ -547,6 +548,8 @@ st_CopyImageSubData(struct gl_context *ctx,
    struct pipe_box box;
    int src_level, dst_level;
 
+   st_flush_bitmap_cache(st);
+
    if (src_image) {
       struct st_texture_image *src = st_texture_image(src_image);
       src_res = src->pt;

commit 76377387c2d44d0fbae21763386ac86ffb54c635
Author: Nicolai Hähnle <[email protected]>
Date:   Thu Jun 9 12:22:31 2016 +0200

    st/mesa: flush bitmap cache before texture functions
    
    As far as I can tell, a sequence of glBitmap followed by texture functions
    that refer to a texture bound as the framebuffer is well within what should
    be allowed.
    
    Found by inspection.
    
    Cc: 11.2 12.0 <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit e7fff3cfe156e13198107e5e76a77fb79ed02173)

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index cfe9c4b..cf1c351 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -48,6 +48,7 @@
 
 #include "state_tracker/st_debug.h"
 #include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_bitmap.h"
 #include "state_tracker/st_cb_fbo.h"
 #include "state_tracker/st_cb_flush.h"
 #include "state_tracker/st_cb_texture.h"
@@ -1716,6 +1717,8 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
    unsigned dstz = texImage->Face + texImage->TexObject->MinLayer;
    unsigned dst_level = 0;
 
+   st_flush_bitmap_cache(st);
+
    if (stObj->pt == stImage->pt)
       dst_level = texImage->TexObject->MinLevel + texImage->Level;
 
@@ -2181,6 +2184,8 @@ st_GetTexSubImage(struct gl_context * ctx,
    assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
           texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
 
+   st_flush_bitmap_cache(st);
+
    if (!st->prefer_blit_based_texture_transfer &&
        !_mesa_is_format_compressed(texImage->TexFormat)) {
       /* Try to avoid the fallback if we're doing texture decompression here */
@@ -2644,6 +2649,8 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims,
    unsigned bind;
    GLint srcY0, srcY1;
 
+   st_flush_bitmap_cache(st);
+
    assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
           texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
 
@@ -3166,6 +3173,8 @@ st_ClearTexSubImage(struct gl_context *ctx,
    if (!pt)
       return;
 
+   st_flush_bitmap_cache(st);
+
    u_box_3d(xoffset, yoffset, zoffset + texImage->Face,
             width, height, depth, &box);
    if (texImage->TexObject->Immutable) {
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index a14bbfa..adf02e7 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -40,6 +40,7 @@
 #include "st_context.h"
 #include "st_texture.h"
 #include "st_gen_mipmap.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_texture.h"
 
 
@@ -96,6 +97,8 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
    if (lastLevel == 0)
       return;
 
+   st_flush_bitmap_cache(st);
+
    /* The texture isn't in a "complete" state yet so set the expected
     * lastLevel here, since it won't get done in st_finalize_texture().
     */

commit 6775b169cdffecd373d57847e5d71db3fe39409a
Author: Nicolai Hähnle <[email protected]>
Date:   Thu Jun 9 12:12:34 2016 +0200

    st/mesa: flush bitmap cache before compute dispatch
    
    In the unlikely case that a program uses glBitmap to render to a framebuffer
    whose texture is bound in a compute shader.
    
    Found by inspection.
    
    Cc: 11.2 12.0 <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit c542b7e43d3a504456518c9f407e21c4e7e5fa88)

diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c
index bfc6d96..063d750 100644
--- a/src/mesa/state_tracker/st_cb_compute.c
+++ b/src/mesa/state_tracker/st_cb_compute.c
@@ -28,6 +28,7 @@
 #include "main/state.h"
 #include "st_atom.h"
 #include "st_context.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_bufferobjects.h"
 #include "st_cb_compute.h"
 
@@ -44,6 +45,8 @@ static void st_dispatch_compute_common(struct gl_context *ctx,
    struct pipe_context *pipe = st->pipe;
    struct pipe_grid_info info = { 0 };
 
+   st_flush_bitmap_cache(st);
+
    if (ctx->NewState)
       _mesa_update_state(ctx);
 

commit a0235eb0f716e05c290cad66292b703c2178af91
Author: Kenneth Graunke <[email protected]>
Date:   Wed Jun 8 16:09:02 2016 -0700

    i965: Fix multiplication of immediates on Cherryview/Broxton.
    
    Cherryview and Broxton don't support DW x DW multiplication.  We have
    piles of code to handle this, but apparently weren't retyping in the
    immediate case.
    
    For example,
    tests/spec/arb_tessellation_shader/execution/dvec3-vs-tcs-tes
    makes the simulator angry about instructions such as:
    
       mul(8) r18<1>:D r10.0<8;8,1>:D 0x00000003:D
    
    Just retype to W or UW.  It should be safe on all platforms.
    
    Cc: "12.0" <[email protected]>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462
    Reviewed-by: Matt Turner <[email protected]>
    Reviewed-by: Jordan Justen <[email protected]>
    Signed-off-by: Kenneth Graunke <[email protected]>
    (cherry picked from commit cd89c834a8b3b4e5f5874c8e1f90c9b01d541181)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0347b0a..8337774 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3564,7 +3564,10 @@ fs_visitor::lower_integer_multiplication()
                ibld.MOV(imm, inst->src[1]);
                ibld.MUL(inst->dst, imm, inst->src[0]);
             } else {
-               ibld.MUL(inst->dst, inst->src[0], inst->src[1]);
+               const bool ud = (inst->src[1].type == BRW_REGISTER_TYPE_UD);
+               ibld.MUL(inst->dst, inst->src[0],
+                        ud ? brw_imm_uw(inst->src[1].ud)
+                           : brw_imm_w(inst->src[1].d));
             }
          } else {
             /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot

commit 09a098bdeb89baacd6bbadc4180daf9c2ffaa840
Author: Jason Ekstrand <[email protected]>
Date:   Tue Jun 14 08:40:49 2016 -0700

    anv: Add proper support for depth clamping
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit eb6764c4a73006eee32e19e3afc6eab100a2ce16)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index f864248..97300c3 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -380,7 +380,7 @@ void anv_GetPhysicalDeviceFeatures(
       .logicOp                                  = true,
       .multiDrawIndirect                        = false,
       .drawIndirectFirstInstance                = false,
-      .depthClamp                               = false,
+      .depthClamp                               = true,
       .depthBiasClamp                           = false,
       .fillModeNonSolid                         = true,
       .depthBounds                              = false,
diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c
index fe750c8..7ec0608 100644
--- a/src/intel/vulkan/anv_meta_clear.c
+++ b/src/intel/vulkan/anv_meta_clear.c
@@ -173,6 +173,7 @@ create_pipeline(struct anv_device *device,
             .cullMode = VK_CULL_MODE_NONE,
             .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
             .depthBiasEnable = false,
+            .depthClampEnable = true,
          },
          .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
             .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index e41f623..32594f7 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1165,6 +1165,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    pipeline->batch.relocs = &pipeline->batch_relocs;
 
    copy_non_dynamic_state(pipeline, pCreateInfo);
+   pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
+                                  pCreateInfo->pRasterizationState->depthClampEnable;
 
    pipeline->use_repclear = extra && extra->use_repclear;
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f5500c5..052ced4 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1360,7 +1360,8 @@ VkResult
 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
 
 void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
-void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer);
+void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
+                                         bool depth_clamp_enable);
 void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
 
 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
@@ -1485,6 +1486,8 @@ struct anv_pipeline {
 
    uint32_t                                     cs_right_mask;
 
+   bool                                         depth_clamp_enable;
+
    struct {
       uint32_t                                  sf[7];
       uint32_t                                  depth_stencil_state[3];
diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index f069db9..dd34d71 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -196,6 +196,7 @@ genX(graphics_pipeline_create)(
       clip.ClipEnable               = !(extra && extra->use_rectlist),
       clip.APIMode                  = APIMODE_OGL,
       clip.ViewportXYClipTestEnable = true,
+      clip.ViewportZClipTestEnable  = !pipeline->depth_clamp_enable,
       clip.ClipMode                 = CLIPMODE_NORMAL,
 
       clip.TriangleStripListProvokingVertexSelect   = 0,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 395d0da..e22b4e2 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -77,7 +77,8 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
 }
 
 void
-gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
+gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
+                                    bool depth_clamp_enable)
 {
    uint32_t count = cmd_buffer->state.dynamic.viewport.count;
    const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
@@ -88,8 +89,8 @@ gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
       const VkViewport *vp = &viewports[i];
 
       struct GENX(CC_VIEWPORT) cc_viewport = {
-         .MinimumDepth = vp->minDepth,
-         .MaximumDepth = vp->maxDepth,
+         .MinimumDepth = depth_clamp_enable ? vp->minDepth : 0.0f,
+         .MaximumDepth = depth_clamp_enable ? vp->maxDepth : 1.0f,
       };
 
       GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index 54585c3..2a96be0 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -85,11 +85,11 @@ emit_rs_state(struct anv_pipeline *pipeline,
       .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
       .ScissorRectangleEnable = !(extra && extra->use_rectlist),
 #if GEN_GEN == 8
-      .ViewportZClipTestEnable = true,
+      .ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
 #else
       /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
-      .ViewportZFarClipTestEnable = true,
-      .ViewportZNearClipTestEnable = true,
+      .ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable,
+      .ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable,
 #endif
       .GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
       .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 58f5e0b..3d628df 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -525,9 +525,13 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
    if (dirty)
       gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
 
-   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
+   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
       gen8_cmd_buffer_emit_viewport(cmd_buffer);
-      gen8_cmd_buffer_emit_depth_viewport(cmd_buffer);
+
+   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |
+                                  ANV_CMD_DIRTY_PIPELINE)) {
+      gen8_cmd_buffer_emit_depth_viewport(cmd_buffer,
+                                          pipeline->depth_clamp_enable);
    }
 
    if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)

commit f3c8dde2e4cac98ab190c0378e20424f0b59d9ef
Author: Jason Ekstrand <[email protected]>
Date:   Tue Jun 14 08:15:34 2016 -0700

    anv/cmd_buffer: Split emit_viewport in two
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit 8a46b505cb2c7255ad430b56c1ce0dfa9c13c559)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index cd3588a..f5500c5 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1360,6 +1360,7 @@ VkResult
 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
 
 void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
+void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer);
 void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
 
 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index df4036a..395d0da 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -40,8 +40,6 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
    const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
    struct anv_state sf_clip_state =
       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
-   struct anv_state cc_state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
 
    for (uint32_t i = 0; i < count; i++) {
       const VkViewport *vp = &viewports[i];
@@ -65,29 +63,45 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
          .YMaxViewPort = vp->y + vp->height - 1,
       };
 
+      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
+                                 &sf_clip_viewport);
+   }
+
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(sf_clip_state);
+
+   anv_batch_emit(&cmd_buffer->batch,
+                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
+      clip.SFClipViewportPointer = sf_clip_state.offset;
+   }
+}
+
+void
+gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
+{
+   uint32_t count = cmd_buffer->state.dynamic.viewport.count;
+   const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
+   struct anv_state cc_state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
+
+   for (uint32_t i = 0; i < count; i++) {
+      const VkViewport *vp = &viewports[i];
+
       struct GENX(CC_VIEWPORT) cc_viewport = {
          .MinimumDepth = vp->minDepth,
-         .MaximumDepth = vp->maxDepth
+         .MaximumDepth = vp->maxDepth,
       };
 
-      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
-                                 &sf_clip_viewport);
       GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
    }
 
-   if (!cmd_buffer->device->info.has_llc) {
-      anv_state_clflush(sf_clip_state);
+   if (!cmd_buffer->device->info.has_llc)
       anv_state_clflush(cc_state);
-   }
 
    anv_batch_emit(&cmd_buffer->batch,
                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
       cc.CCViewportPointer = cc_state.offset;
    }
-   anv_batch_emit(&cmd_buffer->batch,
-                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
-      clip.SFClipViewportPointer = sf_clip_state.offset;
-   }
 }
 #endif
 
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index c62bed4..58f5e0b 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -525,8 +525,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
    if (dirty)
       gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
 
-   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
+   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
       gen8_cmd_buffer_emit_viewport(cmd_buffer);
+      gen8_cmd_buffer_emit_depth_viewport(cmd_buffer);
+   }
 
    if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
       gen7_cmd_buffer_emit_scissor(cmd_buffer);

commit 3fddb9fd46a6066d8ecf0bd19a370acbbbc05b2b
Author: Jason Ekstrand <[email protected]>
Date:   Mon Jun 13 17:09:37 2016 -0700

    anv/cmd_buffer: Set depth/stencil extent based on the image
    
    It used to be based on the framebuffer which isn't quite right.
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit 20e95a746df34923eb4aac5e7f1ab6d722432d89)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index d9acf58..c62bed4 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1024,11 +1024,11 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
          db.DepthBufferObjectControlState = GENX(MOCS),
 
          db.SurfacePitch         = image->depth_surface.isl.row_pitch - 1;
-         db.Height               = fb->height - 1;
-         db.Width                = fb->width - 1;
-         db.LOD                  = 0;
-         db.Depth                = 1 - 1;
-         db.MinimumArrayElement  = 0;
+         db.Height               = image->extent.height - 1;
+         db.Width                = image->extent.width - 1;
+         db.LOD                  = iview->base_mip;
+         db.Depth                = image->array_size - 1; /* FIXME: 3-D */
+         db.MinimumArrayElement  = iview->base_layer;
 
 #if GEN_GEN >= 8
          db.SurfaceQPitch =

commit f614a1f4d88d02f429f29a4e95596e4a40ba7cce
Author: Jason Ekstrand <[email protected]>
Date:   Wed Jun 15 14:30:33 2016 -0700

    anv/cmd_buffer: Don't crash if push constants are provided for missing stages
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit b65f2e4163c9180e6a022c0afec018b08e4c5aa5)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 5be5f3e..24c18fe 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1038,7 +1038,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
       cmd_buffer->state.pipeline->prog_data[stage];
 
    /* If we don't actually have any push constants, bail. */
-   if (data == NULL || prog_data->nr_params == 0)
+   if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
       return (struct anv_state) { .offset = 0 };
 
    struct anv_state state =

commit f4bc7218d59d55825c4ab2b76e6134827f10d401
Author: Jason Ekstrand <[email protected]>
Date:   Thu Jun 16 10:57:39 2016 -0700

    anv/pipeline: Do invariance propagation on SPIR-V shaders
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit e6c2fe451962e364f30f689dc48c34e2b6161b25)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 60b7c6b..e41f623 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -165,6 +165,9 @@ anv_shader_compile_to_nir(struct anv_device *device,
       nir_remove_dead_variables(nir, nir_var_system_value);
       nir_validate_shader(nir);
 
+      nir_propagate_invariant(nir);
+      nir_validate_shader(nir);
+
       nir_lower_io_to_temporaries(entry_point->shader, entry_point, true, false);
 
       nir_lower_system_values(nir);

commit 77f241bd37e7d0a76a0ac9223bc4cebba322994c
Author: Jason Ekstrand <[email protected]>
Date:   Mon Jun 13 14:41:05 2016 -0700

    nir/alu_to_scalar: Respect the exact ALU operation qualifier
    
    Just setting builder->exact isn't sufficient because that only applies to
    instructions that are built with the builder but instructions created
    manually and only inserted using the builder are left alone.
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit bec07b729242f6a2dcf5a12ce75bf8b07ea658e0)

diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index b491791..4f72cf7 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -56,6 +56,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
          nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
          chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
       }
+      chan->exact = instr->exact;
 
       nir_builder_instr_insert(builder, &chan->instr);
 
@@ -229,6 +230,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
       nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size);
       lower->dest.saturate = instr->dest.saturate;
       comps[chan] = &lower->dest.dest.ssa;
+      lower->exact = instr->exact;
 
       nir_builder_instr_insert(b, &lower->instr);
    }

commit deedb368de7dc50f7196af440c338dcf6a361564
Author: Jason Ekstrand <[email protected]>
Date:   Mon Jun 13 12:47:19 2016 -0700

    nir: Add a pass for propagating invariant decorations
    
    This pass is similar to propagate_invariance in the GLSL compiler.  The
    real "output" of this pass is that any algebraic operations which are
    eventually consumed by an invariant variable get marked as "exact".
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit 202751fbb7e3d35c1aa84f325f862245dab67f6c)

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 09a756b..bbd5d14 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -231,6 +231,7 @@ NIR_FILES = \
        nir/nir_phi_builder.c \
        nir/nir_phi_builder.h \
        nir/nir_print.c \
+       nir/nir_propagate_invariant.c \
        nir/nir_remove_dead_variables.c \
        nir/nir_repair_ssa.c \
        nir/nir_search.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 20f6520..9816ed6 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2290,6 +2290,8 @@ bool nir_lower_returns(nir_shader *shader);
 
 bool nir_inline_functions(nir_shader *shader);
 
+bool nir_propagate_invariant(nir_shader *shader);
+
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
 void nir_lower_var_copies(nir_shader *shader);
 
diff --git a/src/compiler/nir/nir_propagate_invariant.c b/src/compiler/nir/nir_propagate_invariant.c
new file mode 100644
index 0000000..7b5bd6c
--- /dev/null
+++ b/src/compiler/nir/nir_propagate_invariant.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+static void
+add_src(nir_src *src, struct set *invariants)
+{
+   if (src->is_ssa) {
+      _mesa_set_add(invariants, src->ssa);
+   } else {
+      _mesa_set_add(invariants, src->reg.reg);
+   }
+}
+
+static bool
+add_src_cb(nir_src *src, void *state)
+{
+   add_src(src, state);
+   return true;
+}
+
+static bool
+dest_is_invariant(nir_dest *dest, struct set *invariants)
+{
+   if (dest->is_ssa) {
+      return _mesa_set_search(invariants, &dest->ssa);
+   } else {
+      return _mesa_set_search(invariants, dest->reg.reg);
+   }
+}
+
+static void
+add_cf_node(nir_cf_node *cf, struct set *invariants)
+{
+   if (cf->type == nir_cf_node_if) {
+      nir_if *if_stmt = nir_cf_node_as_if(cf);
+      add_src(&if_stmt->condition, invariants);
+   }
+
+   if (cf->parent)
+      add_cf_node(cf->parent, invariants);
+}
+
+static void
+add_var(nir_variable *var, struct set *invariants)
+{
+   _mesa_set_add(invariants, var);
+}
+
+static bool
+var_is_invariant(nir_variable *var, struct set * invariants)
+{
+   return var->data.invariant || _mesa_set_search(invariants, var);
+}
+
+static void
+propagate_invariant_instr(nir_instr *instr, struct set *invariants)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      if (!dest_is_invariant(&alu->dest.dest, invariants))
+         break;
+
+      alu->exact = true;
+      nir_foreach_src(instr, add_src_cb, invariants);
+      break;
+   }
+
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      if (dest_is_invariant(&tex->dest, invariants))
+         nir_foreach_src(instr, add_src_cb, invariants);
+      break;
+   }
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_copy_var:
+         /* If the destination is invariant then so is the source */
+         if (var_is_invariant(intrin->variables[0]->var, invariants))
+            add_var(intrin->variables[1]->var, invariants);
+         break;
+
+      case nir_intrinsic_load_var:
+         if (dest_is_invariant(&intrin->dest, invariants))
+            add_var(intrin->variables[0]->var, invariants);
+         break;
+
+      case nir_intrinsic_store_var:
+         if (var_is_invariant(intrin->variables[0]->var, invariants))
+            add_src(&intrin->src[0], invariants);
+         break;
+
+      default:
+         /* Nothing to do */
+         break;
+      }
+   }
+
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_load_const:
+      break; /* Nothing to do */
+
+   case nir_instr_type_phi: {
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      if (!dest_is_invariant(&phi->dest, invariants))
+         break;
+
+      nir_foreach_phi_src(src, phi) {
+         add_src(&src->src, invariants);
+         add_cf_node(&src->pred->cf_node, invariants);
+      }
+      break;
+   }
+
+   case nir_instr_type_call:
+      unreachable("This pass must be run after function inlining");
+
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Cannot have this instruction type");
+   }
+}
+
+static bool
+propagate_invariant_impl(nir_function_impl *impl, struct set *invariants)
+{
+   bool progress = false;
+
+   while (true) {
+      uint32_t prev_entries = invariants->entries;
+
+      nir_foreach_block_reverse(block, impl) {
+         nir_foreach_instr_reverse(instr, block)
+            propagate_invariant_instr(instr, invariants);
+      }
+
+      /* Keep running until we make no more progress. */
+      if (invariants->entries > prev_entries) {
+         progress = true;
+         continue;
+      } else {
+         break;
+      }
+   }
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance |
+                                  nir_metadata_live_ssa_defs);
+   }
+
+   return progress;
+}
+
+bool
+nir_propagate_invariant(nir_shader *shader)
+{
+   /* Hash set of invariant things */
+   struct set *invariants = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (function->impl && propagate_invariant_impl(function->impl, invariants))
+         progress = true;
+   }
+
+   _mesa_set_destroy(invariants, NULL);
+
+   return progress;
+}

commit bac23b13eb75a7bacdec439eb4c239a8dedb24e7
Author: Jason Ekstrand <[email protected]>
Date:   Sat Jun 18 12:30:36 2016 -0700

    nir/algebraic: Remove imprecise flog2 optimizations
    
    While mathematically correct, these two optimizations result in an
    expression with substantially lower precision than the original.  For any
    positive finite floating-point value, log2(x) is well-defined and finite.
    More precisely, it is in the range [-150, 150] so any sum of logarithms
    log2(a) + log2(b) is also well-defined and finite as long as a and b are
    both positive and finite.  However, if a and b are either very small or
    very large, their product may get flushed to infinity or zero causing
    log2(a * b) to be nowhere close to log2(a) + log2(b).
    
    This imprecision was causing incorrect rendering in Talos Principle because
    part of its HDR rendering process involves doing 8 texture operations,
    clamping the result to [0, 65000], taking a dot-product with a constant,
    and then taking the log2.  This is done 6 or 8 times and summed to produce
    the final result which is written to a red texture.  In cases where you
    have a region of the screen that is very dark, it can end up getting a
    result value of -inf which is not what is intended.
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Matt Turner <[email protected]>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96425
    Cc: "11.1 11.2 12.0" <[email protected]>
    (cherry picked from commit 68e308d85355079ad93bd4e16cba164784740fdf)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f8db2b6..a7a541a 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -224,8 +224,6 @@ optimizations = [
    (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
    (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
    (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
-   (('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
-   (('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, 
b))),
    (('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
    # Division and reciprocal
    (('~fdiv', 1.0, a), ('frcp', a)),

commit b03b256e921c3f7cebfcf9efd5bdf7b403b9c961
Author: Nicolai Hähnle <[email protected]>
Date:   Fri Jun 17 10:48:53 2016 +0200

    radeonsi: fix calculation of valid RB mask per SE
    
    The old calculation treated too many RBs as disabled.
    
    Cc: 11.0 11.1 11.2 12.0 <[email protected]>
    Reviewed-by: Alex Deucher <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit c95175581e983642dc4b23d059e6eaff5b79d2db)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 47af9c8..96da179 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3478,10 +3478,15 @@ si_write_harvested_raster_configs(struct si_context 
*sctx,
        unsigned se_mask[4];
        unsigned se;
 
-       se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
-       se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
-       se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
-       se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+       se_mask[0] = ((1 << rb_per_se) - 1);
+       se_mask[1] = (se_mask[0] << rb_per_se);
+       se_mask[2] = (se_mask[1] << rb_per_se);
+       se_mask[3] = (se_mask[2] << rb_per_se);
+
+       se_mask[0] &= rb_mask;
+       se_mask[1] &= rb_mask;
+       se_mask[2] &= rb_mask;
+       se_mask[3] &= rb_mask;
 
        assert(num_se == 1 || num_se == 2 || num_se == 4);
        assert(sh_per_se == 1 || sh_per_se == 2);

commit 52ae654569cb1e0e42f12b492fdd62d7cf20dc12
Author: Nicolai Hähnle <[email protected]>
Date:   Fri Jun 17 10:30:44 2016 +0200

    radeonsi: raise SI_PM4_MAX_DW
    
    The old limit, introduced in commit afa752d3f03ac6697581ff5d324e8ac0512ef513,
    was exceeded by 4 SE configurations which hit
    si_write_harvested_raster_configs.
    
    Cc: 11.1 11.2 12.0 <[email protected]>
    Reviewed-by: Alex Deucher <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit 6c2e63698290d3ea868eefcc3e4dd51dc1e16c64)

diff --git a/src/gallium/drivers/radeonsi/si_pm4.h 
b/src/gallium/drivers/radeonsi/si_pm4.h
index 309a596..35fa6c3 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -29,7 +29,7 @@
 
 #include "radeon/radeon_winsys.h"
 
-#define SI_PM4_MAX_DW          160
+#define SI_PM4_MAX_DW          176
 #define SI_PM4_MAX_BO          1
 
 // forward defines

commit f675339b224b90cac161840716fcc348926f88c7
Author: Roland Scheidegger <[email protected]>
Date:   Sun Jun 19 03:56:11 2016 +0200

    gallivm: don't use integer min/max sse intrinsics with llvm >= 3.9
    
    Apparently, these are deprecated. There's some AutoUpgrade feature which
    is supposed to promote these to cmp/select, which apparently doesn't work
    with jit code. It is possible it's not actually even meant to work (see
    the bug filed against llvm which couldn't provide an answer either)
    but in any case this is meant to be only temporary unless the intrinsics
    are really illegal. So, just use the fallback code (which should be
    cmp/select,
    we're actually doing cmp/sext/trunc/select, but in any case llvm 3.9 manages

mesa: Changes to 'upstream-experimental'

Reply via email to