mesa: Changes to 'debian-experimental'

Timo Aaltonen Wed, 22 Jun 2016 07:39:49 -0700

 VERSION                                                       |    2 
 debian/changelog                                              |    2 
 src/compiler/Makefile.sources                                 |    1 
 src/compiler/glsl/ast_to_hir.cpp                              |   17 
 src/compiler/glsl/builtin_variables.cpp                       |   13 
 src/compiler/glsl/linker.cpp                                  |   20 -
 src/compiler/nir/nir.h                                        |    2 
 src/compiler/nir/nir_lower_alu_to_scalar.c                    |    2 
 src/compiler/nir/nir_opt_algebraic.py                         |    2 
 src/compiler/nir/nir_propagate_invariant.c                    |  196 ++++++++++
 src/gallium/auxiliary/gallivm/lp_bld_arit.c                   |    6 
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp |   34 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h   |    5 
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c                   |    4 
 src/gallium/drivers/radeonsi/si_pm4.h                         |    2 
 src/gallium/drivers/radeonsi/si_state.c                       |   13 
 src/intel/vulkan/anv_cmd_buffer.c                             |    2 
 src/intel/vulkan/anv_device.c                                 |    2 
 src/intel/vulkan/anv_meta_clear.c                             |    1 
 src/intel/vulkan/anv_pipeline.c                               |    5 
 src/intel/vulkan/anv_private.h                                |    4 
 src/intel/vulkan/gen7_pipeline.c                              |    1 
 src/intel/vulkan/gen8_cmd_buffer.c                            |   41 +-
 src/intel/vulkan/gen8_pipeline.c                              |    6 
 src/intel/vulkan/genX_cmd_buffer.c                            |   16 
 src/mesa/drivers/dri/i965/brw_context.c                       |    2 
 src/mesa/drivers/dri/i965/brw_fs.cpp                          |    5 
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp                      |   53 ++
 src/mesa/drivers/dri/i965/brw_tcs.c                           |    6 
 src/mesa/main/image.c                                         |    8 
 src/mesa/main/mtypes.h                                        |    2 
 src/mesa/main/pipelineobj.c                                   |   17 
 src/mesa/program/prog_statevars.c                             |   19 
 src/mesa/program/prog_statevars.h                             |    2 
 src/mesa/state_tracker/st_cb_compute.c                        |    3 
 src/mesa/state_tracker/st_cb_copyimage.c                      |    3 
 src/mesa/state_tracker/st_cb_texture.c                        |    9 
 src/mesa/state_tracker/st_gen_mipmap.c                        |    3 
 38 files changed, 454 insertions(+), 77 deletions(-)


New commits:
commit 5ee64a01c106975169727603b64129e8ed460003
Author: Timo Aaltonen <[email protected]>
Date:   Wed Jun 22 15:07:20 2016 +0300

    bump version

diff --git a/debian/changelog b/debian/changelog
index b5ce92a..ed2141c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-mesa (12.0.0~rc3-1) UNRELEASED; urgency=medium
+mesa (12.0.0~rc4-1) UNRELEASED; urgency=medium
 
   * New upstream release candidate.
   * symbols: Updated.

commit 5e0b11cb6dbeab7ca6a1ba2edca56701cdfde96c
Author: Emil Velikov <[email protected]>
Date:   Tue Jun 21 13:32:04 2016 +0100

    Update version to 12.0.0-rc4
    
    Signed-off-by: Emil Velikov <[email protected]>

diff --git a/VERSION b/VERSION
index 6ef7d92..076ead7 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-12.0.0-rc3
+12.0.0-rc4

commit 6306930c3f0cd790e9d8995468559453b6c75b2c
Author: Nicolai Hähnle <[email protected]>
Date:   Fri Jun 10 15:59:58 2016 +0200

    st/mesa: flush bitmap cache before CopyImageSubData
    
    Found by inspection.
    
    Cc: 11.2 12.0 <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit f9ddd52317caf14a21ec7c040fd4bb944f9842e4)

diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c
index 617e470..8afb861 100644
--- a/src/mesa/state_tracker/st_cb_copyimage.c
+++ b/src/mesa/state_tracker/st_cb_copyimage.c
@@ -23,6 +23,7 @@
  */
 
 #include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_bitmap.h"
 #include "state_tracker/st_cb_copyimage.h"
 #include "state_tracker/st_cb_fbo.h"
 #include "state_tracker/st_texture.h"
@@ -547,6 +548,8 @@ st_CopyImageSubData(struct gl_context *ctx,
    struct pipe_box box;
    int src_level, dst_level;
 
+   st_flush_bitmap_cache(st);
+
    if (src_image) {
       struct st_texture_image *src = st_texture_image(src_image);
       src_res = src->pt;

commit 76377387c2d44d0fbae21763386ac86ffb54c635
Author: Nicolai Hähnle <[email protected]>
Date:   Thu Jun 9 12:22:31 2016 +0200

    st/mesa: flush bitmap cache before texture functions
    
    As far as I can tell, a sequence of glBitmap followed by texture functions
    that refer to a texture bound as the framebuffer is well within what should
    be allowed.
    
    Found by inspection.
    
    Cc: 11.2 12.0 <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit e7fff3cfe156e13198107e5e76a77fb79ed02173)

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index cfe9c4b..cf1c351 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -48,6 +48,7 @@
 
 #include "state_tracker/st_debug.h"
 #include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_bitmap.h"
 #include "state_tracker/st_cb_fbo.h"
 #include "state_tracker/st_cb_flush.h"
 #include "state_tracker/st_cb_texture.h"
@@ -1716,6 +1717,8 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
    unsigned dstz = texImage->Face + texImage->TexObject->MinLayer;
    unsigned dst_level = 0;
 
+   st_flush_bitmap_cache(st);
+
    if (stObj->pt == stImage->pt)
       dst_level = texImage->TexObject->MinLevel + texImage->Level;
 
@@ -2181,6 +2184,8 @@ st_GetTexSubImage(struct gl_context * ctx,
    assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
           texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
 
+   st_flush_bitmap_cache(st);
+
    if (!st->prefer_blit_based_texture_transfer &&
        !_mesa_is_format_compressed(texImage->TexFormat)) {
       /* Try to avoid the fallback if we're doing texture decompression here */
@@ -2644,6 +2649,8 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims,
    unsigned bind;
    GLint srcY0, srcY1;
 
+   st_flush_bitmap_cache(st);
+
    assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
           texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
 
@@ -3166,6 +3173,8 @@ st_ClearTexSubImage(struct gl_context *ctx,
    if (!pt)
       return;
 
+   st_flush_bitmap_cache(st);
+
    u_box_3d(xoffset, yoffset, zoffset + texImage->Face,
             width, height, depth, &box);
    if (texImage->TexObject->Immutable) {
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index a14bbfa..adf02e7 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -40,6 +40,7 @@
 #include "st_context.h"
 #include "st_texture.h"
 #include "st_gen_mipmap.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_texture.h"
 
 
@@ -96,6 +97,8 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
    if (lastLevel == 0)
       return;
 
+   st_flush_bitmap_cache(st);
+
    /* The texture isn't in a "complete" state yet so set the expected
     * lastLevel here, since it won't get done in st_finalize_texture().
     */

commit 6775b169cdffecd373d57847e5d71db3fe39409a
Author: Nicolai Hähnle <[email protected]>
Date:   Thu Jun 9 12:12:34 2016 +0200

    st/mesa: flush bitmap cache before compute dispatch
    
    In the unlikely case that a program uses glBitmap to render to a framebuffer
    whose texture is bound in a compute shader.
    
    Found by inspection.
    
    Cc: 11.2 12.0 <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit c542b7e43d3a504456518c9f407e21c4e7e5fa88)

diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c
index bfc6d96..063d750 100644
--- a/src/mesa/state_tracker/st_cb_compute.c
+++ b/src/mesa/state_tracker/st_cb_compute.c
@@ -28,6 +28,7 @@
 #include "main/state.h"
 #include "st_atom.h"
 #include "st_context.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_bufferobjects.h"
 #include "st_cb_compute.h"
 
@@ -44,6 +45,8 @@ static void st_dispatch_compute_common(struct gl_context *ctx,
    struct pipe_context *pipe = st->pipe;
    struct pipe_grid_info info = { 0 };
 
+   st_flush_bitmap_cache(st);
+
    if (ctx->NewState)
       _mesa_update_state(ctx);
 

commit a0235eb0f716e05c290cad66292b703c2178af91
Author: Kenneth Graunke <[email protected]>
Date:   Wed Jun 8 16:09:02 2016 -0700

    i965: Fix multiplication of immediates on Cherryview/Broxton.
    
    Cherryview and Broxton don't support DW x DW multiplication.  We have
    piles of code to handle this, but apparently weren't retyping in the
    immediate case.
    
    For example,
    tests/spec/arb_tessellation_shader/execution/dvec3-vs-tcs-tes
    makes the simulator angry about instructions such as:
    
       mul(8) r18<1>:D r10.0<8;8,1>:D 0x00000003:D
    
    Just retype to W or UW.  It should be safe on all platforms.
    
    Cc: "12.0" <[email protected]>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462
    Reviewed-by: Matt Turner <[email protected]>
    Reviewed-by: Jordan Justen <[email protected]>
    Signed-off-by: Kenneth Graunke <[email protected]>
    (cherry picked from commit cd89c834a8b3b4e5f5874c8e1f90c9b01d541181)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0347b0a..8337774 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3564,7 +3564,10 @@ fs_visitor::lower_integer_multiplication()
                ibld.MOV(imm, inst->src[1]);
                ibld.MUL(inst->dst, imm, inst->src[0]);
             } else {
-               ibld.MUL(inst->dst, inst->src[0], inst->src[1]);
+               const bool ud = (inst->src[1].type == BRW_REGISTER_TYPE_UD);
+               ibld.MUL(inst->dst, inst->src[0],
+                        ud ? brw_imm_uw(inst->src[1].ud)
+                           : brw_imm_w(inst->src[1].d));
             }
          } else {
             /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot

commit 09a098bdeb89baacd6bbadc4180daf9c2ffaa840
Author: Jason Ekstrand <[email protected]>
Date:   Tue Jun 14 08:40:49 2016 -0700

    anv: Add proper support for depth clamping
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit eb6764c4a73006eee32e19e3afc6eab100a2ce16)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index f864248..97300c3 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -380,7 +380,7 @@ void anv_GetPhysicalDeviceFeatures(
       .logicOp                                  = true,
       .multiDrawIndirect                        = false,
       .drawIndirectFirstInstance                = false,
-      .depthClamp                               = false,
+      .depthClamp                               = true,
       .depthBiasClamp                           = false,
       .fillModeNonSolid                         = true,
       .depthBounds                              = false,
diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c
index fe750c8..7ec0608 100644
--- a/src/intel/vulkan/anv_meta_clear.c
+++ b/src/intel/vulkan/anv_meta_clear.c
@@ -173,6 +173,7 @@ create_pipeline(struct anv_device *device,
             .cullMode = VK_CULL_MODE_NONE,
             .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
             .depthBiasEnable = false,
+            .depthClampEnable = true,
          },
          .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
             .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index e41f623..32594f7 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1165,6 +1165,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    pipeline->batch.relocs = &pipeline->batch_relocs;
 
    copy_non_dynamic_state(pipeline, pCreateInfo);
+   pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
+                                  pCreateInfo->pRasterizationState->depthClampEnable;
 
    pipeline->use_repclear = extra && extra->use_repclear;
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f5500c5..052ced4 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1360,7 +1360,8 @@ VkResult
 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
 
 void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
-void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer);
+void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
+                                         bool depth_clamp_enable);
 void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
 
 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
@@ -1485,6 +1486,8 @@ struct anv_pipeline {
 
    uint32_t                                     cs_right_mask;
 
+   bool                                         depth_clamp_enable;
+
    struct {
       uint32_t                                  sf[7];
       uint32_t                                  depth_stencil_state[3];
diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index f069db9..dd34d71 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -196,6 +196,7 @@ genX(graphics_pipeline_create)(
       clip.ClipEnable               = !(extra && extra->use_rectlist),
       clip.APIMode                  = APIMODE_OGL,
       clip.ViewportXYClipTestEnable = true,
+      clip.ViewportZClipTestEnable  = !pipeline->depth_clamp_enable,
       clip.ClipMode                 = CLIPMODE_NORMAL,
 
       clip.TriangleStripListProvokingVertexSelect   = 0,
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 395d0da..e22b4e2 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -77,7 +77,8 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
 }
 
 void
-gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
+gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
+                                    bool depth_clamp_enable)
 {
    uint32_t count = cmd_buffer->state.dynamic.viewport.count;
    const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
@@ -88,8 +89,8 @@ gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
       const VkViewport *vp = &viewports[i];
 
       struct GENX(CC_VIEWPORT) cc_viewport = {
-         .MinimumDepth = vp->minDepth,
-         .MaximumDepth = vp->maxDepth,
+         .MinimumDepth = depth_clamp_enable ? vp->minDepth : 0.0f,
+         .MaximumDepth = depth_clamp_enable ? vp->maxDepth : 1.0f,
       };
 
       GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index 54585c3..2a96be0 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -85,11 +85,11 @@ emit_rs_state(struct anv_pipeline *pipeline,
       .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
       .ScissorRectangleEnable = !(extra && extra->use_rectlist),
 #if GEN_GEN == 8
-      .ViewportZClipTestEnable = true,
+      .ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
 #else
       /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
-      .ViewportZFarClipTestEnable = true,
-      .ViewportZNearClipTestEnable = true,
+      .ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable,
+      .ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable,
 #endif
       .GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
       .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 58f5e0b..3d628df 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -525,9 +525,13 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
    if (dirty)
       gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
 
-   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
+   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
       gen8_cmd_buffer_emit_viewport(cmd_buffer);
-      gen8_cmd_buffer_emit_depth_viewport(cmd_buffer);
+
+   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |
+                                  ANV_CMD_DIRTY_PIPELINE)) {
+      gen8_cmd_buffer_emit_depth_viewport(cmd_buffer,
+                                          pipeline->depth_clamp_enable);
    }
 
    if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)

commit f3c8dde2e4cac98ab190c0378e20424f0b59d9ef
Author: Jason Ekstrand <[email protected]>
Date:   Tue Jun 14 08:15:34 2016 -0700

    anv/cmd_buffer: Split emit_viewport in two
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit 8a46b505cb2c7255ad430b56c1ce0dfa9c13c559)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index cd3588a..f5500c5 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1360,6 +1360,7 @@ VkResult
 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
 
 void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
+void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer);
 void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
 
 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index df4036a..395d0da 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -40,8 +40,6 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
    const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
    struct anv_state sf_clip_state =
       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
-   struct anv_state cc_state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
 
    for (uint32_t i = 0; i < count; i++) {
       const VkViewport *vp = &viewports[i];
@@ -65,29 +63,45 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
          .YMaxViewPort = vp->y + vp->height - 1,
       };
 
+      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
+                                 &sf_clip_viewport);
+   }
+
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(sf_clip_state);
+
+   anv_batch_emit(&cmd_buffer->batch,
+                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
+      clip.SFClipViewportPointer = sf_clip_state.offset;
+   }
+}
+
+void
+gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer)
+{
+   uint32_t count = cmd_buffer->state.dynamic.viewport.count;
+   const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
+   struct anv_state cc_state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
+
+   for (uint32_t i = 0; i < count; i++) {
+      const VkViewport *vp = &viewports[i];
+
       struct GENX(CC_VIEWPORT) cc_viewport = {
          .MinimumDepth = vp->minDepth,
-         .MaximumDepth = vp->maxDepth
+         .MaximumDepth = vp->maxDepth,
       };
 
-      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
-                                 &sf_clip_viewport);
       GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
    }
 
-   if (!cmd_buffer->device->info.has_llc) {
-      anv_state_clflush(sf_clip_state);
+   if (!cmd_buffer->device->info.has_llc)
       anv_state_clflush(cc_state);
-   }
 
    anv_batch_emit(&cmd_buffer->batch,
                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
       cc.CCViewportPointer = cc_state.offset;
    }
-   anv_batch_emit(&cmd_buffer->batch,
-                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
-      clip.SFClipViewportPointer = sf_clip_state.offset;
-   }
 }
 #endif
 
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index c62bed4..58f5e0b 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -525,8 +525,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
    if (dirty)
       gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
 
-   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
+   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
       gen8_cmd_buffer_emit_viewport(cmd_buffer);
+      gen8_cmd_buffer_emit_depth_viewport(cmd_buffer);
+   }
 
    if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
       gen7_cmd_buffer_emit_scissor(cmd_buffer);

commit 3fddb9fd46a6066d8ecf0bd19a370acbbbc05b2b
Author: Jason Ekstrand <[email protected]>
Date:   Mon Jun 13 17:09:37 2016 -0700

    anv/cmd_buffer: Set depth/stencil extent based on the image
    
    It used to be based on the framebuffer which isn't quite right.
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit 20e95a746df34923eb4aac5e7f1ab6d722432d89)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index d9acf58..c62bed4 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1024,11 +1024,11 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
          db.DepthBufferObjectControlState = GENX(MOCS),
 
          db.SurfacePitch         = image->depth_surface.isl.row_pitch - 1;
-         db.Height               = fb->height - 1;
-         db.Width                = fb->width - 1;
-         db.LOD                  = 0;
-         db.Depth                = 1 - 1;
-         db.MinimumArrayElement  = 0;
+         db.Height               = image->extent.height - 1;
+         db.Width                = image->extent.width - 1;
+         db.LOD                  = iview->base_mip;
+         db.Depth                = image->array_size - 1; /* FIXME: 3-D */
+         db.MinimumArrayElement  = iview->base_layer;
 
 #if GEN_GEN >= 8
          db.SurfaceQPitch =

commit f614a1f4d88d02f429f29a4e95596e4a40ba7cce
Author: Jason Ekstrand <[email protected]>
Date:   Wed Jun 15 14:30:33 2016 -0700

    anv/cmd_buffer: Don't crash if push constants are provided for missing stages
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit b65f2e4163c9180e6a022c0afec018b08e4c5aa5)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 5be5f3e..24c18fe 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -1038,7 +1038,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
       cmd_buffer->state.pipeline->prog_data[stage];
 
    /* If we don't actually have any push constants, bail. */
-   if (data == NULL || prog_data->nr_params == 0)
+   if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
       return (struct anv_state) { .offset = 0 };
 
    struct anv_state state =

commit f4bc7218d59d55825c4ab2b76e6134827f10d401
Author: Jason Ekstrand <[email protected]>
Date:   Thu Jun 16 10:57:39 2016 -0700

    anv/pipeline: Do invariance propagation on SPIR-V shaders
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit e6c2fe451962e364f30f689dc48c34e2b6161b25)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 60b7c6b..e41f623 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -165,6 +165,9 @@ anv_shader_compile_to_nir(struct anv_device *device,
       nir_remove_dead_variables(nir, nir_var_system_value);
       nir_validate_shader(nir);
 
+      nir_propagate_invariant(nir);
+      nir_validate_shader(nir);
+
       nir_lower_io_to_temporaries(entry_point->shader, entry_point, true, false);
 
       nir_lower_system_values(nir);

commit 77f241bd37e7d0a76a0ac9223bc4cebba322994c
Author: Jason Ekstrand <[email protected]>
Date:   Mon Jun 13 14:41:05 2016 -0700

    nir/alu_to_scalar: Respect the exact ALU operation qualifier
    
    Just setting builder->exact isn't sufficient because that only applies to
    instructions that are built with the builder but instructions created
    manually and only inserted using the builder are left alone.
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit bec07b729242f6a2dcf5a12ce75bf8b07ea658e0)

diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index b491791..4f72cf7 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -56,6 +56,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
          nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
          chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
       }
+      chan->exact = instr->exact;
 
       nir_builder_instr_insert(builder, &chan->instr);
 
@@ -229,6 +230,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
       nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size);
       lower->dest.saturate = instr->dest.saturate;
       comps[chan] = &lower->dest.dest.ssa;
+      lower->exact = instr->exact;
 
       nir_builder_instr_insert(b, &lower->instr);
    }

commit deedb368de7dc50f7196af440c338dcf6a361564
Author: Jason Ekstrand <[email protected]>
Date:   Mon Jun 13 12:47:19 2016 -0700

    nir: Add a pass for propagating invariant decorations
    
    This pass is similar to propagate_invariance in the GLSL compiler.  The
    real "output" of this pass is that any algebraic operations which are
    eventually consumed by an invariant variable get marked as "exact".
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Kenneth Graunke <[email protected]>
    Cc: "12.0" <[email protected]>
    (cherry picked from commit 202751fbb7e3d35c1aa84f325f862245dab67f6c)

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 09a756b..bbd5d14 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -231,6 +231,7 @@ NIR_FILES = \
        nir/nir_phi_builder.c \
        nir/nir_phi_builder.h \
        nir/nir_print.c \
+       nir/nir_propagate_invariant.c \
        nir/nir_remove_dead_variables.c \
        nir/nir_repair_ssa.c \
        nir/nir_search.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 20f6520..9816ed6 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2290,6 +2290,8 @@ bool nir_lower_returns(nir_shader *shader);
 
 bool nir_inline_functions(nir_shader *shader);
 
+bool nir_propagate_invariant(nir_shader *shader);
+
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
 void nir_lower_var_copies(nir_shader *shader);
 
diff --git a/src/compiler/nir/nir_propagate_invariant.c b/src/compiler/nir/nir_propagate_invariant.c
new file mode 100644
index 0000000..7b5bd6c
--- /dev/null
+++ b/src/compiler/nir/nir_propagate_invariant.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * Record the value read by src in the invariants set.
+ *
+ * The set key is the SSA definition when the source is in SSA form and the
+ * register otherwise.
+ */
+static void
+add_src(nir_src *src, struct set *invariants)
+{
+   const void *key = src->is_ssa ? (const void *)src->ssa
+                                 : (const void *)src->reg.reg;
+
+   _mesa_set_add(invariants, key);
+}
+
+/**
+ * Callback form of add_src(): state is the invariants set.
+ *
+ * Always returns true (presumably so that nir_foreach_src keeps visiting the
+ * remaining sources -- confirm against the iterator's contract).
+ */
+static bool
+add_src_cb(nir_src *src, void *state)
+{
+   struct set *invariants = state;
+
+   add_src(src, invariants);
+   return true;
+}
+
+/**
+ * Check whether the value written by dest is in the invariants set.
+ *
+ * Looks up the address of the embedded SSA definition for SSA destinations
+ * and the register otherwise, matching the keys inserted by add_src().
+ */
+static bool
+dest_is_invariant(nir_dest *dest, struct set *invariants)
+{
+   const void *key = dest->is_ssa ? (const void *)&dest->ssa
+                                  : (const void *)dest->reg.reg;
+
+   return _mesa_set_search(invariants, key) != NULL;
+}
+
+/**
+ * Add the condition of every enclosing if-statement to the invariants set.
+ *
+ * Walks the parent chain starting at cf itself; for each node of type
+ * nir_cf_node_if its condition source is added.  cf must not be NULL.
+ */
+static void
+add_cf_node(nir_cf_node *cf, struct set *invariants)
+{
+   nir_cf_node *node = cf;
+
+   do {
+      if (node->type == nir_cf_node_if)
+         add_src(&nir_cf_node_as_if(node)->condition, invariants);
+
+      node = node->parent;
+   } while (node);
+}
+
+/** Insert the variable var into the invariants set, keyed by its pointer. */
+static void
+add_var(nir_variable *var, struct set *invariants)
+{
+   const void *key = var;
+
+   _mesa_set_add(invariants, key);
+}
+
+/**
+ * A variable counts as invariant if its data.invariant flag is set or if it
+ * has been added to the invariants set.
+ */
+static bool
+var_is_invariant(nir_variable *var, struct set *invariants)
+{
+   if (var->data.invariant)
+      return true;
+
+   return _mesa_set_search(invariants, var) != NULL;
+}
+
+/**
+ * Propagate invariance backwards through a single instruction.
+ *
+ * If what instr produces (or the variable it writes) is already known to be
+ * invariant, the things it consumes are added to the invariants set.  ALU
+ * instructions with an invariant destination are additionally marked exact.
+ *
+ * Call instructions and parallel copies are not expected here and hit
+ * unreachable().
+ */
+static void
+propagate_invariant_instr(nir_instr *instr, struct set *invariants)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      if (!dest_is_invariant(&alu->dest.dest, invariants))
+         break;
+
+      /* Marking the ALU op exact is the real output of this pass. */
+      alu->exact = true;
+      nir_foreach_src(instr, add_src_cb, invariants);
+      break;
+   }
+
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      if (dest_is_invariant(&tex->dest, invariants))
+         nir_foreach_src(instr, add_src_cb, invariants);
+      break;
+   }
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_copy_var:
+         /* If the destination is invariant then so is the source */
+         if (var_is_invariant(intrin->variables[0]->var, invariants))
+            add_var(intrin->variables[1]->var, invariants);
+         break;
+
+      case nir_intrinsic_load_var:
+         /* An invariant loaded value makes the variable it reads invariant */
+         if (dest_is_invariant(&intrin->dest, invariants))
+            add_var(intrin->variables[0]->var, invariants);
+         break;
+
+      case nir_intrinsic_store_var:
+         /* Storing to an invariant variable makes the stored value invariant */
+         if (var_is_invariant(intrin->variables[0]->var, invariants))
+            add_src(&intrin->src[0], invariants);
+         break;
+
+      default:
+         /* Nothing to do */
+         break;
+      }
+      /* Leave the outer switch explicitly rather than silently falling
+       * through into the "nothing to do" cases below.
+       */
+      break;
+   }
+
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_load_const:
+      break; /* Nothing to do */
+
+   case nir_instr_type_phi: {
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      if (!dest_is_invariant(&phi->dest, invariants))
+         break;
+
+      /* Each incoming value becomes invariant, and so do the conditions of
+       * the ifs enclosing the predecessor block it arrives from.
+       */
+      nir_foreach_phi_src(src, phi) {
+         add_src(&src->src, invariants);
+         add_cf_node(&src->pred->cf_node, invariants);
+      }
+      break;
+   }
+
+   case nir_instr_type_call:
+      unreachable("This pass must be run after function inlining");
+
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Cannot have this instruction type");
+   }
+}
+
+/**
+ * Run invariance propagation over impl until the invariants set stops
+ * growing.
+ *
+ * Each sweep visits blocks, and the instructions within them, in reverse
+ * order.  Returns true if any sweep added entries to the set, in which case
+ * block-index, dominance and live-SSA-def metadata are marked as preserved.
+ */
+static bool
+propagate_invariant_impl(nir_function_impl *impl, struct set *invariants)
+{
+   bool progress = false;
+   bool grew;
+
+   do {
+      const uint32_t entries_before = invariants->entries;
+
+      nir_foreach_block_reverse(block, impl) {
+         nir_foreach_instr_reverse(instr, block)
+            propagate_invariant_instr(instr, invariants);
+      }
+
+      /* Keep sweeping for as long as the last sweep added something. */
+      grew = invariants->entries > entries_before;
+      if (grew)
+         progress = true;
+   } while (grew);
+
+   if (!progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance |
+                               nir_metadata_live_ssa_defs);
+   return true;
+}
+
+/**
+ * Pass entry point: propagate invariance through every function in shader
+ * that has an implementation.
+ *
+ * One pointer-keyed set is shared across all functions and destroyed before
+ * returning.  Returns true if any function implementation reported progress.
+ */
+bool
+nir_propagate_invariant(nir_shader *shader)
+{
+   bool progress = false;
+
+   /* Hash set of invariant things */
+   struct set *invariants = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      if (propagate_invariant_impl(function->impl, invariants))
+         progress = true;
+   }
+
+   _mesa_set_destroy(invariants, NULL);
+
+   return progress;
+}

commit bac23b13eb75a7bacdec439eb4c239a8dedb24e7
Author: Jason Ekstrand <[email protected]>
Date:   Sat Jun 18 12:30:36 2016 -0700

    nir/algebraic: Remove imprecise flog2 optimizations
    
    While mathematically correct, these two optimizations result in an
    expression with substantially lower precision than the original.  For any
    positive finite floating-point value, log2(x) is well-defined and finite.
    More precisely, it is in the range [-150, 150] so any sum of logarithms
    log2(a) + log2(b) is also well-defined and finite as long as a and b are
    both positive and finite.  However, if a and b are either very small or
    very large, their product may get flushed to infinity or zero causing
    log2(a * b) to be nowhere close to log2(a) + log2(b).
    
    This imprecision was causing incorrect rendering in The Talos Principle because
    part of its HDR rendering process involves doing 8 texture operations,
    clamping the result to [0, 65000], taking a dot-product with a constant,
    and then taking the log2.  This is done 6 or 8 times and summed to produce
    the final result which is written to a red texture.  In cases where you
    have a region of the screen that is very dark, it can end up getting a
    result value of -inf which is not what is intended.
    
    Signed-off-by: Jason Ekstrand <[email protected]>
    Reviewed-by: Matt Turner <[email protected]>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96425
    Cc: "11.1 11.2 12.0" <[email protected]>
    (cherry picked from commit 68e308d85355079ad93bd4e16cba164784740fdf)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index f8db2b6..a7a541a 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -224,8 +224,6 @@ optimizations = [
    (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
    (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
    (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
-   (('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
-   (('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
    (('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
    # Division and reciprocal
    (('~fdiv', 1.0, a), ('frcp', a)),

commit b03b256e921c3f7cebfcf9efd5bdf7b403b9c961
Author: Nicolai Hähnle <[email protected]>
Date:   Fri Jun 17 10:48:53 2016 +0200

    radeonsi: fix calculation of valid RB mask per SE
    
    The old calculation treated too many RBs as disabled.
    
    Cc: 11.0 11.1 11.2 12.0 <[email protected]>
    Reviewed-by: Alex Deucher <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit c95175581e983642dc4b23d059e6eaff5b79d2db)

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 47af9c8..96da179 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3478,10 +3478,15 @@ si_write_harvested_raster_configs(struct si_context *sctx,
        unsigned se_mask[4];
        unsigned se;
 
-       se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
-       se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
-       se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
-       se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+       se_mask[0] = ((1 << rb_per_se) - 1);
+       se_mask[1] = (se_mask[0] << rb_per_se);
+       se_mask[2] = (se_mask[1] << rb_per_se);
+       se_mask[3] = (se_mask[2] << rb_per_se);
+
+       se_mask[0] &= rb_mask;
+       se_mask[1] &= rb_mask;
+       se_mask[2] &= rb_mask;
+       se_mask[3] &= rb_mask;
 
        assert(num_se == 1 || num_se == 2 || num_se == 4);
        assert(sh_per_se == 1 || sh_per_se == 2);

commit 52ae654569cb1e0e42f12b492fdd62d7cf20dc12
Author: Nicolai Hähnle <[email protected]>
Date:   Fri Jun 17 10:30:44 2016 +0200

    radeonsi: raise SI_PM4_MAX_DW
    
    The old limit, introduced in commit afa752d3f03ac6697581ff5d324e8ac0512ef513,
    was exceeded by 4 SE configurations which hit si_write_harvested_raster_configs.
    
    Cc: 11.1 11.2 12.0 <[email protected]>
    Reviewed-by: Alex Deucher <[email protected]>
    Reviewed-by: Marek Olšák <[email protected]>
    (cherry picked from commit 6c2e63698290d3ea868eefcc3e4dd51dc1e16c64)

diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 309a596..35fa6c3 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -29,7 +29,7 @@
 
 #include "radeon/radeon_winsys.h"
 
-#define SI_PM4_MAX_DW          160

mesa: Changes to 'debian-experimental'

Reply via email to