Mesa (master): radv: Assert when setting 0 registers in a sequence.

2017-03-28 Thread Bas Nieuwenhuizen
Module: Mesa
Branch: master
Commit: 78ee8b3f849063e3e37db0767212397da522b6fa
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=78ee8b3f849063e3e37db0767212397da522b6fa

Author: Bas Nieuwenhuizen 
Date:   Tue Mar 28 22:29:16 2017 +0200

radv: Assert when setting 0 registers in a sequence.

To catch more of those hangs early.

Signed-off-by: Bas Nieuwenhuizen 
Acked-by: Dave Airlie 

---

 src/amd/vulkan/radv_cs.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h
index 2c8935f306..0990270f5c 100644
--- a/src/amd/vulkan/radv_cs.h
+++ b/src/amd/vulkan/radv_cs.h
@@ -43,6 +43,7 @@ static inline void radeon_set_config_reg_seq(struct 
radeon_winsys_cs *cs, unsign
 {
 assert(reg < R600_CONTEXT_REG_OFFSET);
 assert(cs->cdw + 2 + num <= cs->max_dw);
+assert(num);
 radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
 radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
 }
@@ -57,6 +58,7 @@ static inline void radeon_set_context_reg_seq(struct 
radeon_winsys_cs *cs, unsig
 {
 assert(reg >= R600_CONTEXT_REG_OFFSET);
 assert(cs->cdw + 2 + num <= cs->max_dw);
+assert(num);
 radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
 radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
 }
@@ -83,6 +85,7 @@ static inline void radeon_set_sh_reg_seq(struct 
radeon_winsys_cs *cs, unsigned r
 {
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
+   assert(num);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
 }
@@ -97,6 +100,7 @@ static inline void radeon_set_uconfig_reg_seq(struct 
radeon_winsys_cs *cs, unsig
 {
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
+   assert(num);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): anv/cmd_buffer: Refactor flush_pipeline_select_*

2017-03-28 Thread Jason Ekstrand
Module: Mesa
Branch: master
Commit: f3673db3d61b77415a09ca5d44f976e6fb869ec7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3673db3d61b77415a09ca5d44f976e6fb869ec7

Author: Jason Ekstrand 
Date:   Wed Mar 15 11:58:53 2017 -0700

anv/cmd_buffer: Refactor flush_pipeline_select_*

While having the _3d and _gpgpu versions is nice, there's no reason why
we need to have duplicated logic for tracking the current pipeline.

Reviewed-by: Iago Toral Quiroga 

---

 src/intel/vulkan/genX_cmd_buffer.c | 42 +++---
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index d0ddc29f00..1ce549a202 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2118,9 +2118,12 @@ void genX(CmdDispatchIndirect)(
 }
 
 static void
-flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer,
-  uint32_t pipeline)
+genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
+uint32_t pipeline)
 {
+   if (cmd_buffer->state.current_pipeline == pipeline)
+  return;
+
 #if GEN_GEN >= 8 && GEN_GEN < 10
/* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
 *
@@ -2160,40 +2163,27 @@ flush_pipeline_before_pipeline_select(struct 
anv_cmd_buffer *cmd_buffer,
   pc.InstructionCacheInvalidateEnable = true;
   pc.PostSyncOperation= NoWrite;
}
+
+   anv_batch_emit(_buffer->batch, GENX(PIPELINE_SELECT), ps) {
+#if GEN_GEN >= 9
+  ps.MaskBits = 3;
+#endif
+  ps.PipelineSelection = pipeline;
+   }
+
+   cmd_buffer->state.current_pipeline = pipeline;
 }
 
 void
 genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer)
 {
-   if (cmd_buffer->state.current_pipeline != _3D) {
-  flush_pipeline_before_pipeline_select(cmd_buffer, _3D);
-
-  anv_batch_emit(_buffer->batch, GENX(PIPELINE_SELECT), ps) {
-#if GEN_GEN >= 9
- ps.MaskBits = 3;
-#endif
- ps.PipelineSelection = _3D;
-  }
-
-  cmd_buffer->state.current_pipeline = _3D;
-   }
+   genX(flush_pipeline_select)(cmd_buffer, _3D);
 }
 
 void
 genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer)
 {
-   if (cmd_buffer->state.current_pipeline != GPGPU) {
-  flush_pipeline_before_pipeline_select(cmd_buffer, GPGPU);
-
-  anv_batch_emit(_buffer->batch, GENX(PIPELINE_SELECT), ps) {
-#if GEN_GEN >= 9
- ps.MaskBits = 3;
-#endif
- ps.PipelineSelection = GPGPU;
-  }
-
-  cmd_buffer->state.current_pipeline = GPGPU;
-   }
+   genX(flush_pipeline_select)(cmd_buffer, GPGPU);
 }
 
 void

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): anv: Make anv_get_layerCount a macro

2017-03-28 Thread Jason Ekstrand
Module: Mesa
Branch: master
Commit: 1b8fa8dd794c22aba43b16470e75ecaebf902b11
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1b8fa8dd794c22aba43b16470e75ecaebf902b11

Author: Jason Ekstrand 
Date:   Fri Mar 24 16:20:18 2017 -0700

anv: Make anv_get_layerCount a macro

Reviewed-by: Lionel Landwerlin 
Cc: "13.0 17.0" 

---

 src/intel/vulkan/anv_private.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 27c887c65c..74e80e8d53 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1828,13 +1828,13 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
const devinfo,
 const struct anv_image *image,
 const VkImageAspectFlags aspects,
 const VkImageLayout layout);
-static inline uint32_t
-anv_get_layerCount(const struct anv_image *image,
-   const VkImageSubresourceRange *range)
-{
-   return range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
-  image->array_size - range->baseArrayLayer : range->layerCount;
-}
+
+/* This is defined as a macro so that it works for both
+ * VkImageSubresourceRange and VkImageSubresourceLayers
+ */
+#define anv_get_layerCount(_image, _range) \
+   ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
+(_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)
 
 static inline uint32_t
 anv_get_levelCount(const struct anv_image *image,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): anv/blorp: Use anv_get_layerCount everywhere

2017-03-28 Thread Jason Ekstrand
Module: Mesa
Branch: master
Commit: 9319ef96fd5c2489754eae1b058e4087d7259341
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9319ef96fd5c2489754eae1b058e4087d7259341

Author: Jason Ekstrand 
Date:   Fri Mar 24 16:20:35 2017 -0700

anv/blorp: Use anv_get_layerCount everywhere

Reviewed-by: Lionel Landwerlin 
Cc: "13.0 17.0" 

---

 src/intel/vulkan/anv_blorp.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 16f1692ff5..72a468a744 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -233,7 +233,8 @@ void anv_CmdCopyImage(
  layer_count = pRegions[r].extent.depth;
   } else {
  dst_base_layer = pRegions[r].dstSubresource.baseArrayLayer;
- layer_count = pRegions[r].dstSubresource.layerCount;
+ layer_count =
+anv_get_layerCount(dst_image, [r].dstSubresource);
   }
 
   unsigned src_base_layer;
@@ -241,7 +242,8 @@ void anv_CmdCopyImage(
  src_base_layer = pRegions[r].srcOffset.z;
   } else {
  src_base_layer = pRegions[r].srcSubresource.baseArrayLayer;
- assert(pRegions[r].srcSubresource.layerCount == layer_count);
+ assert(layer_count ==
+anv_get_layerCount(src_image, [r].srcSubresource));
   }
 
   assert(pRegions[r].srcSubresource.aspectMask ==
@@ -313,7 +315,8 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
  anv_sanitize_image_extent(anv_image->type, pRegions[r].imageExtent);
   if (anv_image->type != VK_IMAGE_TYPE_3D) {
  image.offset.z = pRegions[r].imageSubresource.baseArrayLayer;
- extent.depth = pRegions[r].imageSubresource.layerCount;
+ extent.depth =
+anv_get_layerCount(anv_image, [r].imageSubresource);
   }
 
   const enum isl_format buffer_format =
@@ -467,7 +470,7 @@ void anv_CmdBlitImage(
  dst_end = pRegions[r].dstOffsets[1].z;
   } else {
  dst_start = dst_res->baseArrayLayer;
- dst_end = dst_start + dst_res->layerCount;
+ dst_end = dst_start + anv_get_layerCount(dst_image, dst_res);
   }
 
   unsigned src_start, src_end;
@@ -477,7 +480,7 @@ void anv_CmdBlitImage(
  src_end = pRegions[r].srcOffsets[1].z;
   } else {
  src_start = src_res->baseArrayLayer;
- src_end = src_start + src_res->layerCount;
+ src_end = src_start + anv_get_layerCount(src_image, src_res);
   }
 
   bool flip_z = flip_coords(_start, _end, _start, _end);
@@ -1407,10 +1410,11 @@ void anv_CmdResolveImage(
for (uint32_t r = 0; r < regionCount; r++) {
   assert(pRegions[r].srcSubresource.aspectMask ==
  pRegions[r].dstSubresource.aspectMask);
-  assert(pRegions[r].srcSubresource.layerCount ==
- pRegions[r].dstSubresource.layerCount);
+  assert(anv_get_layerCount(src_image, [r].srcSubresource) ==
+ anv_get_layerCount(dst_image, [r].dstSubresource));
 
-  const uint32_t layer_count = pRegions[r].dstSubresource.layerCount;
+  const uint32_t layer_count =
+ anv_get_layerCount(dst_image, [r].dstSubresource);
 
   for (uint32_t layer = 0; layer < layer_count; layer++) {
  resolve_image(,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): anv: Flush caches prior to PIPELINE_SELECT on all gens

2017-03-28 Thread Jason Ekstrand
Module: Mesa
Branch: master
Commit: 6baae9625d26d282a72481598f9431fcad3211f6
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6baae9625d26d282a72481598f9431fcad3211f6

Author: Jason Ekstrand 
Date:   Wed Mar 15 11:58:52 2017 -0700

anv: Flush caches prior to PIPELINE_SELECT on all gens

The programming note that says we need to do this still exists in the
SkyLake PRM and, from looking at the bspec, seems like it may apply to
all hardware generations SNB+.  Unfortunately, this isn't particularly
clear cut since there is also language in the bspec that says you can
skip the flushing and stall to get better throughput.  Experimentation
with the "Car Chase" benchmark in GL seems to indicate that some form of
flushing is still needed.  This commit makes us do the full set of
flushes regardless of hardware generation.  We can always reduce the
flushing later.

Reported-by: Topi Pohjolainen 
Reviewed-by: Iago Toral Quiroga 
Cc: "17.0 13.0" 

---

 src/intel/vulkan/genX_cmd_buffer.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 5d923a8c08..d0ddc29f00 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2133,8 +2133,8 @@ flush_pipeline_before_pipeline_select(struct 
anv_cmd_buffer *cmd_buffer,
 */
if (pipeline == GPGPU)
   anv_batch_emit(_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t);
+#endif
 
-#elif GEN_GEN <= 7
/* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
 * PIPELINE_SELECT [DevBWR+]":
 *
@@ -2160,7 +2160,6 @@ flush_pipeline_before_pipeline_select(struct 
anv_cmd_buffer *cmd_buffer,
   pc.InstructionCacheInvalidateEnable = true;
   pc.PostSyncOperation= NoWrite;
}
-#endif
 }
 
 void

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): anv/cmd_buffer: Apply flush operations prior to executing secondaries

2017-03-28 Thread Jason Ekstrand
Module: Mesa
Branch: master
Commit: 01a65dc43be3a4bf6b8a901586f718f4b6b3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=01a65dc43be3a4bf6b8a901586f718f4b6b3

Author: Jason Ekstrand 
Date:   Fri Mar 24 16:30:24 2017 -0700

anv/cmd_buffer: Apply flush operations prior to executing secondaries

This fixes rendering issues in the Vulkan port of skia on some hardware.

Reviewed-by: Lionel Landwerlin 
Cc: "13.0 17.0" 

---

 src/intel/vulkan/genX_cmd_buffer.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 39856b9af7..b87d8693fd 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -654,6 +654,11 @@ genX(CmdExecuteCommands)(
 */
genX(cmd_buffer_enable_pma_fix)(primary, false);
 
+   /* The secondary command buffer doesn't know which textures etc. have been
+* flushed prior to their execution.  Apply those flushes now.
+*/
+   genX(cmd_buffer_apply_pipe_flushes)(primary);
+
for (uint32_t i = 0; i < commandBufferCount; i++) {
   ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): anv/cmd_buffer: Fix bad indentation

2017-03-28 Thread Jason Ekstrand
Module: Mesa
Branch: master
Commit: 0fe3dcce4c3e8b86a60beefe4c5adc760f2d59f8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0fe3dcce4c3e8b86a60beefe4c5adc760f2d59f8

Author: Jason Ekstrand 
Date:   Wed Mar 15 11:58:51 2017 -0700

anv/cmd_buffer: Fix bad indentation

A bunch of code was indented in such a way that it looked like it went
with the if statement above but it definitely didn't.

Reviewed-by: Iago Toral Quiroga 
Cc: "17.0 13.0" 

---

 src/intel/vulkan/genX_cmd_buffer.c | 49 +++---
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index b87d8693fd..5d923a8c08 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2133,32 +2133,33 @@ flush_pipeline_before_pipeline_select(struct 
anv_cmd_buffer *cmd_buffer,
 */
if (pipeline == GPGPU)
   anv_batch_emit(_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t);
+
 #elif GEN_GEN <= 7
-  /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
-   * PIPELINE_SELECT [DevBWR+]":
-   *
-   *   Project: DEVSNB+
-   *
-   *   Software must ensure all the write caches are flushed through a
-   *   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
-   *   command to invalidate read only caches prior to programming
-   *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
-   */
-  anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.RenderTargetCacheFlushEnable  = true;
- pc.DepthCacheFlushEnable = true;
- pc.DCFlushEnable = true;
- pc.PostSyncOperation = NoWrite;
- pc.CommandStreamerStallEnable= true;
-  }
+   /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+* PIPELINE_SELECT [DevBWR+]":
+*
+*   Project: DEVSNB+
+*
+*   Software must ensure all the write caches are flushed through a
+*   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
+*   command to invalidate read only caches prior to programming
+*   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
+*/
+   anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) {
+  pc.RenderTargetCacheFlushEnable  = true;
+  pc.DepthCacheFlushEnable = true;
+  pc.DCFlushEnable = true;
+  pc.PostSyncOperation = NoWrite;
+  pc.CommandStreamerStallEnable= true;
+   }
 
-  anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.TextureCacheInvalidationEnable   = true;
- pc.ConstantCacheInvalidationEnable  = true;
- pc.StateCacheInvalidationEnable = true;
- pc.InstructionCacheInvalidateEnable = true;
- pc.PostSyncOperation= NoWrite;
-  }
+   anv_batch_emit(_buffer->batch, GENX(PIPE_CONTROL), pc) {
+  pc.TextureCacheInvalidationEnable   = true;
+  pc.ConstantCacheInvalidationEnable  = true;
+  pc.StateCacheInvalidationEnable = true;
+  pc.InstructionCacheInvalidateEnable = true;
+  pc.PostSyncOperation= NoWrite;
+   }
 #endif
 }
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: only emit ps_input_cntl if we have any to output

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 93d61e494518a5dd170c2b098b2ed7a26465d049
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=93d61e494518a5dd170c2b098b2ed7a26465d049

Author: Dave Airlie 
Date:   Tue Mar 28 20:09:36 2017 +0100

radv: only emit ps_input_cntl if we have any to output

Otherwise we get GPU hangs.

Reported-by: Alex Smith 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index e994df65fd..e6f098c208 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -697,9 +697,12 @@ radv_emit_fragment_shader(struct radv_cmd_buffer 
*cmd_buffer,
radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, 
blend->cb_target_mask);
radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, 
blend->cb_shader_mask);
 
-   radeon_set_context_reg_seq(cmd_buffer->cs, 
R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num);
-   for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++)
-   radeon_emit(cmd_buffer->cs, 
pipeline->graphics.ps_input_cntl[i]);
+   if (pipeline->graphics.ps_input_cntl_num) {
+   radeon_set_context_reg_seq(cmd_buffer->cs, 
R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num);
+   for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; 
i++) {
+   radeon_emit(cmd_buffer->cs, 
pipeline->graphics.ps_input_cntl[i]);
+   }
+   }
 }
 
 static void

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): glx: Remove #include <GL/glxint.h>

2017-03-28 Thread Adam Jackson
Module: Mesa
Branch: master
Commit: f208bdc0d27cf7836420272738b707f2bad9c92a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f208bdc0d27cf7836420272738b707f2bad9c92a

Author: Adam Jackson 
Date:   Wed Mar 22 14:02:52 2017 -0400

glx: Remove #include <GL/glxint.h>

We're not using anything in it, and we don't want to inherit struct
definitions from some other package anyway.

Signed-off-by: Adam Jackson 

---

 src/glx/glxconfig.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/glx/glxconfig.c b/src/glx/glxconfig.c
index e5718b143f..0e1643fcd8 100644
--- a/src/glx/glxconfig.c
+++ b/src/glx/glxconfig.c
@@ -32,7 +32,6 @@
  */
 
 #include 
-#include "GL/glxint.h"
 #include 
 #include 
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): si_compute: check NULL return from u_upload_alloc

2017-03-28 Thread Julien Isorce
Module: Mesa
Branch: master
Commit: 4a5e779b5f9d169fd043ffaead1525040af816f3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a5e779b5f9d169fd043ffaead1525040af816f3

Author: Julien Isorce 
Date:   Thu Mar 23 13:43:49 2017 +

si_compute: check NULL return from u_upload_alloc

Signed-off-by: Julien Isorce 
Reviewed-by: Marek Olšák 

---

 src/gallium/drivers/radeonsi/si_compute.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 46476b68be..913a2ddbfe 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -579,7 +579,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context 
*sctx,
}
 }
 
-static void si_upload_compute_input(struct si_context *sctx,
+static bool si_upload_compute_input(struct si_context *sctx,
const amd_kernel_code_t *code_object,
const struct pipe_grid_info *info)
 {
@@ -602,6 +602,9 @@ static void si_upload_compute_input(struct si_context *sctx,
   _args_offset,
   (struct pipe_resource**)_buffer, _args_ptr);
 
+   if (unlikely(!kernel_args_ptr))
+   return false;
+
kernel_args = (uint32_t*)kernel_args_ptr;
kernel_args_va = input_buffer->gpu_address + kernel_args_offset;
 
@@ -636,6 +639,8 @@ static void si_upload_compute_input(struct si_context *sctx,
}
 
r600_resource_reference(_buffer, NULL);
+
+   return true;
 }
 
 static void si_setup_tgsi_grid(struct si_context *sctx,
@@ -790,8 +795,11 @@ static void si_launch_grid(
si_set_atom_dirty(sctx, sctx->atoms.s.render_cond, false);
}
 
-   if (program->input_size || program->ir_type == PIPE_SHADER_IR_NATIVE)
-   si_upload_compute_input(sctx, code_object, info);
+   if ((program->input_size ||
+program->ir_type == PIPE_SHADER_IR_NATIVE) &&
+   unlikely(!si_upload_compute_input(sctx, code_object, info))) {
+   return;
+   }
 
/* Global buffers */
for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): swr: [rasterizer jitter] fix llvm-5.0.0 build bustage

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: 79d92a72d5866fb4a00188fc5cb48d4385c46bb9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=79d92a72d5866fb4a00188fc5cb48d4385c46bb9

Author: Tim Rowley 
Date:   Mon Mar 27 13:29:31 2017 -0500

swr: [rasterizer jitter] fix llvm-5.0.0 build bustage

Add CreateAlignmentAssumptionHelper to gen_llvm_ir_macros.py ignore list.

Reviewed-by: Bruce Cherniak 

---

 src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index dbf56471ee..2ed2b2f61e 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -140,10 +140,9 @@ def parse_ir_builder(input_file):
 ignore = False
 
 # The following functions need to be ignored.
-if func_name == 'CreateInsertNUWNSWBinOp':
-ignore = True
-
-if func_name == 'CreateMaskedIntrinsic':
+if (func_name == 'CreateInsertNUWNSWBinOp' or
+func_name == 'CreateMaskedIntrinsic' or
+func_name == 'CreateAlignmentAssumptionHelper'):
 ignore = True
 
 # Convert CamelCase to CAMEL_CASE

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): isl: Validate the calculated row pitch (v45)

2017-03-28 Thread Chad Versace
Module: Mesa
Branch: master
Commit: 23802dafc2d5e04e6d2d444855961082b5887400
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=23802dafc2d5e04e6d2d444855961082b5887400

Author: Chad Versace 
Date:   Tue Mar 21 14:11:57 2017 -0700

isl: Validate the calculated row pitch (v45)

Validate that isl_surf::row_pitch fits in the below bitfields,
if applicable based on isl_surf::usage.

RENDER_SURFACE_STATE::SurfacePitch
RENDER_SURFACE_STATE::AuxiliarySurfacePitch
3DSTATE_DEPTH_BUFFER::SurfacePitch
3DSTATE_HIER_DEPTH_BUFFER::SurfacePitch

v2:
  -Add a Makefile dependency on generated header genX_bits.h.
v3:
  - Test ISL_SURF_USAGE_STORAGE_BIT too. [for jekstrand]
  - Drop explicit dependency on generated header. [for emil]
v4:
  - Rebase for new gen_bits_header.py script.
  - Replace gen_10x with gen_device_info*.
v5:
  - Drop FINISHME for validation of GEN9 1D row pitch. [for jekstrand]
  - Reformat bit tests. [for jekstrand]

Reviewed-by: Jason Ekstrand  (v4)

---

 src/intel/isl/isl.c | 70 -
 1 file changed, 64 insertions(+), 6 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 81f40b6a6f..749fcdf46b 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -25,6 +25,8 @@
 #include 
 #include 
 
+#include "genxml/genX_bits.h"
+
 #include "isl.h"
 #include "isl_gen4.h"
 #include "isl_gen6.h"
@@ -1089,18 +1091,72 @@ isl_calc_min_row_pitch(const struct isl_device *dev,
}
 }
 
-static uint32_t
+/**
+ * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's
+ * size is `bits` bits?
+ *
+ * Hardware pitch fields are offset by 1. For example, if the size of
+ * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
+ * pitches is [1, 2^b] inclusive.  If the surface pitch is N, then
+ * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
+ */
+static bool
+pitch_in_range(uint32_t n, uint32_t bits)
+{
+   assert(n != 0);
+   return likely(bits != 0 && 1 <= n && n <= (1 << bits));
+}
+
+static bool
 isl_calc_row_pitch(const struct isl_device *dev,
const struct isl_surf_init_info *surf_info,
const struct isl_tile_info *tile_info,
enum isl_dim_layout dim_layout,
-   const struct isl_extent2d *phys_slice0_sa)
+   const struct isl_extent2d *phys_slice0_sa,
+   uint32_t *out_row_pitch)
 {
const uint32_t alignment =
   isl_calc_row_pitch_alignment(surf_info, tile_info);
 
-   return isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_slice0_sa,
- alignment);
+   const uint32_t row_pitch =
+  isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_slice0_sa,
+ alignment);
+
+   const uint32_t row_pitch_tiles = row_pitch / tile_info->phys_extent_B.width;
+
+   if (row_pitch == 0)
+  return false;
+
+   if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
+  /* SurfacePitch is ignored for this layout. */
+  goto done;
+   }
+
+   if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
+ISL_SURF_USAGE_TEXTURE_BIT |
+ISL_SURF_USAGE_STORAGE_BIT)) &&
+   !pitch_in_range(row_pitch, 
RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
+  return false;
+
+   if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
+ISL_SURF_USAGE_MCS_BIT)) &&
+   !pitch_in_range(row_pitch_tiles, 
RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
+  return false;
+
+   if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
+   !pitch_in_range(row_pitch, 
_3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
+  return false;
+
+   if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
+   !pitch_in_range(row_pitch, 
_3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
+  return false;
+
+   if (surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT)
+  isl_finishme("validate row pitch of stencil surfaces");
+
+ done:
+   *out_row_pitch = row_pitch;
+   return true;
 }
 
 /**
@@ -1275,8 +1331,10 @@ isl_surf_init_s(const struct isl_device *dev,
uint32_t pad_bytes;
isl_apply_surface_padding(dev, info, _info, _h_el, _bytes);
 
-   const uint32_t row_pitch = isl_calc_row_pitch(dev, info, _info,
- dim_layout, _slice0_sa);
+   uint32_t row_pitch;
+   if (!isl_calc_row_pitch(dev, info, _info, dim_layout,
+   _slice0_sa, _pitch))
+  return false;
 
uint32_t size, base_alignment;
if (tiling == ISL_TILING_LINEAR) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): genxml: New generated header genX_bits.h (v6)

2017-03-28 Thread Chad Versace
Module: Mesa
Branch: master
Commit: f0eaf38db2c7ed5dd3cbc62ad078bf9d08924640
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0eaf38db2c7ed5dd3cbc62ad078bf9d08924640

Author: Chad Versace 
Date:   Fri Mar 24 14:35:24 2017 -0700

genxml: New generated header genX_bits.h (v6)

genX_bits.h contains the sizes of bitfields in genxml instructions,
structures, and registers. It also defines some functions to query those
sizes.

isl_surf_init() will use the new header to validate that requested
pitches fit in their destination bitfields.

What's currently in genX_bits.h:

  - Each CONTAINER::Field from gen*.xml that has a bitsize has a macro
in genX_bits.h:

#define GEN{N}_CONTAINER_Field_bits {bitsize}

  - For each set of macros whose name, after stripping the GEN prefix,
is the same, genX_bits.h contains a query function:

  static inline uint32_t __attribute__((pure))
  CONTAINER_Field_bits(const struct gen_device_info *devinfo);

v2 (Chad Versace):
  - Parse the XML instead of scraping the generated gen*_pack.h headers.

v3 (Dylan Baker):
  - Port to Mako.

v4 (Jason Ekstrand):
  - Make the _bits functions take a gen_device_info.

v5 (Chad Versace):
  - Fix autotools out-of-tree build.
  - Fix Android build. Tested with git://github.com/android-ia/manifest.
  - Fix macro names. They were all missing the "_bits" suffix.
  - Fix macros names more. Remove all double-underscores.
  - Unindent all generated code. (It was floating in a sea of whitespace).
  - Reformat header to appear human-written not machine-generated.
  - Sort gens from high to low. Newest gens should come first because,
when we read code, we likely want to read the gen8/9 code and ignore
the gen4 code. So put the gen4 code at the bottom.
  - Replace 'const' attributes with 'pure', because the functions now
have a pointer parameter.
  - Add --cpp-guard flag. Used by Android.
  - Kill class FieldCollection. After Jason's rewrite, it was just
a dict.

v6 (Chad Versace):
  - Replace `key not in d.keys()` with `key not in d`. [for dylan]

Co-authored-by: Dylan Baker 
Co-authored-by: Jason Ekstrand 
Reviewed-by: Jason Ekstrand  (v5)
Reviewed-by: Dylan Baker  (v6)

---

 src/intel/Android.genxml.mk |   9 +-
 src/intel/Makefile.genxml.am|   6 +-
 src/intel/Makefile.sources  |   6 +-
 src/intel/genxml/.gitignore |   1 +
 src/intel/genxml/gen_bits_header.py | 281 
 5 files changed, 300 insertions(+), 3 deletions(-)

diff --git a/src/intel/Android.genxml.mk b/src/intel/Android.genxml.mk
index 79de784380..842d0e13a3 100644
--- a/src/intel/Android.genxml.mk
+++ b/src/intel/Android.genxml.mk
@@ -46,9 +46,16 @@ LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, 
$(GENXML_GENERATED_FIL
 define header-gen
@mkdir -p $(dir $@)
@echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))"
-   $(hide) $(PRIVATE_SCRIPT) $(PRIVATE_XML) > $@
+   $(hide) $(PRIVATE_SCRIPT) $(PRIVATE_SCRIPT_FLAGS) $(PRIVATE_XML) > $@
 endef
 
+$(intermediates)/genxml/genX_bits.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) 
$(LOCAL_PATH)/genxml/gen_bits_header.py
+$(intermediates)/genxml/genX_bits.h: PRIVATE_SCRIPT_FLAGS := 
--cpp-guard=GENX_BITS_H
+$(intermediates)/genxml/genX_bits.h: PRIVATE_XML := $(addprefix 
$(LOCAL_PATH)/,$(GENXML_XML_FILES))
+$(intermediates)/genxml/genX_bits.h: $(LOCAL_PATH)/genxml/gen_bits_header.py
+$(intermediates)/genxml/genX_bits.h: $(addprefix 
$(LOCAL_PATH)/,$(GENXML_XML_FILES))
+   $(call header-gen)
+
 $(intermediates)/genxml/gen4_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) 
$(LOCAL_PATH)/genxml/gen_pack_header.py
 $(intermediates)/genxml/gen4_pack.h: PRIVATE_XML := 
$(LOCAL_PATH)/genxml/gen4.xml
 $(intermediates)/genxml/gen4_pack.h: $(LOCAL_PATH)/genxml/gen4.xml 
$(LOCAL_PATH)/genxml/gen_pack_header.py
diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
index 01a02b63b4..474b751f5f 100644
--- a/src/intel/Makefile.genxml.am
+++ b/src/intel/Makefile.genxml.am
@@ -30,7 +30,7 @@ EXTRA_DIST += \
 
 SUFFIXES = _pack.h _xml.h .xml
 
-$(GENXML_GENERATED_FILES): genxml/gen_pack_header.py
+$(GENXML_GENERATED_PACK_FILES): genxml/gen_pack_header.py
 
 .xml_pack.h:
$(MKDIR_GEN)
@@ -42,6 +42,10 @@ $(AUBINATOR_GENERATED_FILES): genxml/gen_zipped_file.py
$(MKDIR_GEN)
$(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py $< > $@ || 
($(RM) $@; false)
 
+genxml/genX_bits.h: genxml/gen_bits_header.py $(GENXML_XML_FILES)
+   $(MKDIR_GEN)
+   $(PYTHON_GEN) $< -o $@ $(addprefix $(srcdir)/,$(GENXML_XML_FILES))
+
 EXTRA_DIST += \
genxml/genX_pack.h \
genxml/gen_macros.h \
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 88bcf60f6e..c56891643c 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -119,7 +119,7 @@ 

Mesa (master): intel: Fix requests for exact surface row pitch (v2)

2017-03-28 Thread Chad Versace
Module: Mesa
Branch: master
Commit: 6cbc13d94c40f875926b8fd2129852759f314d14
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cbc13d94c40f875926b8fd2129852759f314d14

Author: Chad Versace 
Date:   Fri Feb 24 17:15:43 2017 -0800

intel: Fix requests for exact surface row pitch (v2)

All callers of isl_surf_init() that set 'min_row_pitch' wanted to
request an *exact* row pitch, as evidenced by nearby asserts, but isl
lacked API for doing so. Now that isl has an API for that, update the
code to use it.

v2: Assert that isl_surf_init() succeeds because the callers assume
it.  [for jekstrand]

Reviewed-by: Nanley Chery  (v1)
Reviewed-by: Anuj Phogat  (v1)
Reviewed-by: Jason Ekstrand  (v2)

---

 src/intel/blorp/blorp_blit.c |  8 +---
 src/intel/vulkan/anv_blorp.c | 29 +++--
 src/intel/vulkan/anv_image.c |  2 +-
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 280b76ab70..691564c878 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -1375,6 +1375,8 @@ static void
 surf_convert_to_single_slice(const struct isl_device *isl_dev,
  struct brw_blorp_surface_info *info)
 {
+   bool ok UNUSED;
+
/* Just bail if we have nothing to do. */
if (info->surf.dim == ISL_SURF_DIM_2D &&
info->view.base_level == 0 && info->view.base_array_layer == 0 &&
@@ -1421,13 +1423,13 @@ surf_convert_to_single_slice(const struct isl_device 
*isl_dev,
   .levels = 1,
   .array_len = 1,
   .samples = info->surf.samples,
-  .min_pitch = info->surf.row_pitch,
+  .row_pitch = info->surf.row_pitch,
   .usage = info->surf.usage,
   .tiling_flags = 1 << info->surf.tiling,
};
 
-   isl_surf_init_s(isl_dev, &info->surf, &init_info);
-   assert(info->surf.row_pitch == init_info.min_pitch);
+   ok = isl_surf_init_s(isl_dev, &info->surf, &init_info);
+   assert(ok);
 
/* The view is also different now. */
info->view.base_level = 0;
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 9b3910f1b0..16f1692ff5 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -133,6 +133,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
 {
const struct isl_format_layout *fmtl =
   isl_format_get_layout(format);
+   bool ok UNUSED;
 
/* ASTC is the only format which doesn't support linear layouts.
 * Create an equivalently sized surface with ISL to get around this.
@@ -155,20 +156,20 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
   },
};
 
-   isl_surf_init(&device->isl_dev, isl_surf,
- .dim = ISL_SURF_DIM_2D,
- .format = format,
- .width = width,
- .height = height,
- .depth = 1,
- .levels = 1,
- .array_len = 1,
- .samples = 1,
- .min_pitch = row_pitch,
- .usage = ISL_SURF_USAGE_TEXTURE_BIT |
-  ISL_SURF_USAGE_RENDER_TARGET_BIT,
- .tiling_flags = ISL_TILING_LINEAR_BIT);
-   assert(isl_surf->row_pitch == row_pitch);
+   ok = isl_surf_init(&device->isl_dev, isl_surf,
+ .dim = ISL_SURF_DIM_2D,
+ .format = format,
+ .width = width,
+ .height = height,
+ .depth = 1,
+ .levels = 1,
+ .array_len = 1,
+ .samples = 1,
+ .row_pitch = row_pitch,
+ .usage = ISL_SURF_USAGE_TEXTURE_BIT |
+  ISL_SURF_USAGE_RENDER_TARGET_BIT,
+ .tiling_flags = ISL_TILING_LINEAR_BIT);
+   assert(ok);
 }
 
 static void
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 33499abca1..cf34dbe3b0 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -166,7 +166,7 @@ make_surface(const struct anv_device *dev,
   .array_len = vk_info->arrayLayers,
   .samples = vk_info->samples,
   .min_alignment = 0,
-  .min_pitch = anv_info->stride,
+  .row_pitch = anv_info->stride,
   .usage = choose_isl_surf_usage(image->usage, aspect),
   .tiling_flags = tiling_flags);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): isl: Drop unused isl_surf_init_info::min_pitch

2017-03-28 Thread Chad Versace
Module: Mesa
Branch: master
Commit: d1032a047b5f8ef29a1175192436f4a2291e6ff6
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d1032a047b5f8ef29a1175192436f4a2291e6ff6

Author: Chad Versace 
Date:   Fri Mar 10 13:58:13 2017 -0800

isl: Drop unused isl_surf_init_info::min_pitch

Reviewed-by: Nanley Chery 
Reviewed-by: Anuj Phogat 
Reviewed-by: Jason Ekstrand 

---

 src/intel/isl/isl.c | 13 +++--
 src/intel/isl/isl.h |  3 ---
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 98a1152c28..c7072d0902 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -1043,11 +1043,7 @@ isl_calc_linear_min_row_pitch(const struct isl_device 
*dev,
 
assert(phys_slice0_sa->w % fmtl->bw == 0);
 
-   uint32_t min_row_pitch = bs * (phys_slice0_sa->w / fmtl->bw);
-   min_row_pitch = MAX2(min_row_pitch, info->min_pitch);
-   min_row_pitch = isl_align_npot(min_row_pitch, alignment);
-
-   return min_row_pitch;
+   return isl_align_npot(bs * (phys_slice0_sa->w / fmtl->bw), alignment);
 }
 
 static uint32_t
@@ -1068,11 +1064,8 @@ isl_calc_tiled_min_row_pitch(const struct isl_device 
*dev,
   isl_align_div(total_w_el * tile_el_scale,
 tile_info->logical_extent_el.width);
 
-   uint32_t min_row_pitch = total_w_tl * tile_info->phys_extent_B.width;
-   min_row_pitch = MAX2(min_row_pitch, surf_info->min_pitch);
-   min_row_pitch = isl_align_npot(min_row_pitch, alignment);
-
-   return min_row_pitch;
+   assert(alignment == tile_info->phys_extent_B.width);
+   return total_w_tl * tile_info->phys_extent_B.width;
 }
 
 static uint32_t
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index 012be7b98e..17b52cf2f4 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -810,9 +810,6 @@ struct isl_surf_init_info {
/** Lower bound for isl_surf::alignment, in bytes. */
uint32_t min_alignment;
 
-   /** Lower bound for isl_surf::pitch, in bytes. */
-   uint32_t min_pitch;
-
/**
 * Exact value for isl_surf::row_pitch. Ignored if zero.  isl_surf_init()
 * will fail if this is misaligned or out of bounds.

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): isl: Let isl_surf_init's caller set the exact row pitch (v2)

2017-03-28 Thread Chad Versace
Module: Mesa
Branch: master
Commit: e9017d58dcd0117c67788f7e2084b09f5d47a279
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e9017d58dcd0117c67788f7e2084b09f5d47a279

Author: Chad Versace 
Date:   Fri Feb 24 16:30:13 2017 -0800

isl: Let isl_surf_init's caller set the exact row pitch (v2)

The caller does so by setting the new field
isl_surf_init_info::row_pitch.

v2: Validate the requested row_pitch.

Reviewed-by: Jason Ekstrand  (v2)

---

 src/intel/isl/isl.c | 14 +-
 src/intel/isl/isl.h |  6 ++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 749fcdf46b..98a1152c28 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -1118,10 +1118,22 @@ isl_calc_row_pitch(const struct isl_device *dev,
const uint32_t alignment =
   isl_calc_row_pitch_alignment(surf_info, tile_info);
 
-   const uint32_t row_pitch =
+   const uint32_t min_row_pitch =
   isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_slice0_sa,
  alignment);
 
+   uint32_t row_pitch = min_row_pitch;
+
+   if (surf_info->row_pitch != 0) {
+  row_pitch = surf_info->row_pitch;
+
+  if (row_pitch < min_row_pitch)
+ return false;
+
+  if (row_pitch % alignment != 0)
+ return false;
+   }
+
const uint32_t row_pitch_tiles = row_pitch / tile_info->phys_extent_B.width;
 
if (row_pitch == 0)
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index 9d92906ca7..012be7b98e 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -813,6 +813,12 @@ struct isl_surf_init_info {
/** Lower bound for isl_surf::pitch, in bytes. */
uint32_t min_pitch;
 
+   /**
+* Exact value for isl_surf::row_pitch. Ignored if zero.  isl_surf_init()
+* will fail if this is misaligned or out of bounds.
+*/
+   uint32_t row_pitch;
+
isl_surf_usage_flags_t usage;
 
/** Flags that alter how ISL selects isl_surf::tiling.  */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): swr: [rasterizer common] Use C++ thread_local keyword

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: 1c7224c85fddcbac64ee5a6595ec8608b4f00437
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c7224c85fddcbac64ee5a6595ec8608b4f00437

Author: Tim Rowley 
Date:   Wed Mar 22 18:55:13 2017 -0500

swr: [rasterizer common] Use C++ thread_local keyword

Allows use of thread_local objects with constructors.

Reviewed-by: George Kyriazis 

---

 src/gallium/drivers/swr/rasterizer/common/os.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h 
b/src/gallium/drivers/swr/rasterizer/common/os.h
index 28e7ff54f9..71c4da3a59 100644
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ b/src/gallium/drivers/swr/rasterizer/common/os.h
@@ -47,7 +47,6 @@
 #endif
 
 #define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
-#define THREAD __declspec(thread)
 #define INLINE __forceinline
 #define DEBUGBREAK __debugbreak()
 
@@ -108,7 +107,6 @@ typedef unsigned int DWORD;
 #define MAX_PATH PATH_MAX
 
 #define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
-#define THREAD __thread
 #ifndef INLINE
 #define INLINE __inline
 #endif
@@ -242,6 +240,8 @@ pid_t gettid(void);
 
 #endif
 
+#define THREAD thread_local
+
 // Universal types
 typedef uint8_t KILOBYTE[1024];
 typedef KILOBYTE MEGABYTE[1024];

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): swr: [rasterizer core] SIMD16 Frontend WIP

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: 549b9d2e9f1547af3fb061a7956b04fb30870a6d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=549b9d2e9f1547af3fb061a7956b04fb30870a6d

Author: Tim Rowley 
Date:   Mon Mar 20 12:17:07 2017 -0500

swr: [rasterizer core] SIMD16 Frontend WIP

Fix GS and streamout.

Reviewed-by: George Kyriazis 

---

 src/gallium/drivers/swr/rasterizer/core/clip.h | 61 ++
 .../drivers/swr/rasterizer/core/frontend.cpp   | 97 +-
 2 files changed, 136 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h 
b/src/gallium/drivers/swr/rasterizer/core/clip.h
index eec65707e7..3a79d6a34c 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -376,7 +376,16 @@ public:
 const simdscalar vMask = _mm256_set_ps(0, -1, -1, -1, -1, -1, -1, -1);
 
 uint32_t numClippedPrims = 0;
+#if USE_SIMD16_FRONTEND
+const uint32_t numPrims = pa.NumPrims();
+const uint32_t numPrims_lo = std::min(numPrims, 
KNOB_SIMD_WIDTH);
+
+SWR_ASSERT(numPrims <= numPrims_lo);
+
+for (uint32_t inputPrim = 0; inputPrim < numPrims_lo; ++inputPrim)
+#else
 for (uint32_t inputPrim = 0; inputPrim < pa.NumPrims(); ++inputPrim)
+#endif
 {
 uint32_t numEmittedVerts = pVertexCount[inputPrim];
 if (numEmittedVerts < NumVertsPerPrim)
@@ -391,13 +400,28 @@ public:
 // tranpose clipper output so that each lane's vertices are in 
SIMD order
 // set aside space for 2 vertices, as the PA will try to read up 
to 16 verts
 // for triangle fan
+#if USE_SIMD16_FRONTEND
+simd16vertex transposedPrims[2];
+#else
 simdvertex transposedPrims[2];
+#endif
 
 // transpose pos
 uint8_t* pBase = 
(uint8_t*)([0].attrib[VERTEX_POSITION_SLOT]) + sizeof(float) * 
inputPrim;
+
+#if USE_SIMD16_FRONTEND
+// TEMPORARY WORKAROUND for bizarre VS2015 code-gen bug - use 
dx11_clipping_03-09 failures to check for existence of bug
+static const float *dummy = reinterpret_cast<const float *>(pBase);
+#endif
+
 for (uint32_t c = 0; c < 4; ++c)
 {
+#if USE_SIMD16_FRONTEND
+simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(), 
(const float *)pBase, vOffsets, vMask, 1);
+transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] = 
_simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
+#else
 transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] = 
_simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, 
vMask, 1);
+#endif
 pBase += sizeof(simdscalar);
 }
 
@@ -408,7 +432,12 @@ public:
 uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
 for (uint32_t c = 0; c < 4; ++c)
 {
+#if USE_SIMD16_FRONTEND
+simdscalar temp = 
_simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, 
vMask, 1);
+transposedPrims[0].attrib[attribSlot][c] = 
_simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
+#else
 transposedPrims[0].attrib[attribSlot][c] = 
_simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, 
vMask, 1);
+#endif
 pBase += sizeof(simdscalar);
 }
 }
@@ -419,7 +448,12 @@ public:
 pBase = 
(uint8_t*)([0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT]) + sizeof(float) * 
inputPrim;
 for (uint32_t c = 0; c < 4; ++c)
 {
+#if USE_SIMD16_FRONTEND
+simdscalar temp = 
_simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, 
vMask, 1);
+transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT][c] 
= _simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
+#else
 transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_LO_SLOT][c] 
= _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, 
vMask, 1);
+#endif
 pBase += sizeof(simdscalar);
 }
 }
@@ -429,7 +463,12 @@ public:
 pBase = 
(uint8_t*)([0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT]) + sizeof(float) * 
inputPrim;
 for (uint32_t c = 0; c < 4; ++c)
 {
+#if USE_SIMD16_FRONTEND
+simdscalar temp = 
_simd_mask_i32gather_ps(_simd_setzero_ps(), (const float *)pBase, vOffsets, 
vMask, 1);
+transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT][c] 
= _simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
+#else
 transposedPrims[0].attrib[VERTEX_CLIPCULL_DIST_HI_SLOT][c] 
= _simd_mask_i32gather_ps(_mm256_undefined_ps(), (const float*)pBase, vOffsets, 
vMask, 1);
+#endif
 pBase += 

Mesa (master): swr: [rasterizer codegen] Refactor codegen

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: fee3fc018b274af2913c2a9aa17e024b8eb293d0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=fee3fc018b274af2913c2a9aa17e024b8eb293d0

Author: Tim Rowley 
Date:   Fri Mar 17 12:39:15 2017 -0500

swr: [rasterizer codegen] Refactor codegen

Move common codegen functions into gen_common.py.

v2: change gen_knobs.py to find the template file internally, like
the rest of the gen scripts.

Reviewed-by: Bruce Cherniak 

---

 src/gallium/drivers/swr/Makefile.am|  20 ++-
 .../drivers/swr/rasterizer/codegen/gen_archrast.py |  30 +---
 .../drivers/swr/rasterizer/codegen/gen_backends.py |  30 +---
 .../drivers/swr/rasterizer/codegen/gen_common.py   | 162 +
 .../drivers/swr/rasterizer/codegen/gen_knobs.py|  64 +++-
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py   |  35 +
 .../swr/rasterizer/codegen/gen_llvm_types.py   |  32 +---
 7 files changed, 215 insertions(+), 158 deletions(-)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 8ba9ac93da..515a9089cc 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -71,30 +71,28 @@ gen_swr_context_llvm.h: 
rasterizer/codegen/gen_llvm_types.py rasterizer/codegen/
--input $(srcdir)/swr_context.h \
--output ./gen_swr_context_llvm.h
 
-rasterizer/codegen/gen_knobs.cpp: rasterizer/codegen/gen_knobs.py 
rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp
+rasterizer/codegen/gen_knobs.cpp: rasterizer/codegen/gen_knobs.py 
rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp 
rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_knobs.py \
-   --input $(srcdir)/rasterizer/codegen/templates/gen_knobs.cpp \
--output rasterizer/codegen/gen_knobs.cpp \
--gen_cpp
 
-rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py 
rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp
+rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py 
rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp 
rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_knobs.py \
-   --input $(srcdir)/rasterizer/codegen/templates/gen_knobs.cpp \
--output rasterizer/codegen/gen_knobs.h \
--gen_h
 
-rasterizer/jitter/gen_state_llvm.h: rasterizer/codegen/gen_llvm_types.py 
rasterizer/codegen/templates/gen_llvm.hpp rasterizer/core/state.h
+rasterizer/jitter/gen_state_llvm.h: rasterizer/codegen/gen_llvm_types.py 
rasterizer/codegen/templates/gen_llvm.hpp rasterizer/core/state.h 
rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_llvm_types.py \
--input $(srcdir)/rasterizer/core/state.h \
--output rasterizer/jitter/gen_state_llvm.h
 
-rasterizer/jitter/gen_builder.hpp: rasterizer/codegen/gen_llvm_ir_macros.py 
rasterizer/codegen/templates/gen_builder.hpp
+rasterizer/jitter/gen_builder.hpp: rasterizer/codegen/gen_llvm_ir_macros.py 
rasterizer/codegen/templates/gen_builder.hpp rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_llvm_ir_macros.py \
@@ -102,14 +100,14 @@ rasterizer/jitter/gen_builder.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py rast
--output rasterizer/jitter \
--gen_h
 
-rasterizer/jitter/gen_builder_x86.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py 
rasterizer/codegen/templates/gen_builder.hpp
+rasterizer/jitter/gen_builder_x86.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py 
rasterizer/codegen/templates/gen_builder.hpp rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_llvm_ir_macros.py \
--output rasterizer/jitter \
--gen_x86_h
 
-rasterizer/archrast/gen_ar_event.hpp: rasterizer/codegen/gen_archrast.py 
rasterizer/codegen/templates/gen_ar_event.hpp rasterizer/archrast/events.proto
+rasterizer/archrast/gen_ar_event.hpp: rasterizer/codegen/gen_archrast.py 
rasterizer/codegen/templates/gen_ar_event.hpp rasterizer/archrast/events.proto 
rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_archrast.py \
@@ -117,7 +115,7 @@ rasterizer/archrast/gen_ar_event.hpp: 
rasterizer/codegen/gen_archrast.py rasteri
--output rasterizer/archrast/gen_ar_event.hpp \
--gen_event_h
 
-rasterizer/archrast/gen_ar_event.cpp: rasterizer/codegen/gen_archrast.py 
rasterizer/codegen/templates/gen_ar_event.cpp 

Mesa (master): swr: [rasterizer core] Don't bind single-threaded contexts

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: aea737e12e186091507d33b874ec101da0f69c5e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=aea737e12e186091507d33b874ec101da0f69c5e

Author: Tim Rowley 
Date:   Tue Mar 21 16:52:49 2017 -0500

swr: [rasterizer core] Don't bind single-threaded contexts

Reviewed-by: George Kyriazis 

---

 src/gallium/drivers/swr/rasterizer/core/threads.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp 
b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index ea29f66c88..e3ad2585c0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -274,7 +274,7 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, 
uint32_t& out_numThread
 void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId 
= 0, bool bindProcGroup=false)
 {
 // Only bind threads when MAX_WORKER_THREADS isn't set.
-if (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false)
+if (pContext->threadInfo.SINGLE_THREADED || 
(pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false))
 {
 return;
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): swr: [rasterizer core] Disable inline function expansion

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: 3974cfea2531b86fb394d8501b106e69f00c5f89
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3974cfea2531b86fb394d8501b106e69f00c5f89

Author: Tim Rowley 
Date:   Wed Mar 22 19:20:42 2017 -0500

swr: [rasterizer core] Disable inline function expansion

Disable expansion in windows Debug builds.

Reviewed-by: George Kyriazis 

---

 src/gallium/drivers/swr/rasterizer/common/os.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h 
b/src/gallium/drivers/swr/rasterizer/common/os.h
index 71c4da3a59..ef00a255d3 100644
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ b/src/gallium/drivers/swr/rasterizer/common/os.h
@@ -47,7 +47,19 @@
 #endif
 
 #define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
+
+#if defined(_DEBUG)
+// We compile Debug builds with inline function expansion enabled.  This allows
+// functions compiled with __forceinline to be inlined even in Debug builds.
+// The inline_depth(0) pragma below will disable inline function expansion for
+// normal INLINE / inline functions, but not for __forceinline functions.
+// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in
+// Debug builds.
+#define INLINE inline
+#pragma inline_depth(0)
+#else
 #define INLINE __forceinline
+#endif
 #define DEBUGBREAK __debugbreak()
 
 #define PRAGMA_WARNING_PUSH_DISABLE(...) \

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): swr: [rasterizer core] Enable SIMD16

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: 4cd0b1bb2c284609d2ac3413456b29f1a3e42d10
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4cd0b1bb2c284609d2ac3413456b29f1a3e42d10

Author: Tim Rowley 
Date:   Tue Mar 21 15:32:34 2017 -0500

swr: [rasterizer core] Enable SIMD16

Make the AVX512 insert/extract intrinsics KNL-compatible

Reviewed-by: George Kyriazis 

---

 src/gallium/drivers/swr/rasterizer/common/simd16intrin.h | 14 +++---
 src/gallium/drivers/swr/rasterizer/core/knobs.h  |  2 +-
 src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp   | 12 
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h 
b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
index 88814a58aa..3b43d510e6 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -853,10 +853,10 @@ INLINE simd16scalari _simd16_set_epi32(int e7, int e6, 
int e5, int e4, int e3, i
 #define _simd16_broadcast_ps(m) _mm512_extload_ps(m, _MM_UPCONV_PS_NONE, 
_MM_BROADCAST_4X16, 0)
 #define _simd16_store_ps _mm512_store_ps
 #define _simd16_store_si _mm512_store_si512
-#define _simd16_extract_ps  _mm512_extractf32x8_ps
-#define _simd16_extract_si  _mm512_extracti32x8_epi32
-#define _simd16_insert_ps   _mm512_insertf32x8
-#define _simd16_insert_si   _mm512_inserti32x8
+#define _simd16_extract_ps(a, imm8) 
_mm256_castsi256_ps(_mm512_extracti64x4_epi64(_mm512_castps_si512(a), imm8))
+#define _simd16_extract_si  _mm512_extracti64x4_epi64
+#define _simd16_insert_ps(a, b, imm8)  
_mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castps_si512(a), 
_mm256_castps_si256(b), imm8))
+#define _simd16_insert_si   _mm512_inserti64x4
 
 INLINE void _simd16_maskstore_ps(float *m, simd16scalari mask, simd16scalar a)
 {
@@ -871,21 +871,21 @@ INLINE simd16scalar _simd16_blendv_ps(simd16scalar a, 
simd16scalar b, const simd
 {
 simd16mask k = _simd16_scalari2mask(_mm512_castps_si512(mask));
 
-_mm512_mask_blend_ps(k, a, b);
+return _mm512_mask_blend_ps(k, a, b);
 }
 
 INLINE simd16scalari _simd16_blendv_epi32(simd16scalari a, simd16scalari b, 
const simd16scalar mask)
 {
 simd16mask k = _simd16_scalari2mask(_mm512_castps_si512(mask));
 
-_mm512_mask_blend_epi32(k, a, b);
+return _mm512_mask_blend_epi32(k, a, b);
 }
 
 INLINE simd16scalari _simd16_blendv_epi32(simd16scalari a, simd16scalari b, 
const simd16scalari mask)
 {
 simd16mask k = _simd16_scalari2mask(mask);
 
-_mm512_mask_blend_epi32(k, a, b);
+return _mm512_mask_blend_epi32(k, a, b);
 }
 
 #define _simd16_mul_ps  _mm512_mul_ps
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h 
b/src/gallium/drivers/swr/rasterizer/core/knobs.h
index 8e54f90526..7928f5d6d7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
@@ -38,7 +38,7 @@
 // AVX512 Support
 ///
 
-#define ENABLE_AVX512_SIMD16 0
+#define ENABLE_AVX512_SIMD16 1
 #define USE_8x2_TILE_BACKEND 0
 #define USE_SIMD16_FRONTEND 0
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp 
b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
index 297f23a88c..511a1fc0df 100644
--- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
@@ -1297,7 +1297,19 @@ bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, 
simd16vector verts[])
 
 bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
 {
+#if USE_SIMD16_FRONTEND
 const simd16vector &a = pa.leadingVertex.attrib[slot];
+#else
+simd16vector a;
+
+{
+for (uint32_t i = 0; i < 4; i += 1)
+{
+a[i] = _simd16_insert_ps(_simd16_setzero_ps(), 
pa.leadingVertex.attrib[slot][i], 0);
+}
+}
+
+#endif
 const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot);
 const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): swr: [rasterizer jitter] Clean up EngineBuilder construction

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: ec51e8ecfea9d81313192fcd25f9767f8203a9ca
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ec51e8ecfea9d81313192fcd25f9767f8203a9ca

Author: Tim Rowley 
Date:   Mon Mar 20 19:44:49 2017 -0500

swr: [rasterizer jitter] Clean up EngineBuilder construction

Reviewed-by: George Kyriazis 

---

 src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
index 79118f5f65..bdb8a52e2f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -106,16 +106,10 @@ JitManager::JitManager(uint32_t simdWidth, const char 
*arch, const char* core)
 std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
 mpCurrentModule = newModule.get();
 
-auto &&EB = EngineBuilder(std::move(newModule));
-EB.setTargetOptions(tOpts);
-EB.setOptLevel(CodeGenOpt::Aggressive);
-
 StringRef hostCPUName;
 
 hostCPUName = sys::getHostCPUName();
 
-EB.setMCPU(hostCPUName);
-
 #if defined(_WIN32)
 // Needed for MCJIT on windows
 Triple hostTriple(sys::getProcessTriple());
@@ -123,7 +117,11 @@ JitManager::JitManager(uint32_t simdWidth, const char 
*arch, const char* core)
 mpCurrentModule->setTargetTriple(hostTriple.getTriple());
 #endif // _WIN32
 
-mpExec = EB.create();
+mpExec = EngineBuilder(std::move(newModule))
+.setTargetOptions(tOpts)
+.setOptLevel(CodeGenOpt::Aggressive)
+.setMCPU(hostCPUName)
+.create();
 
 #if LLVM_USE_INTEL_JITEVENTS
 JITEventListener *vTune = JITEventListener::createIntelJITEventListener();

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): swr: [rasterizer core] SIMD16 Frontend WIP

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: aee5276375d79f5d73680d6038a1fd838894679a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=aee5276375d79f5d73680d6038a1fd838894679a

Author: Tim Rowley 
Date:   Wed Mar 22 12:36:49 2017 -0500

swr: [rasterizer core] SIMD16 Frontend WIP

Implement widened clipper and binner interfaces for SIMD16.

Reviewed-by: George Kyriazis 

---

 src/gallium/drivers/swr/rasterizer/core/api.cpp|  24 
 src/gallium/drivers/swr/rasterizer/core/binner.cpp | 154 +
 src/gallium/drivers/swr/rasterizer/core/clip.cpp   | 131 ++
 src/gallium/drivers/swr/rasterizer/core/clip.h |   6 +
 src/gallium/drivers/swr/rasterizer/core/context.h  |   3 +
 .../drivers/swr/rasterizer/core/frontend.cpp   | 115 +--
 src/gallium/drivers/swr/rasterizer/core/frontend.h |   7 +
 src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp |  12 ++
 8 files changed, 371 insertions(+), 81 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp 
b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index bd63796d13..dabd0616d3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -839,11 +839,18 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
 }
 
 PFN_PROCESS_PRIMS pfnBinner;
+#if USE_SIMD16_FRONTEND
+PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16;
+#endif
 switch (pState->state.topology)
 {
 case TOP_POINT_LIST:
 pState->pfnProcessPrims = ClipPoints;
 pfnBinner = BinPoints;
+#if USE_SIMD16_FRONTEND
+pState->pfnProcessPrims_simd16 = ClipPoints_simd16;
+pfnBinner_simd16 = BinPoints_simd16;
+#endif
 break;
 case TOP_LINE_LIST:
 case TOP_LINE_STRIP:
@@ -852,10 +859,18 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
 case TOP_LISTSTRIP_ADJ:
 pState->pfnProcessPrims = ClipLines;
 pfnBinner = BinLines;
+#if USE_SIMD16_FRONTEND
+pState->pfnProcessPrims_simd16 = ClipLines_simd16;
+pfnBinner_simd16 = BinLines_simd16;
+#endif
 break;
 default:
 pState->pfnProcessPrims = ClipTriangles;
 pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
+#if USE_SIMD16_FRONTEND
+pState->pfnProcessPrims_simd16 = ClipTriangles_simd16;
+pfnBinner_simd16 = 
GetBinTrianglesFunc_simd16((rastState.conservativeRast > 0));
+#endif
 break;
 };
 
@@ -864,6 +879,9 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
 if (pState->state.frontendState.vpTransformDisable)
 {
 pState->pfnProcessPrims = pfnBinner;
+#if USE_SIMD16_FRONTEND
+pState->pfnProcessPrims_simd16 = pfnBinner_simd16;
+#endif
 }
 
 if ((pState->state.psState.pfnPixelShader == nullptr) &&
@@ -874,11 +892,17 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
 (pState->state.backendState.numAttributes == 0))
 {
 pState->pfnProcessPrims = nullptr;
+#if USE_SIMD16_FRONTEND
+pState->pfnProcessPrims_simd16 = nullptr;
+#endif
 }
 
 if (pState->state.soState.rasterizerDisable == true)
 {
 pState->pfnProcessPrims = nullptr;
+#if USE_SIMD16_FRONTEND
+pState->pfnProcessPrims_simd16 = nullptr;
+#endif
 }
 
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp 
b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index 490a86804f..63eab33ac0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -856,6 +856,58 @@ endBinTriangles:
 AR_END(FEBinTriangles, 1);
 }
 
+#if USE_SIMD16_FRONTEND
+inline uint32_t GetPrimMaskLo(uint32_t primMask)
+{
+return primMask & 255;
+}
+
+inline uint32_t GetPrimMaskHi(uint32_t primMask)
+{
+return (primMask >> 8) & 255;
+}
+
+template 
+void BinTriangles_simd16(
+DRAW_CONTEXT *pDC,
+PA_STATE& pa,
+uint32_t workerId,
+simd16vector tri[3],
+uint32_t triMask,
+simd16scalari primID,
+simd16scalari viewportIdx)
+{
+enum { VERTS_PER_PRIM = 3 };
+
+simdvector verts[VERTS_PER_PRIM];
+
+for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1)
+{
+for (uint32_t j = 0; j < 4; j += 1)
+{
+verts[i][j] = _simd16_extract_ps(tri[i][j], 0);
+}
+}
+
+pa.useAlternateOffset = false;
+BinTriangles(pDC, pa, workerId, verts, GetPrimMaskLo(triMask), 
_simd16_extract_si(primID, 0), _simd16_extract_si(viewportIdx, 0));
+
+if (GetPrimMaskHi(triMask))
+{
+for (uint32_t i = 0; i < VERTS_PER_PRIM; i += 1)
+{
+for (uint32_t j = 0; j < 4; j += 1)
+{
+verts[i][j] = _simd16_extract_ps(tri[i][j], 1);
+}
+}
+
+pa.useAlternateOffset = true;
+BinTriangles(pDC, pa, workerId, verts, GetPrimMaskHi(triMask), 
_simd16_extract_si(primID, 1), _simd16_extract_si(viewportIdx, 1));
+}
+}
+
+#endif
 struct 

Mesa (master): swr: [rasterizer codegen] add cmdline to archrast gen files

2017-03-28 Thread Tim Rowley
Module: Mesa
Branch: master
Commit: 89b83f4b1e8b51cc23f7a998b81b486cf39f8d86
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=89b83f4b1e8b51cc23f7a998b81b486cf39f8d86

Author: Tim Rowley 
Date:   Mon Mar 20 17:39:41 2017 -0500

swr: [rasterizer codegen] add cmdline to archrast gen files

Reviewed-by: George Kyriazis 

---

 src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py   | 4 
 .../drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp| 3 +++
 .../drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp| 3 +++
 .../drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp | 5 -
 .../swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp | 5 -
 5 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
index efe42bb8db..06a3dea4ad 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
@@ -155,6 +155,7 @@ def main():
 output_fullpath = os.sep.join([output_dir, output_filename])
 
 MakoTemplateWriter.to_file(template_file, output_fullpath,
+cmdline=sys.argv,
 filename=output_filename,
 protos=protos)
 
@@ -165,6 +166,7 @@ def main():
 output_fullpath = os.sep.join([output_dir, output_filename])
 
 MakoTemplateWriter.to_file(template_file, output_fullpath,
+cmdline=sys.argv,
 filename=output_filename,
 protos=protos)
 
@@ -175,6 +177,7 @@ def main():
 output_fullpath = os.sep.join([output_dir, output_filename])
 
 MakoTemplateWriter.to_file(template_file, output_fullpath,
+cmdline=sys.argv,
 filename=output_filename,
 event_header='gen_ar_event.hpp',
 protos=protos)
@@ -186,6 +189,7 @@ def main():
 output_fullpath = os.sep.join([output_dir, output_filename])
 
 MakoTemplateWriter.to_file(template_file, output_fullpath,
+cmdline=sys.argv,
 filename=output_filename,
 event_header='gen_ar_eventhandler.hpp',
 protos=protos)
diff --git 
a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp 
b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp
index b743b2f3d2..d48fda61c2 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp
@@ -26,6 +26,9 @@
 * 
 * DO NOT EDIT
 *
+* Generation Command Line:
+*  ${'\n*'.join(cmdline)}
+*
 **/
 #include "common/os.h"
 #include "gen_ar_event.hpp"
diff --git 
a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp 
b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
index 68926ea805..e792f5f63e 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
@@ -25,6 +25,9 @@
 * @brief Definitions for events.  auto-generated file
 * 
 * DO NOT EDIT
+*
+* Generation Command Line:
+*  ${'\n*'.join(cmdline)}
 * 
 **/
 #pragma once
diff --git 
a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp 
b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp
index cfed2aded0..87d0ef47ca 100644
--- 
a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp
+++ 
b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp
@@ -25,7 +25,10 @@
 * @brief Event handler interface.  auto-generated file
 * 
 * DO NOT EDIT
-* 
+*
+* Generation Command Line:
+*  ${'\n*'.join(cmdline)}
+*
 **/
 #pragma once
 
diff --git 
a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
 
b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
index 48ff0b0a95..3a618a124d 100644
--- 
a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
+++ 
b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
@@ -25,7 +25,10 @@
 * @brief Event handler interface.  auto-generated file
 * 
 * DO NOT EDIT
-* 
+*
+* Generation Command Line:
+*  ${'\n*'.join(cmdline)}
+*
 **/
 #pragma once
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org

Mesa (master): tests/cache_test: allow crossing mount points

2017-03-28 Thread Juan Antonio Suárez Romero
Module: Mesa
Branch: master
Commit: caa616ccc4384ea1479865e12b56cf816561a827
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=caa616ccc4384ea1479865e12b56cf816561a827

Author: Juan A. Suarez Romero 
Date:   Tue Mar 28 18:00:39 2017 +0200

tests/cache_test: allow crossing mount points

When using an overlayfs system (like a Docker container), rmrf_local()
fails because part of the files to be removed are in different mount
points (layouts). And thus cache-test fails.

Allowing the walk to cross mount points is not a big problem, especially because
this is just for a test, not to be used in real code.

Reviewed-by: Nicolai Hähnle 

---

 src/compiler/glsl/tests/cache_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/tests/cache_test.c 
b/src/compiler/glsl/tests/cache_test.c
index 537a81bc76..bec1d240e9 100644
--- a/src/compiler/glsl/tests/cache_test.c
+++ b/src/compiler/glsl/tests/cache_test.c
@@ -124,7 +124,7 @@ rmrf_local(const char *path)
if (path == NULL || *path == '\0' || *path != '.')
   return -1;
 
-   return nftw(path, remove_entry, 64, FTW_DEPTH | FTW_PHYS | FTW_MOUNT);
+   return nftw(path, remove_entry, 64, FTW_DEPTH | FTW_PHYS);
 }
 
 static void

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): 23 new commits

2017-03-28 Thread Emil Velikov
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0f9a0cb5f55b432f58c9adbb9b1c63c748d1dfd0
Author: Emil Velikov 
Date:   Tue Feb 28 13:29:06 2017 +

glcpp/tests/glcpp-test-cr-lf: error out if we cannot find any tests

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8096b75aa15fdda8433c2c8614ca0bf5de2c150
Author: Emil Velikov 
Date:   Tue Feb 28 13:24:55 2017 +

glcpp/tests/glcpp-test-cr-lf: correctly set/use srcdir/abs_builddir

Otherwise manual invocation of the script from elsewhere than
`dirname $0` will fail.

With these, all the artefacts should be created in the correct location,
and thus we can remove the old (and slightly strange) clean-local line.

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cf77cdce839a06097b5f995118261eb98285ffc7
Author: Emil Velikov 
Date:   Tue Feb 28 12:13:58 2017 +

glcpp/tests: update testname in help string

Rather than hardcoding glcpp/other use `basename "$0"` which expands
appropriately.

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4ea4fbf93a5a2229af3d48dc7fb23a43c90adb7f
Author: Emil Velikov 
Date:   Tue Feb 28 12:10:41 2017 +

glcpp/tests/glcpp-test: error out if we cannot find any tests

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=182d48ceb9e58eb53b52436b2cd6010de072d29b
Author: Emil Velikov 
Date:   Tue Feb 28 12:08:52 2017 +

glcpp/tests/glcpp-test: print only the test basename

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=addf62946d6c73885dd261099cddc06d3c910f17
Author: Emil Velikov 
Date:   Tue Feb 28 12:02:35 2017 +

glcpp/tests/glcpp-test: set srcdir/abs_builddir variables

Current definitions work fine for the manual invocation of the script,
although the whole script does not consider that one can run it OOT.

The latter will be handled with later patches, although it will be
extensively using the two variables.

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee8aea35725e4b582ed8af2866d0feffa2d13c6e
Author: Emil Velikov 
Date:   Mon Feb 27 18:58:06 2017 +

glsl/tests/optimization-test: 'echo' only folders which have generators

The current "let's print any folder which exists" is simply confusing.

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=79a95f19e64a1d2f855e3f8194b86dc0b2a78c3f
Author: Emil Velikov 
Date:   Mon Feb 27 18:56:38 2017 +

glsl/tests/optimization-test: print only the test basedir/name

The relative/absolute path brings little to no benefit in being printed
as testname. Trim it out.

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=33cd136fa267a44931b8f0230c5d68259ebec2d5
Author: Emil Velikov 
Date:   Sun Feb 26 20:43:05 2017 +

glsl/tests/optimization-test: error if zero tests were executed

We don't want to lie to ourselves that 'everything is fine' when no tests
were found/ran.

Signed-off-by: Emil Velikov 
Acked-by: Kenneth Graunke 
Reviewed-by: Eric Engestrom 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=421115a72939b7dbcdc9f714d85f3e7616323a3e
Author: Emil Velikov 
Date:   Sun Feb 

Mesa (master): st/va: remove assert for single slice

2017-03-28 Thread Christian König
Module: Mesa
Branch: master
Commit: 3472be2bfd8b9cbc931342cc99d0e1abdc48350b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3472be2bfd8b9cbc931342cc99d0e1abdc48350b

Author: Nayan Deshmukh 
Date:   Tue Mar 21 14:02:27 2017 +0530

st/va: remove assert for single slice

we anyway allow for multiple slices

v2: do not remove assert to check for buf->size

Signed-off-by: Nayan Deshmukh 
Reviewed-by: Christian König 

---

 src/gallium/state_trackers/va/picture_mpeg12.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/va/picture_mpeg12.c 
b/src/gallium/state_trackers/va/picture_mpeg12.c
index 812e9e5b2a..1e5a9c7428 100644
--- a/src/gallium/state_trackers/va/picture_mpeg12.c
+++ b/src/gallium/state_trackers/va/picture_mpeg12.c
@@ -81,6 +81,6 @@ void vlVaHandleIQMatrixBufferMPEG12(vlVaContext *context, 
vlVaBuffer *buf)
 
 void vlVaHandleSliceParameterBufferMPEG12(vlVaContext *context, vlVaBuffer 
*buf)
 {
-   assert(buf->size >= sizeof(VASliceParameterBufferMPEG2) && 
buf->num_elements == 1);
+   assert(buf->size >= sizeof(VASliceParameterBufferMPEG2));
context->desc.mpeg12.num_slices += buf->num_elements;
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radeonsi: CP DMA clear supports unaligned destination addresses

2017-03-28 Thread Nicolai Hähnle
Module: Mesa
Branch: master
Commit: f0d9af772e0fbb5854fc8293186a70ea3721748a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0d9af772e0fbb5854fc8293186a70ea3721748a

Author: Nicolai Hähnle 
Date:   Mon Feb 13 13:08:52 2017 +0100

radeonsi: CP DMA clear supports unaligned destination addresses

Reviewed-by: Marek Olšák 

---

 src/gallium/drivers/radeonsi/si_cp_dma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b40f5cc587..0cf7b3b3cb 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -197,7 +197,7 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
   offset + size);
 
/* Fallback for unaligned clears. */
-   if (offset % 4 != 0 || size % 4 != 0) {
+   if (size % 4 != 0) {
uint8_t *map = r600_buffer_map_sync_with_rings(>b, rdst,
   
PIPE_TRANSFER_WRITE);
map += offset;
@@ -211,6 +211,7 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
/* dma_clear_buffer can use clear_buffer on failure. Make sure that
 * doesn't happen. We don't want an infinite recursion: */
if (sctx->b.dma.cs &&
+   (offset % 4 == 0) &&
/* CP DMA is very slow. Always use SDMA for big clears. This
 * alone improves DeusEx:MD performance by 70%. */
(size > 128 * 1024 ||

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radeonsi: use DMA for clears with unaligned size

2017-03-28 Thread Nicolai Hähnle
Module: Mesa
Branch: master
Commit: 21ba6543be0c979c5f5ae10e2623ba697292dccc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=21ba6543be0c979c5f5ae10e2623ba697292dccc

Author: Nicolai Hähnle 
Date:   Mon Feb 13 13:19:45 2017 +0100

radeonsi: use DMA for clears with unaligned size

Only a small tail needs to be uploaded manually.

This is only partly a performance measure (apps are expected to use
aligned access). Mostly it is preparation for sparse buffers, which the
old code would incorrectly have attempted to map directly.

Reviewed-by: Marek Olšák 

---

 src/gallium/drivers/radeonsi/si_cp_dma.c | 46 +++-
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 0cf7b3b3cb..812fcbc2b2 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -185,28 +185,19 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
struct r600_resource *rdst = r600_resource(dst);
unsigned tc_l2_flag = get_tc_l2_flag(sctx, coher);
unsigned flush_flags = get_flush_flags(sctx, coher);
+   uint64_t dma_clear_size;
bool is_first = true;
 
if (!size)
return;
 
+   dma_clear_size = size & ~3llu;
+
/* Mark the buffer range of destination as valid (initialized),
 * so that transfer_map knows it should wait for the GPU when mapping
 * that range. */
util_range_add(>valid_buffer_range, offset,
-  offset + size);
-
-   /* Fallback for unaligned clears. */
-   if (size % 4 != 0) {
-   uint8_t *map = r600_buffer_map_sync_with_rings(>b, rdst,
-  
PIPE_TRANSFER_WRITE);
-   map += offset;
-   for (uint64_t i = 0; i < size; i++) {
-   unsigned byte_within_dword = (offset + i) % 4;
-   *map++ = (value >> (byte_within_dword * 8)) & 0xff;
-   }
-   return;
-   }
+  offset + dma_clear_size);
 
/* dma_clear_buffer can use clear_buffer on failure. Make sure that
 * doesn't happen. We don't want an infinite recursion: */
@@ -223,25 +214,31 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
  * of them are moved to SDMA thanks to this. */
 !ws->cs_is_buffer_referenced(sctx->b.gfx.cs, rdst->buf,
  RADEON_USAGE_READWRITE))) {
-   sctx->b.dma_clear_buffer(ctx, dst, offset, size, value);
-   } else {
+   sctx->b.dma_clear_buffer(ctx, dst, offset, dma_clear_size, 
value);
+
+   offset += dma_clear_size;
+   size -= dma_clear_size;
+   } else if (dma_clear_size >= 4) {
uint64_t va = rdst->gpu_address + offset;
 
+   offset += dma_clear_size;
+   size -= dma_clear_size;
+
/* Flush the caches. */
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
 
-   while (size) {
-   unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+   while (dma_clear_size) {
+   unsigned byte_count = MIN2(dma_clear_size, 
CP_DMA_MAX_BYTE_COUNT);
unsigned dma_flags = tc_l2_flag  | CP_DMA_CLEAR;
 
-   si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0,
+   si_cp_dma_prepare(sctx, dst, NULL, byte_count, 
dma_clear_size, 0,
  _first, _flags);
 
/* Emit the clear packet. */
si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, 
coher);
 
-   size -= byte_count;
+   dma_clear_size -= byte_count;
va += byte_count;
}
 
@@ -252,6 +249,17 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
if (coher == R600_COHERENCY_SHADER)
sctx->b.num_cp_dma_calls++;
}
+
+   if (size) {
+   /* Handle non-dword alignment.
+*
+* This function is called for embedded texture metadata clears,
+* but those should always be properly aligned. */
+   assert(dst->target == PIPE_BUFFER);
+   assert(size < 4);
+
+   pipe_buffer_write(ctx, dst, offset, size, );
+   }
 }
 
 /**

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radeonsi: remove the early-out for SDMA in si_clear_buffer

2017-03-28 Thread Nicolai Hähnle
Module: Mesa
Branch: master
Commit: d9014952f5ca10a5292df3bb8c4bf1b7ccaed240
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9014952f5ca10a5292df3bb8c4bf1b7ccaed240

Author: Nicolai Hähnle 
Date:   Mon Feb 13 12:51:36 2017 +0100

radeonsi: remove the early-out for SDMA in si_clear_buffer

This allows the next patches to be simple while still being able
to make use of SDMA even in some unusual cases.

Reviewed-by: Marek Olšák 

---

 src/gallium/drivers/radeonsi/si_cp_dma.c | 43 
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 1be7586d16..b40f5cc587 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -223,35 +223,34 @@ static void si_clear_buffer(struct pipe_context *ctx, 
struct pipe_resource *dst,
 !ws->cs_is_buffer_referenced(sctx->b.gfx.cs, rdst->buf,
  RADEON_USAGE_READWRITE))) {
sctx->b.dma_clear_buffer(ctx, dst, offset, size, value);
-   return;
-   }
-
-   uint64_t va = rdst->gpu_address + offset;
+   } else {
+   uint64_t va = rdst->gpu_address + offset;
 
-   /* Flush the caches. */
-   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
+   /* Flush the caches. */
+   sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
 
-   while (size) {
-   unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
-   unsigned dma_flags = tc_l2_flag  | CP_DMA_CLEAR;
+   while (size) {
+   unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+   unsigned dma_flags = tc_l2_flag  | CP_DMA_CLEAR;
 
-   si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0,
- _first, _flags);
+   si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0,
+ _first, _flags);
 
-   /* Emit the clear packet. */
-   si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, coher);
+   /* Emit the clear packet. */
+   si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, 
coher);
 
-   size -= byte_count;
-   va += byte_count;
-   }
+   size -= byte_count;
+   va += byte_count;
+   }
 
-   if (tc_l2_flag)
-   rdst->TC_L2_dirty = true;
+   if (tc_l2_flag)
+   rdst->TC_L2_dirty = true;
 
-   /* If it's not a framebuffer fast clear... */
-   if (coher == R600_COHERENCY_SHADER)
-   sctx->b.num_cp_dma_calls++;
+   /* If it's not a framebuffer fast clear... */
+   if (coher == R600_COHERENCY_SHADER)
+   sctx->b.num_cp_dma_calls++;
+   }
 }
 
 /**

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: rework vertex/export shader output handling

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 931a8d0c9a15df462f14ab40f9ae31c8ecf75376
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=931a8d0c9a15df462f14ab40f9ae31c8ecf75376

Author: Dave Airlie 
Date:   Tue Mar 28 06:13:09 2017 +1000

radv: rework vertex/export shader output handling

In order to facilitate adding tess support, split the vs/es
output info into a separate block, so we make it easier to
have the tess shaders export the same info.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/common/ac_nir_to_llvm.c  | 54 +
 src/amd/common/ac_nir_to_llvm.h  | 30 ---
 src/amd/vulkan/radv_cmd_buffer.c | 64 ++--
 src/amd/vulkan/radv_pipeline.c   |  8 ++---
 4 files changed, 86 insertions(+), 70 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6e36c192c3..cfbdeae1a3 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4228,11 +4228,11 @@ handle_shader_output_decl(struct nir_to_llvm_context 
*ctx,
int length = glsl_get_length(variable->type);
if (idx == VARYING_SLOT_CLIP_DIST0) {
if (ctx->stage == MESA_SHADER_VERTEX)
-   ctx->shader_info->vs.clip_dist_mask = 
(1 << length) - 1;
+   
ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << length) - 1;
ctx->num_output_clips = length;
} else if (idx == VARYING_SLOT_CULL_DIST0) {
if (ctx->stage == MESA_SHADER_VERTEX)
-   ctx->shader_info->vs.cull_dist_mask = 
(1 << length) - 1;
+   
ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << length) - 1;
ctx->num_output_culls = length;
}
if (length > 4)
@@ -4448,7 +4448,8 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 }
 
 static void
-handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
+handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
+  struct ac_vs_output_info *outinfo)
 {
uint32_t param_count = 0;
unsigned target;
@@ -4461,14 +4462,14 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
   (1ull << 
VARYING_SLOT_CULL_DIST0) |
   (1ull << 
VARYING_SLOT_CULL_DIST1));
 
-   ctx->shader_info->vs.prim_id_output = 0x;
-   ctx->shader_info->vs.layer_output = 0x;
+   outinfo->prim_id_output = 0x;
+   outinfo->layer_output = 0x;
if (clip_mask) {
LLVMValueRef slots[8];
unsigned j;
 
-   if (ctx->shader_info->vs.cull_dist_mask)
-   ctx->shader_info->vs.cull_dist_mask <<= 
ctx->num_output_clips;
+   if (outinfo->cull_dist_mask)
+   outinfo->cull_dist_mask <<= ctx->num_output_clips;
 
i = VARYING_SLOT_CLIP_DIST0;
for (j = 0; j < ctx->num_output_clips; j++)
@@ -4513,25 +4514,25 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
   i == VARYING_SLOT_CULL_DIST1) {
continue;
} else if (i == VARYING_SLOT_PSIZ) {
-   ctx->shader_info->vs.writes_pointsize = true;
+   outinfo->writes_pointsize = true;
psize_value = values[0];
continue;
} else if (i == VARYING_SLOT_LAYER) {
-   ctx->shader_info->vs.writes_layer = true;
+   outinfo->writes_layer = true;
layer_value = values[0];
-   ctx->shader_info->vs.layer_output = param_count;
+   outinfo->layer_output = param_count;
target = V_008DFC_SQ_EXP_PARAM + param_count;
param_count++;
} else if (i == VARYING_SLOT_VIEWPORT) {
-   ctx->shader_info->vs.writes_viewport_index = true;
+   outinfo->writes_viewport_index = true;
viewport_index_value = values[0];
continue;
} else if (i == VARYING_SLOT_PRIMITIVE_ID) {
-   ctx->shader_info->vs.prim_id_output = param_count;
+   outinfo->prim_id_output = param_count;
target = V_008DFC_SQ_EXP_PARAM + param_count;
param_count++;
} else if (i >= VARYING_SLOT_VAR0) {
-   ctx->shader_info->vs.export_mask |= 1u << 

Mesa (master): radv: move shader_z_format calculation to pipeline.

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 4b467c759ea1e9d5960a5e668a166f33ef03e9d6
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4b467c759ea1e9d5960a5e668a166f33ef03e9d6

Author: Dave Airlie 
Date:   Tue Mar 28 11:34:46 2017 +1000

radv: move shader_z_format calculation to pipeline.

No need to recalculate this every time.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 5 +
 src/amd/vulkan/radv_pipeline.c   | 6 ++
 src/amd/vulkan/radv_private.h| 1 +
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8e35dc5299..c3b141ea3a 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -709,10 +709,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer 
*cmd_buffer,
radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, 
spi_baryc_cntl);
 
radeon_set_context_reg(cmd_buffer->cs, R_028710_SPI_SHADER_Z_FORMAT,
-  ps->info.fs.writes_sample_mask ? 
V_028710_SPI_SHADER_32_ABGR :
-  ps->info.fs.writes_stencil ? 
V_028710_SPI_SHADER_32_GR :
-  ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
-  V_028710_SPI_SHADER_ZERO);
+  pipeline->graphics.shader_z_format);
 
radeon_set_context_reg(cmd_buffer->cs, R_028714_SPI_SHADER_COL_FORMAT, 
blend->spi_shader_col_format);
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 42e8abd84e..550b773e9a 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1666,6 +1666,12 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory);
 
+   pipeline->graphics.shader_z_format =
+   ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR :
+   ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
+   ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
+   V_028710_SPI_SHADER_ZERO;
+
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) 
{
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 8e45e95b77..dff0aef832 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -956,6 +956,7 @@ struct radv_pipeline {
struct radv_raster_state raster;
struct radv_multisample_state ms;
uint32_t db_shader_control;
+   uint32_t shader_z_format;
unsigned prim;
unsigned gs_out;
uint32_t vgt_gs_mode;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: move db_shader_control calculation to pipeline.

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 8996fdbf61e5341c321c802278ee388ac5001f50
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8996fdbf61e5341c321c802278ee388ac5001f50

Author: Dave Airlie 
Date:   Tue Mar 28 11:34:19 2017 +1000

radv: move db_shader_control calculation to pipeline.

There is no need to recalculate this every time.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 16 +---
 src/amd/vulkan/radv_pipeline.c   | 19 ++-
 src/amd/vulkan/radv_private.h|  1 +
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 195a82fef5..8e35dc5299 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -674,7 +674,6 @@ radv_emit_fragment_shader(struct radv_cmd_buffer 
*cmd_buffer,
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
struct radv_blend_state *blend = >graphics.blend;
unsigned ps_offset = 0;
-   unsigned z_order;
struct ac_vs_output_info *outinfo;
assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
 
@@ -692,21 +691,8 @@ radv_emit_fragment_shader(struct radv_cmd_buffer 
*cmd_buffer,
radeon_emit(cmd_buffer->cs, ps->rsrc1);
radeon_emit(cmd_buffer->cs, ps->rsrc2);
 
-   if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory)
-   z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
-   else
-   z_order = V_02880C_LATE_Z;
-
-
radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL,
-  S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) |
-  
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) |
-  S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
-  
S_02880C_MASK_EXPORT_ENABLE(ps->info.fs.writes_sample_mask) |
-  S_02880C_Z_ORDER(z_order) |
-  
S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
-  
S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
-  
S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory));
+  pipeline->graphics.db_shader_control);
 
radeon_set_context_reg(cmd_buffer->cs, R_0286CC_SPI_PS_INPUT_ENA,
   ps->config.spi_ps_input_ena);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 752986a9c5..42e8abd84e 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1641,14 +1641,31 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 *
 * Don't add this to CB_SHADER_MASK.
 */
+   struct radv_shader_variant *ps = 
pipeline->shaders[MESA_SHADER_FRAGMENT];
if (!pipeline->graphics.blend.spi_shader_col_format) {
-   struct radv_shader_variant *ps = 
pipeline->shaders[MESA_SHADER_FRAGMENT];
if (!ps->info.fs.writes_z &&
!ps->info.fs.writes_stencil &&
!ps->info.fs.writes_sample_mask)
pipeline->graphics.blend.spi_shader_col_format = 
V_028714_SPI_SHADER_32_R;
}

+   unsigned z_order;
+   pipeline->graphics.db_shader_control = 0;
+   if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory)
+   z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
+   else
+   z_order = V_02880C_LATE_Z;
+
+   pipeline->graphics.db_shader_control =
+   S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) |
+   
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) |
+   S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
+   S_02880C_MASK_EXPORT_ENABLE(ps->info.fs.writes_sample_mask) |
+   S_02880C_Z_ORDER(z_order) |
+   S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
+   S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
+   S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory);
+
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) 
{
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index dcd738a54f..8e45e95b77 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -955,6 +955,7 @@ struct radv_pipeline {
struct radv_depth_stencil_state ds;
struct radv_raster_state raster;
struct radv_multisample_state ms;
+   uint32_t db_shader_control;
unsigned prim;
 

Mesa (master): radv: move shader stages calculation to pipeline.

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 239a9224a33d280cd5703c29ce6eb9df2eab9b3d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=239a9224a33d280cd5703c29ce6eb9df2eab9b3d

Author: Dave Airlie 
Date:   Tue Mar 28 12:59:17 2017 +1000

radv: move shader stages calculation to pipeline.

With tess this becomes a bit more complex, so move to pipeline
for now.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 9 +
 src/amd/vulkan/radv_pipeline.c   | 9 -
 src/amd/vulkan/radv_private.h| 1 +
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 04c28d6a29..e994df65fd 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1286,14 +1286,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer 
*cmd_buffer,
}
 
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) {
-   uint32_t stages = 0;
-
-   if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
-   stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
-   S_028B54_GS_EN(1) |
-   S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
-
-   radeon_set_context_reg(cmd_buffer->cs, 
R_028B54_VGT_SHADER_STAGES_EN, stages);
+   radeon_set_context_reg(cmd_buffer->cs, 
R_028B54_VGT_SHADER_STAGES_EN, pipeline->graphics.vgt_shader_stages_en);
 
if (cmd_buffer->device->physical_device->rad_info.chip_class >= 
CIK) {
radeon_set_context_reg_idx(cmd_buffer->cs, 
R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 252808d7a7..07020e8c38 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1771,7 +1771,14 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 
calculate_pa_cl_vs_out_cntl(pipeline);
calculate_ps_inputs(pipeline);
-   
+
+   uint32_t stages = 0;
+   if (radv_pipeline_has_gs(pipeline))
+   stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
+   S_028B54_GS_EN(1) |
+   S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+   pipeline->graphics.vgt_shader_stages_en = stages;
+
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) 
{
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 0b8c86df79..f587ee3ffd 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -965,6 +965,7 @@ struct radv_pipeline {
uint32_t ps_input_cntl[32];
uint32_t ps_input_cntl_num;
uint32_t pa_cl_vs_out_cntl;
+   uint32_t vgt_shader_stages_en;
struct radv_prim_vertex_count prim_vertex_count;
} graphics;
};

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: move vgt_gs_mode value to pipeline.

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: cd33a5c1cb68d8c7e67f4724cc19bb92a405c796
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cd33a5c1cb68d8c7e67f4724cc19bb92a405c796

Author: Dave Airlie 
Date:   Tue Mar 28 11:33:35 2017 +1000

radv: move vgt_gs_mode value to pipeline.

No need to recalculate this every time.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 29 +++--
 src/amd/vulkan/radv_pipeline.c   | 27 ++-
 src/amd/vulkan/radv_private.h|  1 +
 3 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index ce34204b8a..195a82fef5 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -599,27 +599,6 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
 }
 
-static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
-{
-   unsigned gs_max_vert_out = gs->info.gs.vertices_out;
-   unsigned cut_mode;
-
-   if (gs_max_vert_out <= 128) {
-   cut_mode = V_028A40_GS_CUT_128;
-   } else if (gs_max_vert_out <= 256) {
-   cut_mode = V_028A40_GS_CUT_256;
-   } else if (gs_max_vert_out <= 512) {
-   cut_mode = V_028A40_GS_CUT_512;
-   } else {
-   assert(gs_max_vert_out <= 1024);
-   cut_mode = V_028A40_GS_CUT_1024;
-   }
-
-   return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
-  S_028A40_CUT_MODE(cut_mode)|
-  S_028A40_ES_WRITE_OPTIMIZE(1) |
-  S_028A40_GS_WRITE_OPTIMIZE(1);
-}
 
 static void
 radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
@@ -629,13 +608,11 @@ radv_emit_geometry_shader(struct radv_cmd_buffer 
*cmd_buffer,
struct radv_shader_variant *gs;
uint64_t va;
 
+   radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 
pipeline->graphics.vgt_gs_mode);
+
gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
-   if (!gs) {
-   radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 0);
+   if (!gs)
return;
-   }
-
-   radeon_set_context_reg(cmd_buffer->cs, R_028A40_VGT_GS_MODE, 
si_vgt_gs_mode(gs));
 
uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2;
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 2c710f4eb8..752986a9c5 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1505,6 +1505,28 @@ static const struct radv_prim_vertex_count 
prim_size_table[] = {
[V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
 };
 
+static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
+{
+   unsigned gs_max_vert_out = gs->info.gs.vertices_out;
+   unsigned cut_mode;
+
+   if (gs_max_vert_out <= 128) {
+   cut_mode = V_028A40_GS_CUT_128;
+   } else if (gs_max_vert_out <= 256) {
+   cut_mode = V_028A40_GS_CUT_256;
+   } else if (gs_max_vert_out <= 512) {
+   cut_mode = V_028A40_GS_CUT_512;
+   } else {
+   assert(gs_max_vert_out <= 1024);
+   cut_mode = V_028A40_GS_CUT_1024;
+   }
+
+   return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
+  S_028A40_CUT_MODE(cut_mode)|
+  S_028A40_ES_WRITE_OPTIMIZE(1) |
+  S_028A40_GS_WRITE_OPTIMIZE(1);
+}
+
 VkResult
 radv_pipeline_init(struct radv_pipeline *pipeline,
   struct radv_device *device,
@@ -1559,7 +1581,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 
pipeline->active_stages |= 
mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY);
calculate_gs_ring_sizes(pipeline);
-   }
+
+   pipeline->graphics.vgt_gs_mode = 
si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
+   } else
+   pipeline->graphics.vgt_gs_mode = 0;
 
if (!modules[MESA_SHADER_FRAGMENT]) {
nir_builder fs_b;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 433cba7d28..dcd738a54f 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -957,6 +957,7 @@ struct radv_pipeline {
struct radv_multisample_state ms;
unsigned prim;
unsigned gs_out;
+   uint32_t vgt_gs_mode;
bool prim_restart_enable;
unsigned esgs_ring_size;
unsigned gsvs_ring_size;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: move calculating fragment shader i/os to pipeline.

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 92e9c14a6a8d536404ef5b41217662bb2286d946
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=92e9c14a6a8d536404ef5b41217662bb2286d946

Author: Dave Airlie 
Date:   Tue Mar 28 11:43:48 2017 +1000

radv: move calculating fragment shader i/os to pipeline.

There is no need to calculate this on each command submit.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 67 +++--
 src/amd/vulkan/radv_pipeline.c   | 71 
 src/amd/vulkan/radv_private.h|  2 ++
 3 files changed, 77 insertions(+), 63 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index c3b141ea3a..92e68efa86 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -669,18 +669,13 @@ radv_emit_fragment_shader(struct radv_cmd_buffer 
*cmd_buffer,
  struct radv_pipeline *pipeline)
 {
struct radeon_winsys *ws = cmd_buffer->device->ws;
-   struct radv_shader_variant *ps, *vs;
+   struct radv_shader_variant *ps;
uint64_t va;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
struct radv_blend_state *blend = &pipeline->graphics.blend;
-   unsigned ps_offset = 0;
-   struct ac_vs_output_info *outinfo;
assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
 
ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-   vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : 
pipeline->shaders[MESA_SHADER_VERTEX];
-
-   outinfo = &vs->info.vs.outinfo;
 
va = ws->buffer_get_va(ps->bo);
ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
@@ -716,63 +711,9 @@ radv_emit_fragment_shader(struct radv_cmd_buffer 
*cmd_buffer,
radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, 
blend->cb_target_mask);
radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, 
blend->cb_shader_mask);
 
-   if (ps->info.fs.has_pcoord) {
-   unsigned val;
-   val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
-   radeon_set_context_reg(cmd_buffer->cs, 
R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
-   ps_offset++;
-   }
-
-   if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 
0xffffffff)) {
-   unsigned vs_offset, flat_shade;
-   unsigned val;
-   vs_offset = outinfo->prim_id_output;
-   flat_shade = true;
-   val = S_028644_OFFSET(vs_offset) | 
S_028644_FLAT_SHADE(flat_shade);
-   radeon_set_context_reg(cmd_buffer->cs, 
R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
-   ++ps_offset;
-   }
-
-   if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
-   unsigned vs_offset, flat_shade;
-   unsigned val;
-   vs_offset = outinfo->layer_output;
-   flat_shade = true;
-   val = S_028644_OFFSET(vs_offset) | 
S_028644_FLAT_SHADE(flat_shade);
-   radeon_set_context_reg(cmd_buffer->cs, 
R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
-   ++ps_offset;
-   }
-
-   for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; 
++i) {
-   unsigned vs_offset, flat_shade;
-   unsigned val;
-
-   if (!(ps->info.fs.input_mask & (1u << i)))
-   continue;
-
-
-   if (!(outinfo->export_mask & (1u << i))) {
-   radeon_set_context_reg(cmd_buffer->cs, 
R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset,
-  S_028644_OFFSET(0x20));
-   ++ps_offset;
-   continue;
-   }
-
-   vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 
1));
-   if (outinfo->prim_id_output != 0xffffffff) {
-   if (vs_offset >= outinfo->prim_id_output)
-   vs_offset++;
-   }
-   if (outinfo->layer_output != 0xffffffff) {
-   if (vs_offset >= outinfo->layer_output)
- vs_offset++;
-   }
-   flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << 
ps_offset));
-
-   val = S_028644_OFFSET(vs_offset) | 
S_028644_FLAT_SHADE(flat_shade);
-   radeon_set_context_reg(cmd_buffer->cs, 
R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
-   ++ps_offset;
-   }
+   radeon_set_context_reg_seq(cmd_buffer->cs, 
R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num);
+   for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++)
+   radeon_emit(cmd_buffer->cs, 
pipeline->graphics.ps_input_cntl[i]);
 }
 
 static 

Mesa (master): radv: handle NULL multisample state.

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: a8b8e542c2e9ea97413095993cee5ec8faf2ee16
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a8b8e542c2e9ea97413095993cee5ec8faf2ee16

Author: Dave Airlie 
Date:   Tue Mar 28 05:48:27 2017 +1000

radv: handle NULL multisample state.

If rasterization is disabled, we can get a NULL multisample
state.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_pipeline.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 45277b94fa..1becb65055 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1163,10 +1163,13 @@ radv_pipeline_init_multisample_state(struct 
radv_pipeline *pipeline,
int ps_iter_samples = 1;
uint32_t mask = 0xffff;
 
-   ms->num_samples = vkms->rasterizationSamples;
+   if (vkms)
+   ms->num_samples = vkms->rasterizationSamples;
+   else
+   ms->num_samples = 1;
 
if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.force_persample) {
-   ps_iter_samples = vkms->rasterizationSamples;
+   ps_iter_samples = ms->num_samples;
}
 
ms->pa_sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
@@ -1184,8 +1187,8 @@ radv_pipeline_init_multisample_state(struct radv_pipeline 
*pipeline,
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
 
-   if (vkms->rasterizationSamples > 1) {
-   unsigned log_samples = 
util_logbase2(vkms->rasterizationSamples);
+   if (ms->num_samples > 1) {
+   unsigned log_samples = util_logbase2(ms->num_samples);
unsigned log_ps_iter_samples = 
util_logbase2(util_next_power_of_two(ps_iter_samples));
ms->pa_sc_mode_cntl_0 = S_028A48_MSAA_ENABLE(1);
ms->pa_sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); /* 
CM_R_028BDC_PA_SC_LINE_CNTL */
@@ -1199,11 +1202,12 @@ radv_pipeline_init_multisample_state(struct 
radv_pipeline *pipeline,
ms->pa_sc_mode_cntl_1 |= 
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
}
 
-   if (vkms->alphaToCoverageEnable)
-   blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+   if (vkms) {
+   if (vkms->alphaToCoverageEnable)
+   blend->db_alpha_to_mask |= 
S_028B70_ALPHA_TO_MASK_ENABLE(1);
 
-   if (vkms->pSampleMask) {
-   mask = vkms->pSampleMask[0] & 0xffff;
+   if (vkms->pSampleMask)
+   mask = vkms->pSampleMask[0] & 0xffff;
}
 
ms->pa_sc_aa_mask[0] = mask | (mask << 16);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: fix ia_multi_vgt_param for instanced vs indirect draw.

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: ae0551b4b3f7ca79148f0cb8384c0f1efc3faac2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ae0551b4b3f7ca79148f0cb8384c0f1efc3faac2

Author: Dave Airlie 
Date:   Tue Mar 28 05:53:50 2017 +1000

radv: fix ia_multi_vgt_param for instanced vs indirect draw.

The logic was different than radeonsi, fix it up before adding
tess support.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 13 +++--
 src/amd/vulkan/radv_private.h|  3 ++-
 src/amd/vulkan/si_cmd_buffer.c   | 12 ++--
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 1b13ae7bc6..eb2a7b0dde 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1317,7 +1317,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 }
 
 static void
-radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, bool 
instanced_or_indirect_draw,
+radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
+   bool instanced_draw, bool indirect_draw,
uint32_t draw_vertex_count)
 {
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
@@ -1382,7 +1383,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer 
*cmd_buffer, bool instanced_o
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR))
radv_emit_scissor(cmd_buffer);
 
-   ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, 
instanced_or_indirect_draw, draw_vertex_count);
+   ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, 
instanced_draw, indirect_draw, draw_vertex_count);
if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
if (cmd_buffer->device->physical_device->rad_info.chip_class >= 
CIK)
radeon_set_context_reg_idx(cmd_buffer->cs, 
R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
@@ -2296,7 +2297,7 @@ void radv_CmdDraw(
 {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), 
vertexCount);
+   radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), false, 
vertexCount);
 
MAYBE_UNUSED unsigned cdw_max = 
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
 
@@ -2347,7 +2348,7 @@ void radv_CmdDrawIndexed(
uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - 
cmd_buffer->state.index_offset) / index_size;
uint64_t index_va;
 
-   radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), 
indexCount);
+   radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), false, 
indexCount);
radv_emit_primitive_reset_index(cmd_buffer);
 
MAYBE_UNUSED unsigned cdw_max = 
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
@@ -2445,7 +2446,7 @@ radv_cmd_draw_indirect_count(VkCommandBuffer  
   command
  uint32_t
stride)
 {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-   radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
+   radv_cmd_buffer_flush_state(cmd_buffer, false, true, 0);
 
MAYBE_UNUSED unsigned cdw_max = 
radeon_check_space(cmd_buffer->device->ws,
   cmd_buffer->cs, 14);
@@ -2470,7 +2471,7 @@ radv_cmd_draw_indexed_indirect_count(
int index_size = cmd_buffer->state.index_type ? 4 : 2;
uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - 
cmd_buffer->state.index_offset) / index_size;
uint64_t index_va;
-   radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
+   radv_cmd_buffer_flush_state(cmd_buffer, false, true, 0);
radv_emit_primitive_reset_index(cmd_buffer);
 
index_va = 
cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo);
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index e4654bb4d4..433cba7d28 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -759,7 +759,8 @@ void si_write_viewport(struct radeon_winsys_cs *cs, int 
first_vp,
 void si_write_scissors(struct radeon_winsys_cs *cs, int first,
   int count, const VkRect2D *scissors);
 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
-  bool instanced_or_indirect_draw, uint32_t 
draw_vertex_count);
+  bool instanced_draw, bool indirect_draw,
+  uint32_t draw_vertex_count);
 void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 enum chip_class chip_class,
 bool is_mec,

Mesa (master): radv: add parameter to emit_waitcnt.

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: d43691ce775ed7bd525b5d195cc6e17b7c15574e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d43691ce775ed7bd525b5d195cc6e17b7c15574e

Author: Dave Airlie 
Date:   Tue Mar 28 08:46:35 2017 +1000

radv: add parameter to emit_waitcnt.

This is just a precursor for tess support, which needs to
pass different values here.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/common/ac_nir_to_llvm.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index cfbdeae1a3..5a25487a30 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2818,10 +2818,15 @@ static LLVMValueRef visit_image_size(struct 
nir_to_llvm_context *ctx,
return res;
 }
 
-static void emit_waitcnt(struct nir_to_llvm_context *ctx)
+#define NOOP_WAITCNT 0xf7f
+#define LGKM_CNT 0x07f
+#define VM_CNT 0xf70
+
+static void emit_waitcnt(struct nir_to_llvm_context *ctx,
+unsigned simm16)
 {
LLVMValueRef args[1] = {
-   LLVMConstInt(ctx->i32, 0xf70, false),
+   LLVMConstInt(ctx->i32, simm16, false),
};
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
   ctx->voidt, args, 1, 0);
@@ -3297,7 +3302,7 @@ static void visit_intrinsic(struct nir_to_llvm_context 
*ctx,
emit_discard_if(ctx, instr);
break;
case nir_intrinsic_memory_barrier:
-   emit_waitcnt(ctx);
+   emit_waitcnt(ctx, VM_CNT);
break;
case nir_intrinsic_barrier:
emit_barrier(ctx);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: move pa_cl_vs_out_cntl calculation to pipeline

2017-03-28 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 0232ea8025d3da65295c0af1b8f4ca8fc97a74dd
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0232ea8025d3da65295c0af1b8f4ca8fc97a74dd

Author: Dave Airlie 
Date:   Tue Mar 28 11:48:38 2017 +1000

radv: move pa_cl_vs_out_cntl calculation to pipeline

This also takes the side band setting code from radeonsi.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 16 +---
 src/amd/vulkan/radv_pipeline.c   | 31 ++-
 src/amd/vulkan/radv_private.h|  2 +-
 3 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 92e68efa86..04c28d6a29 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -540,23 +540,9 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
   S_028818_VPORT_Y_SCALE_ENA(1) | 
S_028818_VPORT_Y_OFFSET_ENA(1) |
   S_028818_VPORT_Z_SCALE_ENA(1) | 
S_028818_VPORT_Z_OFFSET_ENA(1));
 
-   unsigned clip_dist_mask, cull_dist_mask, total_mask;
-   clip_dist_mask = outinfo->clip_dist_mask;
-   cull_dist_mask = outinfo->cull_dist_mask;
-   total_mask = clip_dist_mask | cull_dist_mask;
 
radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
-  
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
-  
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
-  
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
-  
S_02881C_VS_OUT_MISC_VEC_ENA(outinfo->writes_pointsize ||
-   
outinfo->writes_layer ||
-   
outinfo->writes_viewport_index) |
-  S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 
0x0f) != 0) |
-  S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 
0xf0) != 0) |
-  pipeline->graphics.raster.pa_cl_vs_out_cntl |
-  cull_dist_mask << 8 |
-  clip_dist_mask);
+  pipeline->graphics.pa_cl_vs_out_cntl);
 
radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
   
S_028AB4_REUSE_OFF(outinfo->writes_viewport_index));
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index c7d74805a2..252808d7a7 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1126,7 +1126,7 @@ radv_pipeline_init_raster_state(struct radv_pipeline 
*pipeline,
S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
S_0286D4_PNT_SPRITE_TOP_1(0); // vulkan is top to bottom - 1.0 
at bottom
 
-   raster->pa_cl_vs_out_cntl = S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1);
+
raster->pa_cl_clip_cntl = S_028810_PS_UCP_MODE(3) |
S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) 
|
@@ -1527,6 +1527,33 @@ static uint32_t si_vgt_gs_mode(struct 
radv_shader_variant *gs)
   S_028A40_GS_WRITE_OPTIMIZE(1);
 }
 
+static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
+{
+   struct radv_shader_variant *vs;
+   vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : 
pipeline->shaders[MESA_SHADER_VERTEX];
+
+   struct ac_vs_output_info *outinfo = &vs->info.vs.outinfo;
+
+   unsigned clip_dist_mask, cull_dist_mask, total_mask;
+   clip_dist_mask = outinfo->clip_dist_mask;
+   cull_dist_mask = outinfo->cull_dist_mask;
+   total_mask = clip_dist_mask | cull_dist_mask;
+
+   bool misc_vec_ena = outinfo->writes_pointsize ||
+   outinfo->writes_layer ||
+   outinfo->writes_viewport_index;
+   pipeline->graphics.pa_cl_vs_out_cntl =
+   S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+   S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+   S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+   S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+   S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
+   S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+   S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+   cull_dist_mask << 8 |
+   clip_dist_mask;
+
+}
 static void calculate_ps_inputs(struct radv_pipeline *pipeline)
 {
struct radv_shader_variant *ps, *vs;
@@ -1742,7 +1769,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
ps->info.fs.writes_z ?