Re: [Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
On Thu, Feb 09, 2017 at 11:38:31AM -0800, Jason Ekstrand wrote: > On Thu, Feb 9, 2017 at 10:33 AM, Nanley Cherywrote: > > > On Wed, Feb 08, 2017 at 06:27:52PM -0800, Jason Ekstrand wrote: > > > On Wed, Feb 8, 2017 at 5:34 PM, Nanley Chery > > wrote: > > > > > > > On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: > > > > > In order to get good performance numbers for this, I had to hack up > > the > > > > > driver to whack wm_prog_data::uses_kill to true to emulate a discard > > and > > > > > used the Sascha "shadowmapping" demo. Setting uses_kill to true > > dropped > > > > > the framerate on the demo by 25-30%. Enabling the PMA fix brought it > > > > > back up to around 90% of the original framerate. This doesn't seem > > to > > > > > really impact Dota 2; probably because it doesn't use 16-bit depth. > > > > > > > > > > Reviewed-by: Lionel Landwerlin > > > > > --- > > > > > src/intel/vulkan/TODO | 1 - > > > > > src/intel/vulkan/anv_cmd_buffer.c | 2 + > > > > > src/intel/vulkan/anv_genX.h| 3 + > > > > > src/intel/vulkan/anv_private.h | 17 + > > > > > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ > > > > > src/intel/vulkan/gen8_cmd_buffer.c | 133 > > ++ > > > > +++ > > > > > src/intel/vulkan/genX_blorp_exec.c | 5 ++ > > > > > src/intel/vulkan/genX_cmd_buffer.c | 15 - > > > > > src/intel/vulkan/genX_pipeline.c | 38 +++ > > > > > 9 files changed, 219 insertions(+), 2 deletions(-) > > > > > > > > > > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO > > > > > index 38acc0d..f8b73a1 100644 > > > > > --- a/src/intel/vulkan/TODO > > > > > +++ b/src/intel/vulkan/TODO > > > > > @@ -12,5 +12,4 @@ Performance: > > > > > - Compressed multisample support > > > > > - Pushing pieces of UBOs? 
> > > > > - Enable guardband clipping > > > > > - - pma stall workaround > > > > > - Use soft-pin to avoid relocations > > > > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > > > > b/src/intel/vulkan/anv_cmd_buffer.c > > > > > index 5886fa6..8c08f8d 100644 > > > > > --- a/src/intel/vulkan/anv_cmd_buffer.c > > > > > +++ b/src/intel/vulkan/anv_cmd_buffer.c > > > > > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer > > > > *cmd_buffer) > > > > > state->restart_index = UINT32_MAX; > > > > > state->dynamic = default_dynamic_state; > > > > > state->need_query_wa = true; > > > > > + state->pma_fix_enabled = false; > > > > > + state->hiz_enabled = false; > > > > > > > > > > if (state->attachments != NULL) { > > > > >vk_free(_buffer->pool->alloc, state->attachments); > > > > > diff --git a/src/intel/vulkan/anv_genX.h > > b/src/intel/vulkan/anv_genX.h > > > > > index d04fe38..67147b0 100644 > > > > > --- a/src/intel/vulkan/anv_genX.h > > > > > +++ b/src/intel/vulkan/anv_genX.h > > > > > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct > > > > anv_cmd_buffer *cmd_buffer); > > > > > > > > > > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer > > > > *cmd_buffer); > > > > > > > > > > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer > > *cmd_buffer, > > > > > + bool enable); > > > > > + > > > > > void > > > > > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch > > *batch, > > > > > const struct gen_l3_config *l3_config, > > > > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_ > > > > private.h > > > > > index 4fe3ebc..6efe4ea 100644 > > > > > --- a/src/intel/vulkan/anv_private.h > > > > > +++ b/src/intel/vulkan/anv_private.h > > > > > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { > > > > > bool need_query_wa; > > > > > > > > > > /** > > > > > +* Whether or not the gen8 PMA fix is enabled. 
We ensure that, > > at > > > > the top > > > > > +* of any command buffer it disabled by disabling it in > > > > EndCommandBuffer > > > > ^ > > > > is? > > > > > > > > Fixed? > > > > done > > > > > > > +* and before invoking the secondary in ExecuteCommands. > > > > > +*/ > > > > > + bool pma_fix_enabled; > > > > > + > > > > > + /** > > > > > +* Whether or not we now for certain that HiZ is enabled for the > > > > current > > > >^ > > > >know > > > > > > > > Fixed? > > > > done > > > > > > > +* subpass. If, for whatever reason, we are unsure as to whether > > > > HiZ is > > > > > +* enabled or not, this will be false. > > > > > +*/ > > > > > + bool hiz_enabled; > > > > > + > > > > > + /** > > > > > * Array length is anv_cmd_state::pass::attachment_count. Array > > > > content
Re: [Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
On Thu, Feb 9, 2017 at 10:33 AM, Nanley Cherywrote: > On Wed, Feb 08, 2017 at 06:27:52PM -0800, Jason Ekstrand wrote: > > On Wed, Feb 8, 2017 at 5:34 PM, Nanley Chery > wrote: > > > > > On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: > > > > In order to get good performance numbers for this, I had to hack up > the > > > > driver to whack wm_prog_data::uses_kill to true to emulate a discard > and > > > > used the Sascha "shadowmapping" demo. Setting uses_kill to true > dropped > > > > the framerate on the demo by 25-30%. Enabling the PMA fix brought it > > > > back up to around 90% of the original framerate. This doesn't seem > to > > > > really impact Dota 2; probably because it doesn't use 16-bit depth. > > > > > > > > Reviewed-by: Lionel Landwerlin > > > > --- > > > > src/intel/vulkan/TODO | 1 - > > > > src/intel/vulkan/anv_cmd_buffer.c | 2 + > > > > src/intel/vulkan/anv_genX.h| 3 + > > > > src/intel/vulkan/anv_private.h | 17 + > > > > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ > > > > src/intel/vulkan/gen8_cmd_buffer.c | 133 > ++ > > > +++ > > > > src/intel/vulkan/genX_blorp_exec.c | 5 ++ > > > > src/intel/vulkan/genX_cmd_buffer.c | 15 - > > > > src/intel/vulkan/genX_pipeline.c | 38 +++ > > > > 9 files changed, 219 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO > > > > index 38acc0d..f8b73a1 100644 > > > > --- a/src/intel/vulkan/TODO > > > > +++ b/src/intel/vulkan/TODO > > > > @@ -12,5 +12,4 @@ Performance: > > > > - Compressed multisample support > > > > - Pushing pieces of UBOs? 
> > > > - Enable guardband clipping > > > > - - pma stall workaround > > > > - Use soft-pin to avoid relocations > > > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > > > b/src/intel/vulkan/anv_cmd_buffer.c > > > > index 5886fa6..8c08f8d 100644 > > > > --- a/src/intel/vulkan/anv_cmd_buffer.c > > > > +++ b/src/intel/vulkan/anv_cmd_buffer.c > > > > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer > > > *cmd_buffer) > > > > state->restart_index = UINT32_MAX; > > > > state->dynamic = default_dynamic_state; > > > > state->need_query_wa = true; > > > > + state->pma_fix_enabled = false; > > > > + state->hiz_enabled = false; > > > > > > > > if (state->attachments != NULL) { > > > >vk_free(_buffer->pool->alloc, state->attachments); > > > > diff --git a/src/intel/vulkan/anv_genX.h > b/src/intel/vulkan/anv_genX.h > > > > index d04fe38..67147b0 100644 > > > > --- a/src/intel/vulkan/anv_genX.h > > > > +++ b/src/intel/vulkan/anv_genX.h > > > > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct > > > anv_cmd_buffer *cmd_buffer); > > > > > > > > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer > > > *cmd_buffer); > > > > > > > > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer > *cmd_buffer, > > > > + bool enable); > > > > + > > > > void > > > > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch > *batch, > > > > const struct gen_l3_config *l3_config, > > > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_ > > > private.h > > > > index 4fe3ebc..6efe4ea 100644 > > > > --- a/src/intel/vulkan/anv_private.h > > > > +++ b/src/intel/vulkan/anv_private.h > > > > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { > > > > bool need_query_wa; > > > > > > > > /** > > > > +* Whether or not the gen8 PMA fix is enabled. We ensure that, > at > > > the top > > > > +* of any command buffer it disabled by disabling it in > > > EndCommandBuffer > > > ^ > > > is? > > > > > Fixed? 
> done > > > > +* and before invoking the secondary in ExecuteCommands. > > > > +*/ > > > > + bool pma_fix_enabled; > > > > + > > > > + /** > > > > +* Whether or not we now for certain that HiZ is enabled for the > > > current > > >^ > > >know > > > > > Fixed? > done > > > > +* subpass. If, for whatever reason, we are unsure as to whether > > > HiZ is > > > > +* enabled or not, this will be false. > > > > +*/ > > > > + bool hiz_enabled; > > > > + > > > > + /** > > > > * Array length is anv_cmd_state::pass::attachment_count. Array > > > content is > > > > * valid only when recording a render pass instance. > > > > */ > > > > @@ -1465,8 +1479,11 @@ struct anv_pipeline { > > > > > > > > uint32_t cs_right_mask; > > > > > > > > + bool writes_depth; > > > > + bool
Re: [Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
On Wed, Feb 08, 2017 at 09:38:52PM -0800, Jason Ekstrand wrote: > On Wed, Feb 8, 2017 at 8:11 PM, Jason Ekstrandwrote: > > > On Wed, Feb 8, 2017 at 6:27 PM, Jason Ekstrand > > wrote: > > > >> On Wed, Feb 8, 2017 at 5:34 PM, Nanley Chery > >> wrote: > >> > >>> On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: > >>> > In order to get good performance numbers for this, I had to hack up the > >>> > driver to whack wm_prog_data::uses_kill to true to emulate a discard > >>> and > >>> > used the Sascha "shadowmapping" demo. Setting uses_kill to true > >>> dropped > >>> > the framerate on the demo by 25-30%. Enabling the PMA fix brought it > >>> > back up to around 90% of the original framerate. This doesn't seem to > >>> > really impact Dota 2; probably because it doesn't use 16-bit depth. > >>> > > >>> > Reviewed-by: Lionel Landwerlin > >>> > --- > >>> > src/intel/vulkan/TODO | 1 - > >>> > src/intel/vulkan/anv_cmd_buffer.c | 2 + > >>> > src/intel/vulkan/anv_genX.h| 3 + > >>> > src/intel/vulkan/anv_private.h | 17 + > >>> > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ > >>> > src/intel/vulkan/gen8_cmd_buffer.c | 133 > >>> + > >>> > src/intel/vulkan/genX_blorp_exec.c | 5 ++ > >>> > src/intel/vulkan/genX_cmd_buffer.c | 15 - > >>> > src/intel/vulkan/genX_pipeline.c | 38 +++ > >>> > 9 files changed, 219 insertions(+), 2 deletions(-) > >>> > > >>> > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO > >>> > index 38acc0d..f8b73a1 100644 > >>> > --- a/src/intel/vulkan/TODO > >>> > +++ b/src/intel/vulkan/TODO > >>> > @@ -12,5 +12,4 @@ Performance: > >>> > - Compressed multisample support > >>> > - Pushing pieces of UBOs? 
> >>> > - Enable guardband clipping > >>> > - - pma stall workaround > >>> > - Use soft-pin to avoid relocations > >>> > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > >>> b/src/intel/vulkan/anv_cmd_buffer.c > >>> > index 5886fa6..8c08f8d 100644 > >>> > --- a/src/intel/vulkan/anv_cmd_buffer.c > >>> > +++ b/src/intel/vulkan/anv_cmd_buffer.c > >>> > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer > >>> *cmd_buffer) > >>> > state->restart_index = UINT32_MAX; > >>> > state->dynamic = default_dynamic_state; > >>> > state->need_query_wa = true; > >>> > + state->pma_fix_enabled = false; > >>> > + state->hiz_enabled = false; > >>> > > >>> > if (state->attachments != NULL) { > >>> >vk_free(_buffer->pool->alloc, state->attachments); > >>> > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h > >>> > index d04fe38..67147b0 100644 > >>> > --- a/src/intel/vulkan/anv_genX.h > >>> > +++ b/src/intel/vulkan/anv_genX.h > >>> > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct > >>> anv_cmd_buffer *cmd_buffer); > >>> > > >>> > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer > >>> *cmd_buffer); > >>> > > >>> > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer > >>> *cmd_buffer, > >>> > + bool enable); > >>> > + > >>> > void > >>> > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch > >>> *batch, > >>> > const struct gen_l3_config *l3_config, > >>> > diff --git a/src/intel/vulkan/anv_private.h > >>> b/src/intel/vulkan/anv_private.h > >>> > index 4fe3ebc..6efe4ea 100644 > >>> > --- a/src/intel/vulkan/anv_private.h > >>> > +++ b/src/intel/vulkan/anv_private.h > >>> > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { > >>> > bool need_query_wa; > >>> > > >>> > /** > >>> > +* Whether or not the gen8 PMA fix is enabled. We ensure that, at > >>> the top > >>> > +* of any command buffer it disabled by disabling it in > >>> EndCommandBuffer > >>> ^ > >>> is? 
> >>> > >>> > +* and before invoking the secondary in ExecuteCommands. > >>> > +*/ > >>> > + bool pma_fix_enabled; > >>> > + > >>> > + /** > >>> > +* Whether or not we now for certain that HiZ is enabled for the > >>> current > >>>^ > >>>know > >>> > >>> > +* subpass. If, for whatever reason, we are unsure as to whether > >>> HiZ is > >>> > +* enabled or not, this will be false. > >>> > +*/ > >>> > + bool hiz_enabled; > >>> > + > >>> > + /** > >>> > * Array length is anv_cmd_state::pass::attachment_count. Array > >>> content is > >>> > * valid only when recording a render pass instance. > >>> > */ > >>> > @@ -1465,8 +1479,11 @@ struct anv_pipeline { > >>> > > >>> > uint32_t cs_right_mask; > >>> > > >>> > + bool
Re: [Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
On Wed, Feb 08, 2017 at 06:27:52PM -0800, Jason Ekstrand wrote: > On Wed, Feb 8, 2017 at 5:34 PM, Nanley Cherywrote: > > > On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: > > > In order to get good performance numbers for this, I had to hack up the > > > driver to whack wm_prog_data::uses_kill to true to emulate a discard and > > > used the Sascha "shadowmapping" demo. Setting uses_kill to true dropped > > > the framerate on the demo by 25-30%. Enabling the PMA fix brought it > > > back up to around 90% of the original framerate. This doesn't seem to > > > really impact Dota 2; probably because it doesn't use 16-bit depth. > > > > > > Reviewed-by: Lionel Landwerlin > > > --- > > > src/intel/vulkan/TODO | 1 - > > > src/intel/vulkan/anv_cmd_buffer.c | 2 + > > > src/intel/vulkan/anv_genX.h| 3 + > > > src/intel/vulkan/anv_private.h | 17 + > > > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ > > > src/intel/vulkan/gen8_cmd_buffer.c | 133 ++ > > +++ > > > src/intel/vulkan/genX_blorp_exec.c | 5 ++ > > > src/intel/vulkan/genX_cmd_buffer.c | 15 - > > > src/intel/vulkan/genX_pipeline.c | 38 +++ > > > 9 files changed, 219 insertions(+), 2 deletions(-) > > > > > > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO > > > index 38acc0d..f8b73a1 100644 > > > --- a/src/intel/vulkan/TODO > > > +++ b/src/intel/vulkan/TODO > > > @@ -12,5 +12,4 @@ Performance: > > > - Compressed multisample support > > > - Pushing pieces of UBOs? 
> > > - Enable guardband clipping > > > - - pma stall workaround > > > - Use soft-pin to avoid relocations > > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > > b/src/intel/vulkan/anv_cmd_buffer.c > > > index 5886fa6..8c08f8d 100644 > > > --- a/src/intel/vulkan/anv_cmd_buffer.c > > > +++ b/src/intel/vulkan/anv_cmd_buffer.c > > > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer > > *cmd_buffer) > > > state->restart_index = UINT32_MAX; > > > state->dynamic = default_dynamic_state; > > > state->need_query_wa = true; > > > + state->pma_fix_enabled = false; > > > + state->hiz_enabled = false; > > > > > > if (state->attachments != NULL) { > > >vk_free(_buffer->pool->alloc, state->attachments); > > > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h > > > index d04fe38..67147b0 100644 > > > --- a/src/intel/vulkan/anv_genX.h > > > +++ b/src/intel/vulkan/anv_genX.h > > > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct > > anv_cmd_buffer *cmd_buffer); > > > > > > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer > > *cmd_buffer); > > > > > > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, > > > + bool enable); > > > + > > > void > > > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, > > > const struct gen_l3_config *l3_config, > > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_ > > private.h > > > index 4fe3ebc..6efe4ea 100644 > > > --- a/src/intel/vulkan/anv_private.h > > > +++ b/src/intel/vulkan/anv_private.h > > > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { > > > bool need_query_wa; > > > > > > /** > > > +* Whether or not the gen8 PMA fix is enabled. We ensure that, at > > the top > > > +* of any command buffer it disabled by disabling it in > > EndCommandBuffer > > ^ > > is? > > Fixed? > > > +* and before invoking the secondary in ExecuteCommands. 
> > > +*/ > > > + bool pma_fix_enabled; > > > + > > > + /** > > > +* Whether or not we now for certain that HiZ is enabled for the > > current > >^ > >know > > Fixed? > > > +* subpass. If, for whatever reason, we are unsure as to whether > > HiZ is > > > +* enabled or not, this will be false. > > > +*/ > > > + bool hiz_enabled; > > > + > > > + /** > > > * Array length is anv_cmd_state::pass::attachment_count. Array > > content is > > > * valid only when recording a render pass instance. > > > */ > > > @@ -1465,8 +1479,11 @@ struct anv_pipeline { > > > > > > uint32_t cs_right_mask; > > > > > > + bool writes_depth; > > > + bool depth_test_enable; > > > bool writes_stencil; > > > bool depth_clamp_enable; > > > + bool kill_pixel; > > > > > > struct { > > >uint32_t
Re: [Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
On Wed, Feb 8, 2017 at 8:11 PM, Jason Ekstrandwrote: > On Wed, Feb 8, 2017 at 6:27 PM, Jason Ekstrand > wrote: > >> On Wed, Feb 8, 2017 at 5:34 PM, Nanley Chery >> wrote: >> >>> On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: >>> > In order to get good performance numbers for this, I had to hack up the >>> > driver to whack wm_prog_data::uses_kill to true to emulate a discard >>> and >>> > used the Sascha "shadowmapping" demo. Setting uses_kill to true >>> dropped >>> > the framerate on the demo by 25-30%. Enabling the PMA fix brought it >>> > back up to around 90% of the original framerate. This doesn't seem to >>> > really impact Dota 2; probably because it doesn't use 16-bit depth. >>> > >>> > Reviewed-by: Lionel Landwerlin >>> > --- >>> > src/intel/vulkan/TODO | 1 - >>> > src/intel/vulkan/anv_cmd_buffer.c | 2 + >>> > src/intel/vulkan/anv_genX.h| 3 + >>> > src/intel/vulkan/anv_private.h | 17 + >>> > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ >>> > src/intel/vulkan/gen8_cmd_buffer.c | 133 >>> + >>> > src/intel/vulkan/genX_blorp_exec.c | 5 ++ >>> > src/intel/vulkan/genX_cmd_buffer.c | 15 - >>> > src/intel/vulkan/genX_pipeline.c | 38 +++ >>> > 9 files changed, 219 insertions(+), 2 deletions(-) >>> > >>> > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO >>> > index 38acc0d..f8b73a1 100644 >>> > --- a/src/intel/vulkan/TODO >>> > +++ b/src/intel/vulkan/TODO >>> > @@ -12,5 +12,4 @@ Performance: >>> > - Compressed multisample support >>> > - Pushing pieces of UBOs? 
>>> > - Enable guardband clipping >>> > - - pma stall workaround >>> > - Use soft-pin to avoid relocations >>> > diff --git a/src/intel/vulkan/anv_cmd_buffer.c >>> b/src/intel/vulkan/anv_cmd_buffer.c >>> > index 5886fa6..8c08f8d 100644 >>> > --- a/src/intel/vulkan/anv_cmd_buffer.c >>> > +++ b/src/intel/vulkan/anv_cmd_buffer.c >>> > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer >>> *cmd_buffer) >>> > state->restart_index = UINT32_MAX; >>> > state->dynamic = default_dynamic_state; >>> > state->need_query_wa = true; >>> > + state->pma_fix_enabled = false; >>> > + state->hiz_enabled = false; >>> > >>> > if (state->attachments != NULL) { >>> >vk_free(_buffer->pool->alloc, state->attachments); >>> > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h >>> > index d04fe38..67147b0 100644 >>> > --- a/src/intel/vulkan/anv_genX.h >>> > +++ b/src/intel/vulkan/anv_genX.h >>> > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct >>> anv_cmd_buffer *cmd_buffer); >>> > >>> > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer >>> *cmd_buffer); >>> > >>> > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer >>> *cmd_buffer, >>> > + bool enable); >>> > + >>> > void >>> > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch >>> *batch, >>> > const struct gen_l3_config *l3_config, >>> > diff --git a/src/intel/vulkan/anv_private.h >>> b/src/intel/vulkan/anv_private.h >>> > index 4fe3ebc..6efe4ea 100644 >>> > --- a/src/intel/vulkan/anv_private.h >>> > +++ b/src/intel/vulkan/anv_private.h >>> > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { >>> > bool need_query_wa; >>> > >>> > /** >>> > +* Whether or not the gen8 PMA fix is enabled. We ensure that, at >>> the top >>> > +* of any command buffer it disabled by disabling it in >>> EndCommandBuffer >>> ^ >>> is? >>> >>> > +* and before invoking the secondary in ExecuteCommands. 
>>> > +*/ >>> > + bool pma_fix_enabled; >>> > + >>> > + /** >>> > +* Whether or not we now for certain that HiZ is enabled for the >>> current >>>^ >>>know >>> >>> > +* subpass. If, for whatever reason, we are unsure as to whether >>> HiZ is >>> > +* enabled or not, this will be false. >>> > +*/ >>> > + bool hiz_enabled; >>> > + >>> > + /** >>> > * Array length is anv_cmd_state::pass::attachment_count. Array >>> content is >>> > * valid only when recording a render pass instance. >>> > */ >>> > @@ -1465,8 +1479,11 @@ struct anv_pipeline { >>> > >>> > uint32_t cs_right_mask; >>> > >>> > + bool writes_depth; >>> > + bool depth_test_enable; >>> > bool writes_stencil; >>> > bool depth_clamp_enable; >>> > + bool
Re: [Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
On Wed, Feb 8, 2017 at 6:27 PM, Jason Ekstrandwrote: > On Wed, Feb 8, 2017 at 5:34 PM, Nanley Chery > wrote: > >> On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: >> > In order to get good performance numbers for this, I had to hack up the >> > driver to whack wm_prog_data::uses_kill to true to emulate a discard and >> > used the Sascha "shadowmapping" demo. Setting uses_kill to true dropped >> > the framerate on the demo by 25-30%. Enabling the PMA fix brought it >> > back up to around 90% of the original framerate. This doesn't seem to >> > really impact Dota 2; probably because it doesn't use 16-bit depth. >> > >> > Reviewed-by: Lionel Landwerlin >> > --- >> > src/intel/vulkan/TODO | 1 - >> > src/intel/vulkan/anv_cmd_buffer.c | 2 + >> > src/intel/vulkan/anv_genX.h| 3 + >> > src/intel/vulkan/anv_private.h | 17 + >> > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ >> > src/intel/vulkan/gen8_cmd_buffer.c | 133 >> + >> > src/intel/vulkan/genX_blorp_exec.c | 5 ++ >> > src/intel/vulkan/genX_cmd_buffer.c | 15 - >> > src/intel/vulkan/genX_pipeline.c | 38 +++ >> > 9 files changed, 219 insertions(+), 2 deletions(-) >> > >> > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO >> > index 38acc0d..f8b73a1 100644 >> > --- a/src/intel/vulkan/TODO >> > +++ b/src/intel/vulkan/TODO >> > @@ -12,5 +12,4 @@ Performance: >> > - Compressed multisample support >> > - Pushing pieces of UBOs? 
>> > - Enable guardband clipping >> > - - pma stall workaround >> > - Use soft-pin to avoid relocations >> > diff --git a/src/intel/vulkan/anv_cmd_buffer.c >> b/src/intel/vulkan/anv_cmd_buffer.c >> > index 5886fa6..8c08f8d 100644 >> > --- a/src/intel/vulkan/anv_cmd_buffer.c >> > +++ b/src/intel/vulkan/anv_cmd_buffer.c >> > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer >> *cmd_buffer) >> > state->restart_index = UINT32_MAX; >> > state->dynamic = default_dynamic_state; >> > state->need_query_wa = true; >> > + state->pma_fix_enabled = false; >> > + state->hiz_enabled = false; >> > >> > if (state->attachments != NULL) { >> >vk_free(_buffer->pool->alloc, state->attachments); >> > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h >> > index d04fe38..67147b0 100644 >> > --- a/src/intel/vulkan/anv_genX.h >> > +++ b/src/intel/vulkan/anv_genX.h >> > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct >> anv_cmd_buffer *cmd_buffer); >> > >> > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer >> *cmd_buffer); >> > >> > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer >> *cmd_buffer, >> > + bool enable); >> > + >> > void >> > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch >> *batch, >> > const struct gen_l3_config *l3_config, >> > diff --git a/src/intel/vulkan/anv_private.h >> b/src/intel/vulkan/anv_private.h >> > index 4fe3ebc..6efe4ea 100644 >> > --- a/src/intel/vulkan/anv_private.h >> > +++ b/src/intel/vulkan/anv_private.h >> > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { >> > bool need_query_wa; >> > >> > /** >> > +* Whether or not the gen8 PMA fix is enabled. We ensure that, at >> the top >> > +* of any command buffer it disabled by disabling it in >> EndCommandBuffer >> ^ >> is? >> >> > +* and before invoking the secondary in ExecuteCommands. 
>> > +*/ >> > + bool pma_fix_enabled; >> > + >> > + /** >> > +* Whether or not we now for certain that HiZ is enabled for the >> current >>^ >>know >> >> > +* subpass. If, for whatever reason, we are unsure as to whether >> HiZ is >> > +* enabled or not, this will be false. >> > +*/ >> > + bool hiz_enabled; >> > + >> > + /** >> > * Array length is anv_cmd_state::pass::attachment_count. Array >> content is >> > * valid only when recording a render pass instance. >> > */ >> > @@ -1465,8 +1479,11 @@ struct anv_pipeline { >> > >> > uint32_t cs_right_mask; >> > >> > + bool writes_depth; >> > + bool depth_test_enable; >> > bool writes_stencil; >> > bool depth_clamp_enable; >> > + bool kill_pixel; >> > >> > struct { >> >uint32_t sf[7]; >> > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c >> b/src/intel/vulkan/gen7_cmd_buffer.c >>
Re: [Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
On Wed, Feb 8, 2017 at 5:34 PM, Nanley Chery wrote: > On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: > > In order to get good performance numbers for this, I had to hack up the > > driver to whack wm_prog_data::uses_kill to true to emulate a discard and > > used the Sascha "shadowmapping" demo. Setting uses_kill to true dropped > > the framerate on the demo by 25-30%. Enabling the PMA fix brought it > > back up to around 90% of the original framerate. This doesn't seem to > > really impact Dota 2; probably because it doesn't use 16-bit depth. > > > > Reviewed-by: Lionel Landwerlin > > --- > > src/intel/vulkan/TODO | 1 - > > src/intel/vulkan/anv_cmd_buffer.c | 2 + > > src/intel/vulkan/anv_genX.h | 3 + > > src/intel/vulkan/anv_private.h | 17 + > > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ > > src/intel/vulkan/gen8_cmd_buffer.c | 133 ++ > +++ > > src/intel/vulkan/genX_blorp_exec.c | 5 ++ > > src/intel/vulkan/genX_cmd_buffer.c | 15 - > > src/intel/vulkan/genX_pipeline.c | 38 +++ > > 9 files changed, 219 insertions(+), 2 deletions(-) > > > > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO > > index 38acc0d..f8b73a1 100644 > > --- a/src/intel/vulkan/TODO > > +++ b/src/intel/vulkan/TODO > > @@ -12,5 +12,4 @@ Performance: > > - Compressed multisample support > > - Pushing pieces of UBOs? 
> > - Enable guardband clipping > > - - pma stall workaround > > - Use soft-pin to avoid relocations > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > b/src/intel/vulkan/anv_cmd_buffer.c > > index 5886fa6..8c08f8d 100644 > > --- a/src/intel/vulkan/anv_cmd_buffer.c > > +++ b/src/intel/vulkan/anv_cmd_buffer.c > > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer > *cmd_buffer) > > state->restart_index = UINT32_MAX; > > state->dynamic = default_dynamic_state; > > state->need_query_wa = true; > > + state->pma_fix_enabled = false; > > + state->hiz_enabled = false; > > > > if (state->attachments != NULL) { > > vk_free(&cmd_buffer->pool->alloc, state->attachments); > > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h > > index d04fe38..67147b0 100644 > > --- a/src/intel/vulkan/anv_genX.h > > +++ b/src/intel/vulkan/anv_genX.h > > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct > anv_cmd_buffer *cmd_buffer); > > > > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer > *cmd_buffer); > > > > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, > > + bool enable); > > + > > void > > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, > > const struct gen_l3_config *l3_config, > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_ > private.h > > index 4fe3ebc..6efe4ea 100644 > > --- a/src/intel/vulkan/anv_private.h > > +++ b/src/intel/vulkan/anv_private.h > > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { > > bool need_query_wa; > > > > /** > > +* Whether or not the gen8 PMA fix is enabled. We ensure that, at > the top > > +* of any command buffer it disabled by disabling it in > EndCommandBuffer > ^ > is? > > > +* and before invoking the secondary in ExecuteCommands. > > +*/ > > + bool pma_fix_enabled; > > + > > + /** > > +* Whether or not we now for certain that HiZ is enabled for the > current >^ >know > > > +* subpass. 
If, for whatever reason, we are unsure as to whether > HiZ is > > +* enabled or not, this will be false. > > +*/ > > + bool hiz_enabled; > > + > > + /** > > * Array length is anv_cmd_state::pass::attachment_count. Array > content is > > * valid only when recording a render pass instance. > > */ > > @@ -1465,8 +1479,11 @@ struct anv_pipeline { > > > > uint32_t cs_right_mask; > > > > + bool writes_depth; > > + bool depth_test_enable; > > bool writes_stencil; > > bool depth_clamp_enable; > > + bool kill_pixel; > > > > struct { > >uint32_t sf[7]; > > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c > b/src/intel/vulkan/gen7_cmd_buffer.c > > index 013ed87..c1a25e8 100644 > > --- a/src/intel/vulkan/gen7_cmd_buffer.c > > +++ b/src/intel/vulkan/gen7_cmd_buffer.c > > @@ -260,6 +260,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct >
Re: [Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: > In order to get good performance numbers for this, I had to hack up the > driver to whack wm_prog_data::uses_kill to true to emulate a discard and > used the Sascha "shadowmapping" demo. Setting uses_kill to true dropped > the framerate on the demo by 25-30%. Enabling the PMA fix brought it > back up to around 90% of the original framerate. This doesn't seem to > really impact Dota 2; probably because it doesn't use 16-bit depth. > > Reviewed-by: Lionel Landwerlin > --- > src/intel/vulkan/TODO | 1 - > src/intel/vulkan/anv_cmd_buffer.c | 2 + > src/intel/vulkan/anv_genX.h | 3 + > src/intel/vulkan/anv_private.h | 17 + > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ > src/intel/vulkan/gen8_cmd_buffer.c | 133 > + > src/intel/vulkan/genX_blorp_exec.c | 5 ++ > src/intel/vulkan/genX_cmd_buffer.c | 15 - > src/intel/vulkan/genX_pipeline.c | 38 +++ > 9 files changed, 219 insertions(+), 2 deletions(-) > > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO > index 38acc0d..f8b73a1 100644 > --- a/src/intel/vulkan/TODO > +++ b/src/intel/vulkan/TODO > @@ -12,5 +12,4 @@ Performance: > - Compressed multisample support > - Pushing pieces of UBOs? 
> - Enable guardband clipping > - - pma stall workaround > - Use soft-pin to avoid relocations > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > b/src/intel/vulkan/anv_cmd_buffer.c > index 5886fa6..8c08f8d 100644 > --- a/src/intel/vulkan/anv_cmd_buffer.c > +++ b/src/intel/vulkan/anv_cmd_buffer.c > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) > state->restart_index = UINT32_MAX; > state->dynamic = default_dynamic_state; > state->need_query_wa = true; > + state->pma_fix_enabled = false; > + state->hiz_enabled = false; > > if (state->attachments != NULL) { > vk_free(&cmd_buffer->pool->alloc, state->attachments); > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h > index d04fe38..67147b0 100644 > --- a/src/intel/vulkan/anv_genX.h > +++ b/src/intel/vulkan/anv_genX.h > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct > anv_cmd_buffer *cmd_buffer); > > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); > > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, > + bool enable); > + > void > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, > const struct gen_l3_config *l3_config, > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h > index 4fe3ebc..6efe4ea 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { > bool need_query_wa; > > /** > +* Whether or not the gen8 PMA fix is enabled. We ensure that, at the top > +* of any command buffer it disabled by disabling it in EndCommandBuffer ^ is? > +* and before invoking the secondary in ExecuteCommands. > +*/ > + bool pma_fix_enabled; > + > + /** > +* Whether or not we now for certain that HiZ is enabled for the current ^ know > +* subpass. If, for whatever reason, we are unsure as to whether HiZ is > +* enabled or not, this will be false. 
> +*/ > + bool hiz_enabled; > + > + /** > * Array length is anv_cmd_state::pass::attachment_count. Array content is > * valid only when recording a render pass instance. > */ > @@ -1465,8 +1479,11 @@ struct anv_pipeline { > > uint32_t cs_right_mask; > > + bool writes_depth; > + bool depth_test_enable; > bool writes_stencil; > bool depth_clamp_enable; > + bool kill_pixel; > > struct { >uint32_t sf[7]; > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c > b/src/intel/vulkan/gen7_cmd_buffer.c > index 013ed87..c1a25e8 100644 > --- a/src/intel/vulkan/gen7_cmd_buffer.c > +++ b/src/intel/vulkan/gen7_cmd_buffer.c > @@ -260,6 +260,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct > anv_cmd_buffer *cmd_buffer) > cmd_buffer->state.dirty = 0; > } > > +void > +genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, > +bool enable) > +{ > + /* The NP PMA fix doesn't exist on gen7 */ > +} > + > void genX(CmdSetEvent)( >
[Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
In order to get good performance numbers for this, I had to hack up the driver to whack wm_prog_data::uses_kill to true to emulate a discard and used the Sascha "shadowmapping" demo. Setting uses_kill to true dropped the framerate on the demo by 25-30%. Enabling the PMA fix brought it back up to around 90% of the original framerate. This doesn't seem to really impact Dota 2; probably because it doesn't use 16-bit depth. Reviewed-by: Lionel Landwerlin--- src/intel/vulkan/TODO | 1 - src/intel/vulkan/anv_cmd_buffer.c | 2 + src/intel/vulkan/anv_genX.h| 3 + src/intel/vulkan/anv_private.h | 17 + src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ src/intel/vulkan/gen8_cmd_buffer.c | 133 + src/intel/vulkan/genX_blorp_exec.c | 5 ++ src/intel/vulkan/genX_cmd_buffer.c | 15 - src/intel/vulkan/genX_pipeline.c | 38 +++ 9 files changed, 219 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO index 38acc0d..f8b73a1 100644 --- a/src/intel/vulkan/TODO +++ b/src/intel/vulkan/TODO @@ -12,5 +12,4 @@ Performance: - Compressed multisample support - Pushing pieces of UBOs? 
- Enable guardband clipping - - pma stall workaround - Use soft-pin to avoid relocations diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 5886fa6..8c08f8d 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) state->restart_index = UINT32_MAX; state->dynamic = default_dynamic_state; state->need_query_wa = true; + state->pma_fix_enabled = false; + state->hiz_enabled = false; if (state->attachments != NULL) { vk_free(&cmd_buffer->pool->alloc, state->attachments); diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index d04fe38..67147b0 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, + bool enable); + void genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, const struct gen_l3_config *l3_config, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 4fe3ebc..6efe4ea 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1163,6 +1163,20 @@ struct anv_cmd_state { bool need_query_wa; /** +* Whether or not the gen8 PMA fix is enabled. We ensure that, at the top +* of any command buffer it disabled by disabling it in EndCommandBuffer +* and before invoking the secondary in ExecuteCommands. +*/ + bool pma_fix_enabled; + + /** +* Whether or not we now for certain that HiZ is enabled for the current +* subpass. If, for whatever reason, we are unsure as to whether HiZ is +* enabled or not, this will be false. +*/ + bool hiz_enabled; + + /** * Array length is anv_cmd_state::pass::attachment_count. Array content is * valid only when recording a render pass instance. 
*/ @@ -1465,8 +1479,11 @@ struct anv_pipeline { uint32_t cs_right_mask; + bool writes_depth; + bool depth_test_enable; bool writes_stencil; bool depth_clamp_enable; + bool kill_pixel; struct { uint32_t sf[7]; diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 013ed87..c1a25e8 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -260,6 +260,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty = 0; } +void +genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, +bool enable) +{ + /* The NP PMA fix doesn't exist on gen7 */ +} + void genX(CmdSetEvent)( VkCommandBuffer commandBuffer, VkEvent event, diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8c8de62..271ab3f 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -155,6 +155,135 @@ __emit_sf_state(struct anv_cmd_buffer *cmd_buffer) #endif void