Re: [Mesa-dev] [PATCH 1/7] radv: record if a render pass has depth/stencil resolve attachments

2019-05-27 Thread Bas Nieuwenhuizen
On Mon, May 27, 2019 at 5:38 PM Samuel Pitoiset
 wrote:
>
> Only supported with vkCreateRenderPass2().
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_pass.c| 30 +-
>  src/amd/vulkan/radv_private.h |  3 +++
>  2 files changed, 32 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_pass.c b/src/amd/vulkan/radv_pass.c
> index 4d1e38a780e..b21bf37e401 100644
> --- a/src/amd/vulkan/radv_pass.c
> +++ b/src/amd/vulkan/radv_pass.c
> @@ -75,6 +75,10 @@ radv_render_pass_compile(struct radv_render_pass *pass)
> subpass->depth_stencil_attachment->attachment == 
> VK_ATTACHMENT_UNUSED)
> subpass->depth_stencil_attachment = NULL;
>
> +   if (subpass->ds_resolve_attachment &&
> +   subpass->ds_resolve_attachment->attachment == 
> VK_ATTACHMENT_UNUSED)
> +   subpass->ds_resolve_attachment = NULL;
> +
> for (uint32_t j = 0; j < subpass->attachment_count; j++) {
> struct radv_subpass_attachment *subpass_att =
> &subpass->attachments[j];
> @@ -126,6 +130,9 @@ radv_render_pass_compile(struct radv_render_pass *pass)
> subpass->has_resolve = true;
> }
> }
> +
> +   if (subpass->ds_resolve_attachment)
> +   subpass->has_resolve = true;

I think this makes the code assume that there are also color resolves
to be done, which might not be the case?

> }
>  }
>
> @@ -291,10 +298,15 @@ VkResult radv_CreateRenderPass(
>  static unsigned
>  radv_num_subpass_attachments2(const VkSubpassDescription2KHR *desc)
>  {
> +   const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve =
> +   vk_find_struct_const(desc->pNext,
> +
> SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
> +
> return desc->inputAttachmentCount +
>desc->colorAttachmentCount +
>(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
> -  (desc->pDepthStencilAttachment != NULL);
> +  (desc->pDepthStencilAttachment != NULL) +
> +  (ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
>  }
>
>  VkResult radv_CreateRenderPass2KHR(
> @@ -411,6 +423,22 @@ VkResult radv_CreateRenderPass2KHR(
> .layout = 
> desc->pDepthStencilAttachment->layout,
> };
> }
> +
> +   const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve =
> +   vk_find_struct_const(desc->pNext,
> +
> SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
> +
> +   if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) 
> {
> +   subpass->ds_resolve_attachment = p++;
> +
> +   *subpass->ds_resolve_attachment = (struct 
> radv_subpass_attachment) {
> +   .attachment =  
> ds_resolve->pDepthStencilResolveAttachment->attachment,
> +   .layout =  
> ds_resolve->pDepthStencilResolveAttachment->layout,
> +   };
> +
> +   subpass->depth_resolve_mode = 
> ds_resolve->depthResolveMode;
> +   subpass->stencil_resolve_mode = 
> ds_resolve->stencilResolveMode;
> +   }
> }
>
> for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 7834a505562..e826740bc9f 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1882,6 +1882,9 @@ struct radv_subpass {
> struct radv_subpass_attachment * color_attachments;
> struct radv_subpass_attachment * resolve_attachments;
> struct radv_subpass_attachment * depth_stencil_attachment;
> +   struct radv_subpass_attachment * ds_resolve_attachment;
> +   VkResolveModeFlagBitsKHR depth_resolve_mode;
> +   VkResolveModeFlagBitsKHR stencil_resolve_mode;
>
> /** Subpass has at least one resolve attachment */
> bool has_resolve;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: add radv_clear_htile() helper

2019-05-27 Thread Bas Nieuwenhuizen
r-b

On Wed, May 22, 2019 at 3:35 PM Samuel Pitoiset
 wrote:
>
> This helper will be useful for clearing HTILE after some
> depth/stencil resolves.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c |  7 +--
>  src/amd/vulkan/radv_meta.h   |  3 +++
>  src/amd/vulkan/radv_meta_clear.c | 12 
>  3 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 4f592bc7f68..43730f0568c 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4389,19 +4389,14 @@ static void radv_initialize_htile(struct 
> radv_cmd_buffer *cmd_buffer,
>  {
> assert(range->baseMipLevel == 0);
> assert(range->levelCount == 1 || range->levelCount == 
> VK_REMAINING_ARRAY_LAYERS);
> -   unsigned layer_count = radv_get_layerCount(image, range);
> -   uint64_t size = image->planes[0].surface.htile_slice_size * 
> layer_count;
> VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
> -   uint64_t offset = image->offset + image->htile_offset +
> - image->planes[0].surface.htile_slice_size * 
> range->baseArrayLayer;
> struct radv_cmd_state *state = &cmd_buffer->state;
> VkClearDepthStencilValue value = {};
>
> state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
>  RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
>
> -   state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo, offset,
> - size, clear_word);
> +   state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, 
> clear_word);
>
> state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
>
> diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
> index 5e0afd11a00..0bd75d6c207 100644
> --- a/src/amd/vulkan/radv_meta.h
> +++ b/src/amd/vulkan/radv_meta.h
> @@ -211,6 +211,9 @@ uint32_t radv_clear_fmask(struct radv_cmd_buffer 
> *cmd_buffer,
>   struct radv_image *image, uint32_t value);
>  uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
> struct radv_image *image, uint32_t value);
> +uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
> + struct radv_image *image,
> + const VkImageSubresourceRange *range, uint32_t 
> value);
>
>  /* common nir builder helpers */
>  #include "nir/nir_builder.h"
> diff --git a/src/amd/vulkan/radv_meta_clear.c 
> b/src/amd/vulkan/radv_meta_clear.c
> index 6c038fa779d..0db5e1db05f 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -1344,6 +1344,18 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
> image->planes[0].surface.dcc_size, value);
>  }
>
> +uint32_t
> +radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image 
> *image,
> +const VkImageSubresourceRange *range, uint32_t value)
> +{
> +   unsigned layer_count = radv_get_layerCount(image, range);
> +   uint64_t size = image->planes[0].surface.htile_slice_size * 
> layer_count;
> +   uint64_t offset = image->offset + image->htile_offset +
> + image->planes[0].surface.htile_slice_size * 
> range->baseArrayLayer;
> +
> +   return radv_fill_buffer(cmd_buffer, image->bo, offset, size, value);
> +}
> +
>  static void vi_get_fast_clear_parameters(VkFormat format,
>  const VkClearColorValue *clear_value,
>  uint32_t* reset_value,
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: always dirty the framebuffer when restoring a subpass

2019-05-27 Thread Bas Nieuwenhuizen
r-b

On Thu, May 23, 2019 at 2:53 PM Samuel Pitoiset
 wrote:
>
> The old code was not wrong because the transitions performed
> after the resolves should re-emit the framebuffer if needed.
>
> This change is mostly a no-op but it improves consistency
> regarding other meta operations that need to save/restore subpasses.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_resolve.c| 3 ++-
>  src/amd/vulkan/radv_meta_resolve_fs.c | 3 ++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_resolve.c 
> b/src/amd/vulkan/radv_meta_resolve.c
> index ade5d438438..b9f5106ef01 100644
> --- a/src/amd/vulkan/radv_meta_resolve.c
> +++ b/src/amd/vulkan/radv_meta_resolve.c
> @@ -691,7 +691,8 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer 
> *cmd_buffer)
>  &(VkExtent2D) { fb->width, fb->height });
> }
>
> -   cmd_buffer->state.subpass = subpass;
> +   radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
> +
> radv_meta_restore(&saved_state, cmd_buffer);
>  }
>
> diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c 
> b/src/amd/vulkan/radv_meta_resolve_fs.c
> index 3c63195f48f..9f20f6753e2 100644
> --- a/src/amd/vulkan/radv_meta_resolve_fs.c
> +++ b/src/amd/vulkan/radv_meta_resolve_fs.c
> @@ -632,6 +632,7 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer 
> *cmd_buffer)
>  &(VkExtent2D) { fb->width, fb->height });
> }
>
> -   cmd_buffer->state.subpass = subpass;
> +   radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
> +
> radv_meta_restore(&saved_state, cmd_buffer);
>  }
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] radv: ignore the loadOp if the first use of an attachment is a resolve

2019-05-27 Thread Bas Nieuwenhuizen
R-b

On Mon, May 27, 2019, 10:16 AM Samuel Pitoiset 
wrote:

> Based on ANV.
>
> v2: - remove the if statement
> - update the comment
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_resolve.c | 12 +++-
>  1 file changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_resolve.c
> b/src/amd/vulkan/radv_meta_resolve.c
> index ade5d438438..ca5cf22f0d7 100644
> --- a/src/amd/vulkan/radv_meta_resolve.c
> +++ b/src/amd/vulkan/radv_meta_resolve.c
> @@ -618,15 +618,6 @@ radv_cmd_buffer_resolve_subpass(struct
> radv_cmd_buffer *cmd_buffer)
> struct radv_meta_saved_state saved_state;
> enum radv_resolve_method resolve_method = RESOLVE_HW;
>
> -   /* FINISHME(perf): Skip clears for resolve attachments.
> -*
> -* From the Vulkan 1.0 spec:
> -*
> -*If the first use of an attachment in a render pass is as a
> resolve
> -*attachment, then the loadOp is effectively ignored as the
> resolve is
> -*guaranteed to overwrite all pixels in the render area.
> -*/
> -
> if (!subpass->has_resolve)
> return;
>
> @@ -637,6 +628,9 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer
> *cmd_buffer)
> if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
> continue;
>
> +   /* Make sure to not clear color attachments after
> resolves. */
> +
>  cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects =
> 0;
> +
> struct radv_image *dst_img =
> cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
> struct radv_image *src_img =
> cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] r600: Cleanup "nir" debug option.

2019-05-26 Thread Bas Nieuwenhuizen
r600g does not have a nir compiler, and radeonsi does not use the
option either.
---
 src/gallium/drivers/r600/r600_pipe_common.c | 1 -
 src/gallium/drivers/r600/r600_pipe_common.h | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe_common.c 
b/src/gallium/drivers/r600/r600_pipe_common.c
index 5177ff4e1c6..b1ad0d7d23b 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -741,7 +741,6 @@ void r600_common_context_cleanup(struct r600_common_context 
*rctx)
 static const struct debug_named_value common_debug_options[] = {
/* logging */
{ "tex", DBG_TEX, "Print texture info" },
-   { "nir", DBG_NIR, "Enable experimental NIR shaders" },
{ "compute", DBG_COMPUTE, "Print compute info" },
{ "vm", DBG_VM, "Print virtual addresses when creating resources" },
{ "info", DBG_INFO, "Print driver information" },
diff --git a/src/gallium/drivers/r600/r600_pipe_common.h 
b/src/gallium/drivers/r600/r600_pipe_common.h
index b43b7eecd10..4136ba5b565 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.h
+++ b/src/gallium/drivers/r600/r600_pipe_common.h
@@ -78,7 +78,7 @@ struct u_log_context;
 #define DBG_ALL_SHADERS(DBG_FS - 1)
 #define DBG_FS (1 << 6) /* fetch shader */
 #define DBG_TEX(1 << 7)
-#define DBG_NIR(1 << 8)
+/* gap */
 #define DBG_COMPUTE(1 << 9)
 /* gap */
 #define DBG_VM (1 << 11)
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: tidy up GetQueryPoolResults for occlusion queries

2019-05-26 Thread Bas Nieuwenhuizen
r-b

On Wed, May 22, 2019 at 5:43 PM Samuel Pitoiset
 wrote:
>
> Just move the block that checks the availability bit into the
> switch like other query types.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_query.c | 12 +---
>  1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
> index 0bc7f8390b6..c0f470da888 100644
> --- a/src/amd/vulkan/radv_query.c
> +++ b/src/amd/vulkan/radv_query.c
> @@ -1121,13 +1121,6 @@ VkResult radv_GetQueryPoolResults(
> char *src = pool->ptr + query * pool->stride;
> uint32_t available;
>
> -   if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
> -   if (flags & VK_QUERY_RESULT_WAIT_BIT)
> -   while(!*(volatile uint32_t*)(pool->ptr + 
> pool->availability_offset + 4 * query))
> -   ;
> -   available = *(uint32_t*)(pool->ptr + 
> pool->availability_offset + 4 * query);
> -   }
> -
> switch (pool->type) {
> case VK_QUERY_TYPE_TIMESTAMP: {
> available = *(uint64_t *)src != TIMESTAMP_NOT_READY;
> @@ -1187,6 +1180,11 @@ VkResult radv_GetQueryPoolResults(
> break;
> }
> case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
> +   if (flags & VK_QUERY_RESULT_WAIT_BIT)
> +   while(!*(volatile uint32_t*)(pool->ptr + 
> pool->availability_offset + 4 * query))
> +   ;
> +   available = *(uint32_t*)(pool->ptr + 
> pool->availability_offset + 4 * query);
> +
> if (!available && !(flags & 
> VK_QUERY_RESULT_PARTIAL_BIT))
> result = VK_NOT_READY;
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: ignore the loadOp if the first use of an attachment is a resolve

2019-05-26 Thread Bas Nieuwenhuizen
On Sun, May 26, 2019 at 2:50 PM Bas Nieuwenhuizen
 wrote:
>
> On Wed, May 22, 2019 at 11:20 AM Samuel Pitoiset
>  wrote:
> >
> > Based on ANV.
> >
> > Signed-off-by: Samuel Pitoiset 
> > ---
> >  src/amd/vulkan/radv_meta_resolve.c | 21 -
> >  1 file changed, 12 insertions(+), 9 deletions(-)
> >
> > diff --git a/src/amd/vulkan/radv_meta_resolve.c 
> > b/src/amd/vulkan/radv_meta_resolve.c
> > index ade5d438438..6a8abce1ddb 100644
> > --- a/src/amd/vulkan/radv_meta_resolve.c
> > +++ b/src/amd/vulkan/radv_meta_resolve.c
> > @@ -618,15 +618,6 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer 
> > *cmd_buffer)
> > struct radv_meta_saved_state saved_state;
> > enum radv_resolve_method resolve_method = RESOLVE_HW;
> >
> > -   /* FINISHME(perf): Skip clears for resolve attachments.
> > -*
> > -* From the Vulkan 1.0 spec:
> > -*
> > -*If the first use of an attachment in a render pass is as a 
> > resolve
> > -*attachment, then the loadOp is effectively ignored as the 
> > resolve is
> > -*guaranteed to overwrite all pixels in the render area.
> > -*/
> > -
> > if (!subpass->has_resolve)
> > return;
> >
> > @@ -637,6 +628,18 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer 
> > *cmd_buffer)
> > if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
> > continue;
> >
> > +   if 
> > (cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects) {
> > +   /* From the Vulkan 1.0 spec:
> > +*
> > +*If the first use of an attachment in a render
> > +*pass is as a resolve attachment, then the 
> > loadOp
> > +*is effectively ignored as the resolve is
> > +*guaranteed to overwrite all pixels in the 
> > render
> > +*area.
> > +   */
> > +   
> > cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 
> > 0;
> > +   }
> > +
>
> You can drop the if statement. Also this seems like a correctness
> issue, as we only cleared the first time it was used as a color
> attachment, which might happen after a resolve?

Forgot to say: r-b with the if statement removed, and you may want to Cc stable?
>
> > struct radv_image *dst_img = 
> > cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
> > struct radv_image *src_img = 
> > cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
> >
> > --
> > 2.21.0
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: ignore the loadOp if the first use of an attachment is a resolve

2019-05-26 Thread Bas Nieuwenhuizen
On Wed, May 22, 2019 at 11:20 AM Samuel Pitoiset
 wrote:
>
> Based on ANV.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_resolve.c | 21 -
>  1 file changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_resolve.c 
> b/src/amd/vulkan/radv_meta_resolve.c
> index ade5d438438..6a8abce1ddb 100644
> --- a/src/amd/vulkan/radv_meta_resolve.c
> +++ b/src/amd/vulkan/radv_meta_resolve.c
> @@ -618,15 +618,6 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer 
> *cmd_buffer)
> struct radv_meta_saved_state saved_state;
> enum radv_resolve_method resolve_method = RESOLVE_HW;
>
> -   /* FINISHME(perf): Skip clears for resolve attachments.
> -*
> -* From the Vulkan 1.0 spec:
> -*
> -*If the first use of an attachment in a render pass is as a 
> resolve
> -*attachment, then the loadOp is effectively ignored as the 
> resolve is
> -*guaranteed to overwrite all pixels in the render area.
> -*/
> -
> if (!subpass->has_resolve)
> return;
>
> @@ -637,6 +628,18 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer 
> *cmd_buffer)
> if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
> continue;
>
> +   if 
> (cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects) {
> +   /* From the Vulkan 1.0 spec:
> +*
> +*If the first use of an attachment in a render
> +*pass is as a resolve attachment, then the loadOp
> +*is effectively ignored as the resolve is
> +*guaranteed to overwrite all pixels in the render
> +*area.
> +   */
> +   
> cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0;
> +   }
> +

You can drop the if statement. Also this seems like a correctness
issue, as we only cleared the first time it was used as a color
attachment, which might happen after a resolve?

> struct radv_image *dst_img = 
> cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
> struct radv_image *src_img = 
> cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-05-21 Thread Bas Nieuwenhuizen
So this does not seem to use the sample locations during layout transitions?

AFAIK those are needed for e.g. HTILE decompression as it is based on
equations somehow.

On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
 wrote:
>
> Basically, this extension allows applications to use custom
> sample locations. It doesn't support variable sample locations
> during subpass. Note that we don't have to upload the user
> sample locations because the spec doesn't allow this.
>
> Only enabled on VI+ because it's untested on older chips.
>
> v2: - change sampleLocationCoordinateRange[1] to 0.9375
> - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
> - rebased on top of master
> - some cleanups
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 223 ++
>  src/amd/vulkan/radv_device.c  |  27 
>  src/amd/vulkan/radv_extensions.py |   1 +
>  src/amd/vulkan/radv_pipeline.c|  30 
>  src/amd/vulkan/radv_private.h |  26 +++-
>  5 files changed, 300 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 4f592bc7f68..fb79c1c6713 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer 
> *cmd_buffer,
> dest->viewport.count = src->viewport.count;
> dest->scissor.count = src->scissor.count;
> dest->discard_rectangle.count = src->discard_rectangle.count;
> +   dest->sample_location.count = src->sample_location.count;
>
> if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
> if (memcmp(&dest->viewport.viewports, 
> &src->viewport.viewports,
> @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer 
> *cmd_buffer,
> }
> }
>
> +   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> +   if (dest->sample_location.per_pixel != 
> src->sample_location.per_pixel ||
> +   dest->sample_location.grid_size.width != 
> src->sample_location.grid_size.width ||
> +   dest->sample_location.grid_size.height != 
> src->sample_location.grid_size.height ||
> memcmp(&dest->sample_location.locations,
>  &src->sample_location.locations,
> +  src->sample_location.count * 
> sizeof(VkSampleLocationEXT))) {
> +   dest->sample_location.per_pixel = 
> src->sample_location.per_pixel;
> +   dest->sample_location.grid_size = 
> src->sample_location.grid_size;
> +   typed_memcpy(dest->sample_location.locations,
> +src->sample_location.locations,
> +src->sample_location.count);
> +   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> +   }
> +   }
> +
> cmd_buffer->state.dirty |= dest_mask;
>  }
>
> @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
> *cmd_buffer,
> }
>  }
>
> +/**
> + * Convert the user sample locations to hardware sample locations (the values
> + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> + */
> +static void
> +radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
> + uint32_t x, uint32_t y, VkOffset2D *sample_locs)
> +{
> +   uint32_t x_offset = x % state->grid_size.width;
> +   uint32_t y_offset = y % state->grid_size.height;
> +   uint32_t num_samples = (uint32_t)state->per_pixel;
> +   VkSampleLocationEXT *user_locs;
> +   uint32_t pixel_offset;
> +
> +   pixel_offset = (x_offset + y_offset * state->grid_size.width) * 
> num_samples;
> +
> +   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> +   user_locs = &state->locations[pixel_offset];
> +
> +   for (uint32_t i = 0; i < num_samples; i++) {
> +   float shifted_pos_x = user_locs[i].x - 0.5;
> +   float shifted_pos_y = user_locs[i].y - 0.5;
> +
> +   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> +   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
> +
> +   sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
> +   sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
> +   }
> +}
> +
> +/**
> + * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
> + * locations.
> + */
> +static void
> +radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
> +  uint32_t *sample_locs_pixel)
> +{
> +   for (uint32_t i = 0; i < num_samples; i++) {
> +   uint32_t sample_reg_idx = i / 4;
> +   uint32_t sample_loc_idx = i % 4;
> +   int32_t pos_x = sample_locs[i].x;
> +   int32_t pos_y = sample_locs[i].y;
> +
> +   uint32_t shift_x = 8 * sample_loc_idx;
> +   uint32_t shift_y = 

Re: [Mesa-dev] [PATCH 2/4] radv: clean up the sample locations codebase

2019-05-21 Thread Bas Nieuwenhuizen
r-b

On Thu, May 16, 2019 at 11:50 AM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c |   2 +-
>  src/amd/vulkan/radv_pipeline.c   |   2 +-
>  src/amd/vulkan/radv_private.h|   4 +-
>  src/amd/vulkan/si_cmd_buffer.c   | 166 ++-
>  4 files changed, 76 insertions(+), 98 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index e4e5966da54..4f592bc7f68 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -669,7 +669,7 @@ radv_update_multisample_state(struct radv_cmd_buffer 
> *cmd_buffer,
>
> radeon_set_context_reg(cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0, 
> ms->pa_sc_mode_cntl_0);
>
> -   radv_cayman_emit_msaa_sample_locs(cmd_buffer->cs, num_samples);
> +   radv_emit_default_sample_locations(cmd_buffer->cs, num_samples);
>
> /* GFX9: Flush DFSM when the AA mode changes. */
> if (cmd_buffer->device->dfsm_allowed) {
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index c89a6f139ba..56fd65bec29 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -1128,7 +1128,7 @@ radv_pipeline_init_multisample_state(struct 
> radv_pipeline *pipeline,
> S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
> S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
> ms->pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(log_samples) 
> |
> -   
> S_028BE0_MAX_SAMPLE_DIST(radv_cayman_get_maxdist(log_samples)) |
> +   
> S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) |
> S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); /* 
> CM_R_028BE0_PA_SC_AA_CONFIG */
> ms->pa_sc_mode_cntl_1 |= 
> S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
> if (ps_iter_samples > 1)
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 9563e86a680..7834a505562 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1212,8 +1212,8 @@ void radv_cmd_buffer_clear_subpass(struct 
> radv_cmd_buffer *cmd_buffer);
>  void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
>  void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
>  void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
> -void radv_cayman_emit_msaa_sample_locs(struct radeon_cmdbuf *cs, int 
> nr_samples);
> -unsigned radv_cayman_get_maxdist(int log_samples);
> +void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int 
> nr_samples);
> +unsigned radv_get_default_max_sample_dist(int log_samples);
>  void radv_device_init_msaa(struct radv_device *device);
>
>  void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index b93cd88a1d1..56d7a9a3682 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -1300,144 +1300,122 @@ void si_cp_dma_wait_for_idle(struct radv_cmd_buffer 
> *cmd_buffer)
>
>  /* For MSAA sample positions. */
>  #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
> -   (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) |  \
> -   (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) |
>  \
> -   (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) |   
>  \
> +   ((((unsigned)(s0x) & 0xf) << 0)  | (((unsigned)(s0y) & 0xf) << 4)  | \
> +(((unsigned)(s1x) & 0xf) << 8)  | (((unsigned)(s1y) & 0xf) << 12) | \
> +(((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
>  (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
>
> -
> -/* 2xMSAA
> - * There are two locations (4, 4), (-4, -4). */
> -const uint32_t eg_sample_locs_2x[4] = {
> -   FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
> -   FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
> -   FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
> -   FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
> -};
> -const unsigned eg_max_dist_2x = 4;
> -/* 4xMSAA
> - * There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */
> -const uint32_t eg_sample_locs_4x[4] = {
> -   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
> -   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
> -   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
> -   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
> -};
> -const unsigned eg_max_dist_4x = 6;
> -
> -/* Cayman 8xMSAA */
> -static const uint32_t cm_sample_locs_8x[] = {
> -   FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
> -   FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
> -   FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
> -   FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
> -   FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
> -   

Re: [Mesa-dev] [PATCH] radv: do not reset query pool during creation

2019-05-21 Thread Bas Nieuwenhuizen
r-b

On Tue, May 21, 2019 at 1:36 PM Samuel Pitoiset
 wrote:
>
> From the Vulkan spec 1.1.108:
>"After query pool creation, each query must be reset before
> it is used."
>
> So, the driver doesn't need to do this at creation time.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_query.c | 3 ---
>  1 file changed, 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
> index 520711e4864..be1cf589d63 100644
> --- a/src/amd/vulkan/radv_query.c
> +++ b/src/amd/vulkan/radv_query.c
> @@ -1035,8 +1035,6 @@ VkResult radv_CreateQueryPool(
> struct radv_query_pool *pool = vk_alloc2(&device->alloc, pAllocator,
>sizeof(*pool), 8,
>
> VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
> -   uint32_t initial_value = pCreateInfo->queryType == 
> VK_QUERY_TYPE_TIMESTAMP
> -? TIMESTAMP_NOT_READY : 0;
>
> if (!pool)
> return vk_error(device->instance, 
> VK_ERROR_OUT_OF_HOST_MEMORY);
> @@ -1082,7 +1080,6 @@ VkResult radv_CreateQueryPool(
> vk_free2(&device->alloc, pAllocator, pool);
> return vk_error(device->instance, 
> VK_ERROR_OUT_OF_DEVICE_MEMORY);
> }
> -   memset(pool->ptr, initial_value, pool->size);
>
> *pQueryPool = radv_query_pool_to_handle(pool);
> return VK_SUCCESS;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] radv: remove remaining code related to 16 samples

2019-05-20 Thread Bas Nieuwenhuizen
r-b

On Thu, May 16, 2019 at 11:50 AM Samuel Pitoiset
 wrote:
>
> The driver only supports up to 8 samples.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_private.h  |  1 -
>  src/amd/vulkan/si_cmd_buffer.c | 50 --
>  2 files changed, 51 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index a88c0f31ad3..9563e86a680 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -701,7 +701,6 @@ struct radv_device {
> float sample_locations_2x[2][2];
> float sample_locations_4x[4][2];
> float sample_locations_8x[8][2];
> -   float sample_locations_16x[16][2];
>
> /* GFX7 and later */
> uint32_t gfx_init_size_dw;
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 0f4bdadc3d2..b93cd88a1d1 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -1337,26 +1337,6 @@ static const uint32_t cm_sample_locs_8x[] = {
> FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
>  };
>  static const unsigned cm_max_dist_8x = 8;
> -/* Cayman 16xMSAA */
> -static const uint32_t cm_sample_locs_16x[] = {
> -   FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
> -   FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
> -   FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
> -   FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
> -   FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
> -   FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
> -   FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
> -   FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
> -   FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
> -   FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
> -   FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
> -   FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
> -   FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
> -   FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
> -   FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
> -   FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
> -};
> -static const unsigned cm_max_dist_16x = 8;
>
>  unsigned radv_cayman_get_maxdist(int log_samples)
>  {
> @@ -1365,7 +1345,6 @@ unsigned radv_cayman_get_maxdist(int log_samples)
> eg_max_dist_2x,
> eg_max_dist_4x,
> cm_max_dist_8x,
> -   cm_max_dist_16x
> };
> return max_dist[log_samples];
>  }
> @@ -1409,25 +1388,6 @@ void radv_cayman_emit_msaa_sample_locs(struct 
> radeon_cmdbuf *cs, int nr_samples)
> radeon_emit(cs, cm_sample_locs_8x[3]);
> radeon_emit(cs, cm_sample_locs_8x[7]);
> break;
> -   case 16:
> -   radeon_set_context_reg_seq(cs, 
> R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
> -   radeon_emit(cs, cm_sample_locs_16x[0]);
> -   radeon_emit(cs, cm_sample_locs_16x[4]);
> -   radeon_emit(cs, cm_sample_locs_16x[8]);
> -   radeon_emit(cs, cm_sample_locs_16x[12]);
> -   radeon_emit(cs, cm_sample_locs_16x[1]);
> -   radeon_emit(cs, cm_sample_locs_16x[5]);
> -   radeon_emit(cs, cm_sample_locs_16x[9]);
> -   radeon_emit(cs, cm_sample_locs_16x[13]);
> -   radeon_emit(cs, cm_sample_locs_16x[2]);
> -   radeon_emit(cs, cm_sample_locs_16x[6]);
> -   radeon_emit(cs, cm_sample_locs_16x[10]);
> -   radeon_emit(cs, cm_sample_locs_16x[14]);
> -   radeon_emit(cs, cm_sample_locs_16x[3]);
> -   radeon_emit(cs, cm_sample_locs_16x[7]);
> -   radeon_emit(cs, cm_sample_locs_16x[11]);
> -   radeon_emit(cs, cm_sample_locs_16x[15]);
> -   break;
> }
>  }
>
> @@ -1466,14 +1426,6 @@ static void radv_cayman_get_sample_position(struct 
> radv_device *device,
> val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
> out_value[1] = (float)(val.idx + 8) / 16.0f;
> break;
> -   case 16:
> -   offset = 4 * (sample_index % 4 * 2);
> -   index = (sample_index / 4) * 4;
> -   val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
> -   out_value[0] = (float)(val.idx + 8) / 16.0f;
> -   val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
> -   out_value[1] = (float)(val.idx + 8) / 16.0f;
> -   break;
> }
>  }
>
> @@ -1488,6 +1440,4 @@ void radv_device_init_msaa(struct radv_device *device)
> radv_cayman_get_sample_position(device, 4, i, 
> device->sample_locations_4x[i]);
> for (i = 0; i < 8; i++)
> radv_cayman_get_sample_position(device, 8, i, 
> device->sample_locations_8x[i]);
> -   for (i = 0; i < 16; i++)
> -   radv_cayman_get_sample_position(device, 16, i, 
> device->sample_locations_16x[i]);
>  }
> --
> 2.21.0
>
> 

Re: [Mesa-dev] [PATCH 3/4] radv: emit correct centroid priority based on the number of samples

2019-05-20 Thread Bas Nieuwenhuizen
r-b

On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/si_cmd_buffer.c | 19 ---
>  1 file changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 56d7a9a3682..9f6f92a42be 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -197,9 +197,6 @@ si_emit_graphics(struct radv_physical_device 
> *physical_device,
> radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, 
> S_008A14_NUM_CLIP_SEQ(3) |
>   S_008A14_CLIP_VTX_REORDER_ENA(1));
>
> -   radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 
> 0x76543210);
> -   radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 
> 0xfedcba98);
> -
> if (!physical_device->has_clear_state)
> radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 
> 0);
>
> @@ -1315,16 +1312,19 @@ void si_cp_dma_wait_for_idle(struct radv_cmd_buffer 
> *cmd_buffer)
>  static const uint32_t sample_locs_1x =
> FILL_SREG(0, 0,   0, 0,   0, 0,   0, 0);
>  static const unsigned max_dist_1x = 0;
> +static const uint64_t centroid_priority_1x = 0xull;
>
>  /* 2xMSAA */
>  static const uint32_t sample_locs_2x =
> FILL_SREG(4,4,   -4, -4,   0, 0,   0, 0);
>  static const unsigned max_dist_2x = 4;
> +static const uint64_t centroid_priority_2x = 0x1010101010101010ull;
>
>  /* 4xMSAA */
>  static const uint32_t sample_locs_4x =
> FILL_SREG(-2,-6,   6, -2,   -6, 2,  2, 6);
>  static const unsigned max_dist_4x = 6;
> +static const uint64_t centroid_priority_4x = 0x3210321032103210ull;
>
>  /* 8xMSAA */
>  static const uint32_t sample_locs_8x[] = {
> @@ -1336,6 +1336,7 @@ static const uint32_t sample_locs_8x[] = {
> 0,
>  };
>  static const unsigned max_dist_8x = 8;
> +static const uint64_t centroid_priority_8x = 0x7654321076543210ull;
>
>  unsigned radv_get_default_max_sample_dist(int log_samples)
>  {
> @@ -1353,24 +1354,36 @@ void radv_emit_default_sample_locations(struct 
> radeon_cmdbuf *cs, int nr_samples
> switch (nr_samples) {
> default:
> case 1:
> +   radeon_set_context_reg_seq(cs, 
> R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
> +   radeon_emit(cs, centroid_priority_1x);
> +   radeon_emit(cs, centroid_priority_1x >> 32);
> radeon_set_context_reg(cs, 
> R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_1x);
> radeon_set_context_reg(cs, 
> R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_1x);
> radeon_set_context_reg(cs, 
> R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_1x);
> radeon_set_context_reg(cs, 
> R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_1x);
> break;
> case 2:
> +   radeon_set_context_reg_seq(cs, 
> R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
> +   radeon_emit(cs, centroid_priority_2x);
> +   radeon_emit(cs, centroid_priority_2x >> 32);
> radeon_set_context_reg(cs, 
> R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x);
> radeon_set_context_reg(cs, 
> R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x);
> radeon_set_context_reg(cs, 
> R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x);
> radeon_set_context_reg(cs, 
> R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x);
> break;
> case 4:
> +   radeon_set_context_reg_seq(cs, 
> R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
> +   radeon_emit(cs, centroid_priority_4x);
> +   radeon_emit(cs, centroid_priority_4x >> 32);
> radeon_set_context_reg(cs, 
> R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x);
> radeon_set_context_reg(cs, 
> R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x);
> radeon_set_context_reg(cs, 
> R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x);
> radeon_set_context_reg(cs, 
> R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x);
> break;
> case 8:
> +   radeon_set_context_reg_seq(cs, 
> R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
> +   radeon_emit(cs, centroid_priority_8x);
> +   radeon_emit(cs, centroid_priority_8x >> 32);
> radeon_set_context_reg_seq(cs, 
> R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
> radeon_emit_array(cs, sample_locs_8x, 4);
> radeon_emit_array(cs, sample_locs_8x, 4);
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___

Re: [Mesa-dev] [PATCH 4/4] radv: fix the sample max distance value for 8x

2019-05-20 Thread Bas Nieuwenhuizen
r-b

On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
 wrote:
>
> It should be 7, not 8.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/si_cmd_buffer.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 9f6f92a42be..c73c6ecd65c 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -1335,7 +1335,7 @@ static const uint32_t sample_locs_8x[] = {
> 0,
> 0,
>  };
> -static const unsigned max_dist_8x = 8;
> +static const unsigned max_dist_8x = 7;
>  static const uint64_t centroid_priority_8x = 0x7654321076543210ull;
>
>  unsigned radv_get_default_max_sample_dist(int log_samples)
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: decompress FMASK before performing a MSAA decompress using FMASK

2019-05-20 Thread Bas Nieuwenhuizen
r-b

On Thu, May 16, 2019 at 9:21 AM Samuel Pitoiset
 wrote:
>
> This fixes some CTS failures related to VK_EXT_sample_locations.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 506efc4dae0..476ae35e0a0 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4833,17 +4833,28 @@ static void radv_handle_color_image_transition(struct 
> radv_cmd_buffer *cmd_buffe
> radv_fast_clear_flush_image_inplace(cmd_buffer, 
> image, range);
> }
> } else if (radv_image_has_cmask(image) || 
> radv_image_has_fmask(image)) {
> +   bool fce_eliminate = false, fmask_expand = false;
> +
> if (radv_layout_can_fast_clear(image, src_layout, 
> src_queue_mask) &&
> !radv_layout_can_fast_clear(image, dst_layout, 
> dst_queue_mask)) {
> -   radv_fast_clear_flush_image_inplace(cmd_buffer, 
> image, range);
> +   fce_eliminate = true;
> }
>
> if (radv_image_has_fmask(image)) {
> if (src_layout != VK_IMAGE_LAYOUT_GENERAL &&
> dst_layout == VK_IMAGE_LAYOUT_GENERAL) {
> -   radv_expand_fmask_image_inplace(cmd_buffer, 
> image, range);
> +   /* A FMASK decompress is required before doing
> +* a MSAA decompress using FMASK.
> +*/
> +   fmask_expand = true;
> }
> }
> +
> +   if (fce_eliminate || fmask_expand)
> +   radv_fast_clear_flush_image_inplace(cmd_buffer, 
> image, range);
> +
> +   if (fmask_expand)
> +   radv_expand_fmask_image_inplace(cmd_buffer, image, 
> range);
> }
>  }
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/4] radeonsi: cleanup some #includes

2019-05-13 Thread Bas Nieuwenhuizen
r-b for the series.

On Mon, May 13, 2019 at 11:14 PM Nicolai Hähnle  wrote:
>
> From: Nicolai Hähnle 
>
> ---
>  src/gallium/drivers/radeonsi/si_texture.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_texture.c 
> b/src/gallium/drivers/radeonsi/si_texture.c
> index 59d50376438..b31a2f6428a 100644
> --- a/src/gallium/drivers/radeonsi/si_texture.c
> +++ b/src/gallium/drivers/radeonsi/si_texture.c
> @@ -16,22 +16,22 @@
>   *
>   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>   * USE OR OTHER DEALINGS IN THE SOFTWARE.
>   */
>
> -#include "radeonsi/si_pipe.h"
> -#include "radeonsi/si_query.h"
> +#include "si_pipe.h"
> +#include "si_query.h"
>  #include "util/u_format.h"
>  #include "util/u_log.h"
>  #include "util/u_memory.h"
>  #include "util/u_pack_color.h"
>  #include "util/u_resource.h"
>  #include "util/u_surface.h"
>  #include "util/u_transfer.h"
>  #include "util/os_time.h"
>  #include 
>  #include 
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: clear vertex bindings while resetting command buffer

2019-05-10 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 
Fixes: 5010436e09f "radv: bail out when binding the same vertex buffers"

I'll push after running it through the testsuite.

Thanks!

On Fri, May 10, 2019 at 9:38 PM Józef Kucia  wrote:
>
> Only vertex inputs accessed by vertex shader must have valid buffers
> bound.
>
> Signed-off-by: Józef Kucia 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index b04c998fac2e..890662841d21 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -301,7 +301,6 @@ radv_cmd_buffer_destroy(struct radv_cmd_buffer 
> *cmd_buffer)
>  static VkResult
>  radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
>  {
> -
> cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
>
> list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
> @@ -326,6 +325,8 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
>
> cmd_buffer->record_result = VK_SUCCESS;
>
> +   memset(cmd_buffer->vertex_bindings, 0, 
> sizeof(cmd_buffer->vertex_bindings));
> +
> for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
> cmd_buffer->descriptors[i].dirty = 0;
> cmd_buffer->descriptors[i].valid = 0;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] winsys/amdgpu: add VCN JPEG to no user fence group

2019-05-08 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen  for then,
because the Mesa code indeed prevents AMDGPU_CHUNK_ID_FENCE in
submissions.

On Wed, May 8, 2019 at 3:24 PM Christian König
 wrote:
>
> Am 08.05.19 um 15:23 schrieb Liu, Leo:
> > On 5/8/19 9:19 AM, Koenig, Christian wrote:
> >> Am 08.05.19 um 15:14 schrieb Liu, Leo:
> >>> On 5/8/19 9:02 AM, Christian König wrote:
> >>>> [CAUTION: External Email]
> >>>>
> >>>> Am 08.05.19 um 14:56 schrieb Liu, Leo:
> >>>>> There is no user fence for JPEG, the bug triggering
> >>>>> kernel WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT)
> >>>> Oh, we are probably going to need to check for this in the kernel as
> >>>> well.
> >>>>
> >>>> Currently we only check for UVD and VCE there,
> >>> Are you talking about the check for the JPEG engine? If so, then
> >>> yes, the check "WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT)" is there;
> >>> that's why current JPEG is triggering that.
> >> Yeah, but this check comes way too late.
> >>
> >> We usually already reject command submissions when they have user fences
> >> for UVD & VCE, see amdgpu_cs_ib_fill():
> >>>   /* UVD & VCE fw doesn't support user fences */
> >>>   ring = to_amdgpu_ring(parser->entity->rq->sched);
> >>>   if (parser->job->uf_addr && (
> >>>   ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
> >>>   ring->funcs->type == AMDGPU_RING_TYPE_VCE))
> >>>   return -EINVAL;
> >> We should probably make that a ring flag or something like that and
> >> generalize the code here.
> >>
> >> Then the WARN_ON in the JPEG fence code can be removed.
> > Yep. I will take a look at this on the kernel side, in the meantime, can
> > I have a RB on the Mesa side?
>
> Well Acked-by: Christian König , cause I don't
> know the Mesa code well enough.
>
> Christian.
>
> >
> > Thanks,
> > Leo
> >
> >
> >> Christian.
> >>
> >>> Regards,
> >>>
> >>> Leo
> >>>
> >>>
> >>>> do you want to take a
> >>>> look Leo or should I do this?
> >>>>
> >>>> Christian.
> >>>>
> >>>>> Signed-off-by: Leo Liu 
> >>>>> Cc: mesa-sta...@lists.freedesktop.org
> >>>>> ---
> >>>>>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 3 ++-
> >>>>>  1 file changed, 2 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> >>>>> b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> >>>>> index 4a2377f7e09..972030eaaa8 100644
> >>>>> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> >>>>> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> >>>>> @@ -378,7 +378,8 @@ static bool amdgpu_cs_has_user_fence(struct
> >>>>> amdgpu_cs_context *cs)
> >>>>>cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE &&
> >>>>>cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC &&
> >>>>>cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC &&
> >>>>> -  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC;
> >>>>> +  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC &&
> >>>>> +  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_JPEG;
> >>>>>  }
> >>>>>
> >>>>>  static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs)
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: call constant folding before opt algebraic

2019-05-07 Thread Bas Nieuwenhuizen
Nope, r-b

On Tue, May 7, 2019 at 8:36 AM Samuel Pitoiset
 wrote:
>
> Seems fine to me,
>
> Reviewed-by: Samuel Pitoiset 
>
> Bas, any comments?
>
> On 5/7/19 7:14 AM, Timothy Arceri wrote:
> > ping!
> >
> > On 2/5/19 1:38 pm, Timothy Arceri wrote:
> >> The pattern of calling opt algebraic first seems to have originated
> >> in i965. The order in OpenGL drivers generally doesn't matter
> >> because the GLSL IR optimisations do constant folding before
> >> opt algebraic.
> >>
> >> However in Vulkan drivers calling opt algebraic first can result
> >> in missed constant folding opportunities.
> >>
> >> vkpipeline-db results (VEGA64):
> >>
> >> Totals from affected shaders:
> >> SGPRS: 3160 -> 3176 (0.51 %)
> >> VGPRS: 3588 -> 3580 (-0.22 %)
> >> Spilled SGPRs: 52 -> 44 (-15.38 %)
> >> Spilled VGPRs: 0 -> 0 (0.00 %)
> >> Private memory VGPRs: 0 -> 0 (0.00 %)
> >> Scratch size: 12 -> 12 (0.00 %) dwords per thread
> >> Code Size: 261812 -> 261036 (-0.30 %) bytes
> >> LDS: 7 -> 7 (0.00 %) blocks
> >> Max Waves: 346 -> 348 (0.58 %)
> >> Wait states: 0 -> 0 (0.00 %)
> >> ---
> >>   src/amd/vulkan/radv_shader.c | 2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> >> index cd5a9f2afb4..ad7b2439735 100644
> >> --- a/src/amd/vulkan/radv_shader.c
> >> +++ b/src/amd/vulkan/radv_shader.c
> >> @@ -162,8 +162,8 @@ radv_optimize_nir(struct nir_shader *shader, bool
> >> optimize_conservatively,
> >>   NIR_PASS(progress, shader, nir_opt_dead_cf);
> >>   NIR_PASS(progress, shader, nir_opt_cse);
> >>   NIR_PASS(progress, shader, nir_opt_peephole_select,
> >> 8, true, true);
> >> -NIR_PASS(progress, shader, nir_opt_algebraic);
> >>   NIR_PASS(progress, shader, nir_opt_constant_folding);
> >> +NIR_PASS(progress, shader, nir_opt_algebraic);
> >>   NIR_PASS(progress, shader, nir_opt_undef);
> >>   NIR_PASS(progress, shader,
> >> nir_opt_conditional_discard);
> >>   if (shader->options->max_unroll_iterations) {
> >>
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Move adriconf to mesa repositories

2019-05-06 Thread Bas Nieuwenhuizen
On Mon, May 6, 2019 at 5:43 PM Emil Velikov  wrote:
>
> On Sun, 28 Apr 2019 at 19:38, Jean Hertel  wrote:
> >
> > >Could not find my original notes, but the idea is roughly as follows:
> >  >- introduce a separate (user only?) library - say libmesa-config.so
> > > - ^^ provides an API to query/set attributes, via numerical tokens
> > > - any localisation is built on top of ^^ as standalone files
> > >
> > >Reasoning:
> > >- library reused by anyone to make a pretty config tool in their
> > >toolkit and/or language
> > > - numerical tokens are trivial to handle and cheap - can be
> > >binned/deprecated easily
> > > - translation lives outside of the driver - the driver doesn't care
> > >about it, so don't bloat
> > > - translators do not need access to mesa - one less hurdle/obstacle
> > >
> > >
> > >Hope it makes sense, not sure if coffee has kicked in fully ;-)
> > >-Emil
> >
> > Hey Emil,
> >
> > I really liked this idea, especially since right now I have a lot of issues 
> > to query which option is exactly supported by each driver. Like in the 
> > scenarios when you have multiple drivers that support the same GPU, or when 
> > you have a difference between userspace and kernel space driver naming.
> >
> > Can you give me more details on the idea?
> > If I got it right, this library would be independent and mesa will itself 
> > use it to query the options it wants/needs.
> >
> This is the tricky part - wish I could find my notes, they have a better
> brain-dump.
> It's OK to have the library as both front (config tool) and backend
> (used by mesa) although:
>  - special care on splitting and annotating the API is needed
>  - handling this "extra" dependency would be fiddly for slower moving distros
>
> > What about the current configuration files? Do you think there is a better 
> > way to handle them?
> > They are in an XML format, which is far from optimal.
> >
> What seems to be the problem with XML? The files are meant to be
> read/written to $app.
>
> > What about Vulkan?
> > As far as I know, the current setup only handles OpenGL driver 
> > configurations.
> >
> The current setup handles GLX, DRI and Nine IIRC. One of my goals was
> to split and structure this in a more obvious way.

FYI We have Vulkan integrated with driconf configs now too.

>
> HTH
> Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: add config entry for Counter-Strike Global Offensive

2019-05-06 Thread Bas Nieuwenhuizen
a-b

On Mon, May 6, 2019 at 6:39 AM Timothy Arceri  wrote:
>
> This fixes rendering issues with gun scopes which is rather
> important.
>
> Cc: "19.0" "19.1" 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100239
> ---
>  src/util/00-mesa-defaults.conf | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
> index c704a1756ae..6389b796d33 100644
> --- a/src/util/00-mesa-defaults.conf
> +++ b/src/util/00-mesa-defaults.conf
> @@ -468,6 +468,9 @@ TODO: document the other workarounds.
>  
>   />
>  
> + executable="csgo_linux64">
> +
> +
>  
>  
>  
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] u_dynarray: turn util_dynarray_{grow, resize} into element-oriented macros

2019-05-04 Thread Bas Nieuwenhuizen
On Sat, May 4, 2019 at 3:25 PM Nicolai Hähnle  wrote:
>
> From: Nicolai Hähnle 
>
> The main motivation for this change is API ergonomics: most operations
> on dynarrays are really on elements, not on bytes, so it's weird to have
> grow and resize as the odd operations out.
>
> The secondary motivation is memory safety. Users of the old byte-oriented
> functions would often multiply a number of elements with the element size,
> which could overflow, and checking for overflow is tedious.
>
> With this change, we only need to implement the overflow checks once.
> The checks are cheap: since eltsize is a compile-time constant and the
> functions should be inlined, they only add a single comparison and an
> unlikely branch.
> ---
>  .../drivers/nouveau/nv30/nvfx_fragprog.c  |  2 +-
>  src/gallium/drivers/nouveau/nv50/nv50_state.c |  5 +--
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  5 +--
>  .../compiler/brw_nir_analyze_ubo_ranges.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_bufmgr.c|  4 +-
>  src/util/u_dynarray.h | 38 +--
>  6 files changed, 35 insertions(+), 21 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c 
> b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
> index 86e3599325e..2bcb62b97d8 100644
> --- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
> +++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
> @@ -66,21 +66,21 @@ release_temps(struct nvfx_fpc *fpc)
> fpc->r_temps &= ~fpc->r_temps_discard;
> fpc->r_temps_discard = 0ULL;
>  }
>
>  static inline struct nvfx_reg
>  nvfx_fp_imm(struct nvfx_fpc *fpc, float a, float b, float c, float d)
>  {
> float v[4] = {a, b, c, d};
> int idx = fpc->imm_data.size >> 4;
>
> -   memcpy(util_dynarray_grow(>imm_data, sizeof(float) * 4), v, 4 * 
> sizeof(float));
> +   memcpy(util_dynarray_grow(>imm_data, float, 4), v, 4 * 
> sizeof(float));
> return nvfx_reg(NVFXSR_IMM, idx);
>  }
>
>  static void
>  grow_insns(struct nvfx_fpc *fpc, int size)
>  {
> struct nv30_fragprog *fp = fpc->fp;
>
> fp->insn_len += size;
> fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> index 55167a27c09..228feced5d1 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> @@ -1256,24 +1256,23 @@ nv50_set_global_bindings(struct pipe_context *pipe,
>   struct pipe_resource **resources,
>   uint32_t **handles)
>  {
> struct nv50_context *nv50 = nv50_context(pipe);
> struct pipe_resource **ptr;
> unsigned i;
> const unsigned end = start + nr;
>
> if (nv50->global_residents.size <= (end * sizeof(struct pipe_resource 
> *))) {
>const unsigned old_size = nv50->global_residents.size;
> -  const unsigned req_size = end * sizeof(struct pipe_resource *);
> -  util_dynarray_resize(>global_residents, req_size);
> +  util_dynarray_resize(>global_residents, struct pipe_resource *, 
> end);
>memset((uint8_t *)nv50->global_residents.data + old_size, 0,
> - req_size - old_size);
> + nv50->global_residents.size - old_size);
> }
>
> if (resources) {
>ptr = util_dynarray_element(
>   >global_residents, struct pipe_resource *, start);
>for (i = 0; i < nr; ++i) {
>   pipe_resource_reference([i], resources[i]);
>   nv50_set_global_handle(handles[i], resources[i]);
>}
> } else {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> index 12e21862ee0..2ab51c8529e 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> @@ -1363,24 +1363,23 @@ nvc0_set_global_bindings(struct pipe_context *pipe,
>   struct pipe_resource **resources,
>   uint32_t **handles)
>  {
> struct nvc0_context *nvc0 = nvc0_context(pipe);
> struct pipe_resource **ptr;
> unsigned i;
> const unsigned end = start + nr;
>
> if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource 
> *))) {
>const unsigned old_size = nvc0->global_residents.size;
> -  const unsigned req_size = end * sizeof(struct pipe_resource *);
> -  util_dynarray_resize(>global_residents, req_size);
> +  util_dynarray_resize(>global_residents, struct pipe_resource *, 
> end);
>memset((uint8_t *)nvc0->global_residents.data + old_size, 0,
> - req_size - old_size);
> + nvc0->global_residents.size - old_size);
> }
>
> if (resources) {
>ptr = util_dynarray_element(
>   >global_residents, struct pipe_resource *, start);
>for (i = 0; i < nr; ++i) {
>   pipe_resource_reference([i], resources[i]);
>   

Re: [Mesa-dev] [PATCH 2/3] u_dynarray: return 0 on realloc failure

2019-05-04 Thread Bas Nieuwenhuizen
On Sat, May 4, 2019 at 3:25 PM Nicolai Hähnle  wrote:
>
> From: Nicolai Hähnle 
>
> We're not very good at handling out-of-memory conditions in general, but
> this change at least gives the caller the option of handling it.
>
> This happens to fix an error in out-of-memory handling in i965, which has
> the following code in brw_bufmgr.c:
>
>   node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
>   if (unlikely(!node))
>  return 0ull;
>
> Previously, allocation failure for util_dynarray_grow wouldn't actually
> return NULL when the dynarray was previously non-empty.
> ---
>  src/util/u_dynarray.h | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/util/u_dynarray.h b/src/util/u_dynarray.h
> index b30fd7b1154..f6a81609dbe 100644
> --- a/src/util/u_dynarray.h
> +++ b/src/util/u_dynarray.h
> @@ -85,20 +85,22 @@ util_dynarray_ensure_cap(struct util_dynarray *buf, 
> unsigned newcap)
>   buf->capacity = DYN_ARRAY_INITIAL_SIZE;
>
>while (newcap > buf->capacity)
>   buf->capacity *= 2;
>
>if (buf->mem_ctx) {
>   buf->data = reralloc_size(buf->mem_ctx, buf->data, buf->capacity);
>} else {
>   buf->data = realloc(buf->data, buf->capacity);
>}
> +  if (!buf->data)
> + return 0;

To keep buf->data valid, put the new value in a temporary variable and
copy it into buf->data on success. If realloc and reralloc_size fail,
the original pointer is still valid, while if we overwrite buf->data
we are guaranteed to leak the data on failure.
> }
>
> return (void *)((char *)buf->data + buf->size);
>  }
>
>  static inline void *
>  util_dynarray_grow_cap(struct util_dynarray *buf, int diff)
>  {
> return util_dynarray_ensure_cap(buf, buf->size + diff);
>  }
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: apply the indexing workaround for atomic buffer operations on GFX9

2019-05-03 Thread Bas Nieuwenhuizen
On Fri, May 3, 2019 at 11:42 AM Samuel Pitoiset
 wrote:
>
> Because the new raw/struct intrinsics are buggy with LLVM 8
> (they weren't marked as source of divergence), we fall back to the
> old intrinsics for atomic buffer operations. This means we need
> to apply the indexing workaround for GFX9.

Can you make it clearer that we only delayed atomics to LLVM 9 and
not load/store? I was confused about why we needed another variable.

Otherwise r-b
>
> The fact that we need another workaround is painful but we should
> be able to clean that up a bit once LLVM 7 support is dropped.
>
> This fixes a GPU hang with AC Odyssey and some rendering problems
> with Nioh.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110573
> Fixes: 31164cf5f70 ("ac/nir: only use the new raw/struct image atomic 
> intrinsics with LLVM 9+")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c   | 12 +++-
>  src/amd/common/ac_shader_abi.h|  1 +
>  src/amd/vulkan/radv_nir_to_llvm.c |  6 ++
>  3 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index c92eaaca31d..151e0d0f961 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2417,10 +2417,12 @@ static void get_image_coords(struct ac_nir_context 
> *ctx,
>  }
>
>  static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
> -const nir_intrinsic_instr 
> *instr, bool write)
> +const nir_intrinsic_instr 
> *instr,
> +   bool write, bool atomic)
>  {
> LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, 
> write);
> -   if (ctx->abi->gfx9_stride_size_workaround) {
> +   if (ctx->abi->gfx9_stride_size_workaround ||
> +   (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) {
> LLVMValueRef elem_count = 
> LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 
> 0), "");
> LLVMValueRef stride = 
> LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 
> 0), "");
> stride = LLVMBuildLShr(ctx->ac.builder, stride, 
> LLVMConstInt(ctx->ac.i32, 16, 0), "");
> @@ -2466,7 +2468,7 @@ static LLVMValueRef visit_image_load(struct 
> ac_nir_context *ctx,
> unsigned num_channels = util_last_bit(mask);
> LLVMValueRef rsrc, vindex;
>
> -   rsrc = get_image_buffer_descriptor(ctx, instr, false);
> +   rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
> vindex = LLVMBuildExtractElement(ctx->ac.builder, 
> get_src(ctx, instr->src[1]),
>  ctx->ac.i32_0, "");
>
> @@ -2520,7 +2522,7 @@ static void visit_image_store(struct ac_nir_context 
> *ctx,
> args.cache_policy = get_cache_policy(ctx, access, true, 
> writeonly_memory);
>
> if (dim == GLSL_SAMPLER_DIM_BUF) {
> -   LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, 
> true);
> +   LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, 
> true, false);
> LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, 
> instr->src[3]));
> unsigned src_channels = ac_get_llvm_num_components(src);
> LLVMValueRef vindex;
> @@ -2632,7 +2634,7 @@ static LLVMValueRef visit_image_atomic(struct 
> ac_nir_context *ctx,
> params[param_count++] = get_src(ctx, instr->src[3]);
>
> if (dim == GLSL_SAMPLER_DIM_BUF) {
> -   params[param_count++] = get_image_buffer_descriptor(ctx, 
> instr, true);
> +   params[param_count++] = get_image_buffer_descriptor(ctx, 
> instr, true, true);
> params[param_count++] = 
> LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
> 
> ctx->ac.i32_0, ""); /* vindex */
> params[param_count++] = ctx->ac.i32_0; /* voffset */
> diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
> index 108fe58ce57..8debb1ff986 100644
> --- a/src/amd/common/ac_shader_abi.h
> +++ b/src/amd/common/ac_shader_abi.h
> @@ -203,6 +203,7 @@ struct ac_shader_abi {
> /* Whether to workaround GFX9 ignoring the stride for the buffer size 
> if IDXEN=0
> * and LLVM optimizes an indexed load with constant index to IDXEN=0. 
> */
> bool gfx9_stride_size_workaround;
> +   bool gfx9_stride_size_workaround_for_atomic;
>  };
>
>  #endif /* AC_SHADER_ABI_H */
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index 796d78e34f4..d83f0bd547f 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -3687,6 +3687,12 @@ 

Re: [Mesa-dev] Mesa (staging/19.0): radv: enable descriptor indexing capabilities

2019-05-01 Thread Bas Nieuwenhuizen
Well, kinda a fix.  Jason noticed that there were parts of the ext I
disabled but apparently implementations are required to enable them
(see 028ce527395642b68612d10c6030be5d4706a65e).

Though, I think this was backported under the assumption that
028ce527395642b68612d10c6030be5d4706a65e also was, and it looks like I
did not CC that to 19.0, partially because of a lot of dependencies.
Might make more sense to disable the ext on the 19.0 branch.


On Wed, May 1, 2019 at 9:56 PM Samuel Pitoiset
 wrote:
>
> wait what? Are we backporting a new feature into a stable branch? Do we
> really need that?
>
> On 5/1/19 6:35 PM, GitLab Mirror wrote:
> > Module: Mesa
> > Branch: staging/19.0
> > Commit: a1cdab7bd4ecad7c1c518fb8430613a7559d4f9d
> > URL:
> > http://cgit.freedesktop.org/mesa/mesa/commit/?id=a1cdab7bd4ecad7c1c518fb8430613a7559d4f9d
> >
> > Author: Juan A. Suarez Romero 
> > Date:   Mon Apr 29 17:05:13 2019 +0200
> >
> > radv: enable descriptor indexing capabilities
> >
> > This enables the remaining capabilities in SPV_EXT_descriptor_indexing.
> >
> > Fixes: 0e10790558b "radv: Enable VK_EXT_descriptor_indexing."
> >
> > Reviewed-by: Jason Ekstrand 
> > Reviewed-by: Bas Nieuwenhuizen 
> > (cherry picked from commit 06c9d7f9f94d9ab44a2b6148d9b5ec3f76c8d3db)
> >
> > ---
> >
> >   src/amd/vulkan/radv_shader.c | 2 ++
> >   1 file changed, 2 insertions(+)
> >
> > diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> > index 2b45576bd41..7dd2fa2105c 100644
> > --- a/src/amd/vulkan/radv_shader.c
> > +++ b/src/amd/vulkan/radv_shader.c
> > @@ -222,6 +222,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
> >   .lower_ubo_ssbo_access_to_offsets = true,
> >   .caps = {
> >   .descriptor_array_dynamic_indexing = true,
> > + .descriptor_array_non_uniform_indexing = true,
> > + .descriptor_indexing = true,
> >   .device_group = true,
> >   .draw_parameters = true,
> >   .float64 = true,
> >
> > ___
> > mesa-commit mailing list
> > mesa-com...@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-commit
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] vulkan/wsi: check if the display_fd given is master

2019-04-25 Thread Bas Nieuwenhuizen
r-b

On Thu, Apr 25, 2019 at 12:22 PM Emil Velikov  wrote:
>
> On Fri, 19 Apr 2019 at 16:01, Emil Velikov  wrote:
> >
> > From: Emil Velikov 
> >
> > As effectively required by the extension, we need to ensure we're master
> >
> > Currently drivers employ vendor specific solutions, which check if the
> > device behind the fd is capable*, yet none of them do the master check.
> >
> > *In the radv case, if acceleration is available.
> >
> > Instead of duplicating the check in each driver, keep it where it's
> > needed and used.
> >
> > Note this copies libdrm's drmIsMaster() to avoid depending on bleeding
> > edge version of the library.
> >
> > v2: set the fd to -1 if not master (Bas)
> >
> > Cc: Keith Packard 
> > Cc: Jason Ekstrand 
> > Cc: Bas Nieuwenhuizen 
> > Cc: Andres Rodriguez 
> > Reported-by: Andres Rodriguez 
> > Fixes: da997ebec92 ("vulkan: Add KHR_display extension using DRM [v10]")
> > Signed-off-by: Emil Velikov 
> > ---
> >  src/vulkan/wsi/wsi_common_display.c | 27 +++
> >  1 file changed, 27 insertions(+)
> >
> > diff --git a/src/vulkan/wsi/wsi_common_display.c 
> > b/src/vulkan/wsi/wsi_common_display.c
> > index 74ed36ed646..2be20e85046 100644
> > --- a/src/vulkan/wsi/wsi_common_display.c
> > +++ b/src/vulkan/wsi/wsi_common_display.c
> > @@ -1812,6 +1812,30 @@ fail_attr_init:
> > return ret;
> >  }
> >
> > +
> > +/*
> > + * Local version of the libdrm helper. Added to avoid depending on bleeding
> > + * edge version of the library.
> > + */
> > +static int
> > +local_drmIsMaster(int fd)
> > +{
> > +   /* Detect master by attempting something that requires master.
> > +*
> > +* Authenticating magic tokens requires master and 0 is an
> > +* internal kernel detail which we could use. Attempting this on
> > +* a master fd would therefore fail with EINVAL because 0
> > +* is invalid.
> > +*
> > +* A non-master fd will fail with EACCES, as the kernel checks
> > +* for master before attempting to do anything else.
> > +*
> > +* Since we don't want to leak implementation details, use
> > +* EACCES.
> > +*/
> > +   return drmAuthMagic(fd, 0) != -EACCES;
> > +}
> > +
> >  VkResult
> >  wsi_display_init_wsi(struct wsi_device *wsi_device,
> >   const VkAllocationCallbacks *alloc,
> > @@ -1827,6 +1851,9 @@ wsi_display_init_wsi(struct wsi_device *wsi_device,
> > }
> >
> > wsi->fd = display_fd;
> > +   if (wsi->fd != -1 && !local_drmIsMaster(wsi->fd))
> > +  wsi->fd = -1;
> > +
> > wsi->alloc = alloc;
> >
> Humble ping?
>
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vulkan/wsi: don't use DUMB_CLOSE for normal GEM handles

2019-04-25 Thread Bas Nieuwenhuizen
r-b

On Thu, Apr 25, 2019 at 12:22 PM Emil Velikov  wrote:
>
> On Fri, 19 Apr 2019 at 16:03, Emil Velikov  wrote:
> >
> > From: Emil Velikov 
> >
> > Currently we get normal GEM handles from PrimeFDToHandle, yet we close
> > them with DUMB_CLOSE. Use GEM_CLOSE instead.
> >
> > Cc: Keith Packard 
> > Cc: Jason Ekstrand 
> > Cc: Bas Nieuwenhuizen 
> > Fixes: da997ebec92 ("vulkan: Add KHR_display extension using DRM [v10]")
> > Signed-off-by: Emil Velikov 
> > ---
> >  src/vulkan/wsi/wsi_common_display.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/vulkan/wsi/wsi_common_display.c 
> > b/src/vulkan/wsi/wsi_common_display.c
> > index 2be20e85046..66e191906fc 100644
> > --- a/src/vulkan/wsi/wsi_common_display.c
> > +++ b/src/vulkan/wsi/wsi_common_display.c
> > @@ -974,8 +974,8 @@ static void
> >  wsi_display_destroy_buffer(struct wsi_display *wsi,
> > uint32_t buffer)
> >  {
> > -   (void) drmIoctl(wsi->fd, DRM_IOCTL_MODE_DESTROY_DUMB,
> > -   &((struct drm_mode_destroy_dumb) { .handle = buffer }));
> > +   (void) drmIoctl(wsi->fd, DRM_IOCTL_GEM_CLOSE,
> > +   &((struct drm_gem_close) { .handle = buffer }));
> >  }
> >
> Humble ping anyone?
>
> AFAICT closing handles from PrimeFDToHandle() with DUMB_CLOSE is a
> violation, even if it somehow works today.
>
> Thanks
> Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: add BOs after need_cs_space

2019-04-24 Thread Bas Nieuwenhuizen
R-b

On Wed, Apr 24, 2019, 11:36 PM Marek Olšák  wrote:

> From: Marek Olšák 
>
> need_cs_space may clear the buffer list.
>
> Fixes: 951d60f8cdc88 "radeonsi: delay adding BOs at the beginning of IBs
> until the first draw"
> ---
>  src/gallium/drivers/radeonsi/si_compute.c| 6 +++---
>  src/gallium/drivers/radeonsi/si_state_draw.c | 6 +++---
>  2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 2f444a3a1b8..541d7e6f118 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -878,40 +878,40 @@ static void si_launch_grid(
>
> if (sctx->has_graphics) {
> if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
> si_update_fb_dirtiness_after_rendering(sctx);
> sctx->last_num_draw_calls = sctx->num_draw_calls;
> }
>
> si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
> }
>
> -   if (sctx->bo_list_add_all_compute_resources)
> -   si_compute_resources_add_all_to_bo_list(sctx);
> -
> /* Add buffer sizes for memory checking in need_cs_space. */
> si_context_add_resource_size(sctx, &program->shader.bo->b.b);
> /* TODO: add the scratch buffer */
>
> if (info->indirect) {
> si_context_add_resource_size(sctx, info->indirect);
>
> /* Indirect buffers use TC L2 on GFX9, but not older hw. */
> if (sctx->chip_class <= VI &&
> si_resource(info->indirect)->TC_L2_dirty) {
> sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
> si_resource(info->indirect)->TC_L2_dirty = false;
> }
> }
>
> si_need_gfx_cs_space(sctx);
>
> +   if (sctx->bo_list_add_all_compute_resources)
> +   si_compute_resources_add_all_to_bo_list(sctx);
> +
> if (!sctx->cs_shader_state.initialized)
> si_initialize_compute(sctx);
>
> if (sctx->flags)
> si_emit_cache_flush(sctx);
>
> if (!si_switch_compute_shader(sctx, program, &program->shader,
> code_object, info->pc))
> return;
>
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index d2c74152337..8e01e1b35e1 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1287,23 +1287,20 @@ static void si_draw_vbo(struct pipe_context *ctx,
> const struct pipe_draw_info *i
> sctx->last_dirty_tex_counter = dirty_tex_counter;
> sctx->framebuffer.dirty_cbufs |=
> ((1 << sctx->framebuffer.state.nr_cbufs) - 1);
> sctx->framebuffer.dirty_zsbuf = true;
> si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
> si_update_all_texture_descriptors(sctx);
> }
>
> si_decompress_textures(sctx, u_bit_consecutive(0,
> SI_NUM_GRAPHICS_SHADERS));
>
> -   if (sctx->bo_list_add_all_gfx_resources)
> -   si_gfx_resources_add_all_to_bo_list(sctx);
> -
> /* Set the rasterization primitive type.
>  *
>  * This must be done after si_decompress_textures, which can call
>  * draw_vbo recursively, and before si_update_shaders, which uses
>  * current_rast_prim for this draw_vbo call. */
> if (sctx->gs_shader.cso)
> rast_prim = sctx->gs_shader.cso->gs_output_prim;
> else if (sctx->tes_shader.cso) {
> if
> (sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_POINT_MODE])
> rast_prim = PIPE_PRIM_POINTS;
> @@ -1431,20 +1428,23 @@ static void si_draw_vbo(struct pipe_context *ctx,
> const struct pipe_draw_info *i
> if (indirect->indirect_draw_count &&
>
> si_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
> sctx->flags |=
> SI_CONTEXT_WRITEBACK_GLOBAL_L2;
>
> si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
> }
> }
> }
>
> si_need_gfx_cs_space(sctx);
>
> +   if (sctx->bo_list_add_all_gfx_resources)
> +   si_gfx_resources_add_all_to_bo_list(sctx);
> +
> /* Since we've called si_context_add_resource_size for vertex
> buffers,
>  * this must be called after si_need_cs_space, because we must let
>  * need_cs_space flush before we add buffers to the buffer list.
>  */
> if (!si_upload_vertex_buffer_descriptors(sctx))
> goto return_cleanup;
>
> /* Vega10/Raven scissor bug workaround. When any context register
> is
>  * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
>  * 

Re: [Mesa-dev] [PATCH 1/1] radv: consider MESA_VK_VERSION_OVERRIDE when setting the api version

2019-04-24 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Wed, Apr 24, 2019 at 2:40 PM Eleni Maria Stea  wrote:
>
> Before setting the physical device API version, we should check if the
> MESA_VK_VERSION_OVERRIDE environment variable is set and take it into
> account.
> ---
>  src/amd/vulkan/radv_extensions.py | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index 9743ce1a774..8f29f4ca40f 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -333,9 +333,13 @@ VkResult radv_EnumerateInstanceVersion(
>  uint32_t
>  radv_physical_device_api_version(struct radv_physical_device *dev)
>  {
> +uint32_t override = vk_get_version_override();
> +uint32_t version = VK_MAKE_VERSION(1, 0, 68);
> +
>  if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit)
> -return ${MAX_API_VERSION.c_vk_version()};
> -return VK_MAKE_VERSION(1, 0, 68);
> +version = ${MAX_API_VERSION.c_vk_version()};
> +
> +return override ? MIN2(override, version) : version;
>  }
>  """)
>
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] nir: add an option for skipping split_alu_of_phi

2019-04-23 Thread Bas Nieuwenhuizen
On Tue, Apr 23, 2019 at 9:35 AM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_shader.c |  2 +-
>  src/compiler/nir/nir.h   |  3 ++-
>  src/compiler/nir/nir_opt_if.c| 17 ++---
>  src/freedreno/ir3/ir3_nir.c  |  2 +-
>  src/gallium/auxiliary/nir/tgsi_to_nir.c  |  2 +-
>  src/gallium/drivers/freedreno/a2xx/ir2_nir.c |  2 +-
>  src/gallium/drivers/radeonsi/si_shader_nir.c |  2 +-
>  src/intel/compiler/brw_nir.c |  2 +-
>  src/mesa/state_tracker/st_glsl_to_nir.cpp|  2 +-
>  9 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 13f1f9aa9dc..54a4e732230 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -158,7 +158,7 @@ radv_optimize_nir(struct nir_shader *shader, bool 
> optimize_conservatively,
> NIR_PASS(progress, shader, nir_opt_remove_phis);
>  NIR_PASS(progress, shader, nir_opt_dce);
>  }
> -NIR_PASS(progress, shader, nir_opt_if, true);
> +NIR_PASS(progress, shader, nir_opt_if, true, false);
>  NIR_PASS(progress, shader, nir_opt_dead_cf);
>  NIR_PASS(progress, shader, nir_opt_cse);
>  NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, 
> true);
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 7d2062d3691..d7506d6ddd1 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3474,7 +3474,8 @@ bool nir_opt_gcm(nir_shader *shader, bool value_number);
>
>  bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
>
> -bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);
> +bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue,
> +bool skip_alu_of_phi);

Can we have a flag for this instead (e.g. something like
nir_opt_if_skip_alu_of_phi)? I think having a function with a bunch of
bools is less than ideal as you can't see at the calling site what is
for what arg.
>
>  bool nir_opt_intrinsics(nir_shader *shader);
>
> diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
> index f674185f1e2..149b3bd1659 100644
> --- a/src/compiler/nir/nir_opt_if.c
> +++ b/src/compiler/nir/nir_opt_if.c
> @@ -1385,7 +1385,8 @@ opt_if_cf_list(nir_builder *b, struct exec_list 
> *cf_list,
>   * not do anything to cause the metadata to become invalid.
>   */
>  static bool
> -opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
> +opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list,
> +bool skip_alu_of_phi)
>  {
> bool progress = false;
> foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
> @@ -1395,16 +1396,17 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list 
> *cf_list)
>
>case nir_cf_node_if: {
>   nir_if *nif = nir_cf_node_as_if(cf_node);
> - progress |= opt_if_safe_cf_list(b, &nif->then_list);
> - progress |= opt_if_safe_cf_list(b, &nif->else_list);
> + progress |= opt_if_safe_cf_list(b, &nif->then_list, 
> skip_alu_of_phi);
> + progress |= opt_if_safe_cf_list(b, &nif->else_list, 
> skip_alu_of_phi);
>   progress |= opt_if_evaluate_condition_use(b, nif);
>   break;
>}
>
>case nir_cf_node_loop: {
>   nir_loop *loop = nir_cf_node_as_loop(cf_node);
> - progress |= opt_if_safe_cf_list(b, &loop->body);
> - progress |= opt_split_alu_of_phi(b, loop);
> + progress |= opt_if_safe_cf_list(b, &loop->body, skip_alu_of_phi);
> + if (!skip_alu_of_phi)
> +progress |= opt_split_alu_of_phi(b, loop);
>   break;
>}
>
> @@ -1417,7 +1419,8 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list 
> *cf_list)
>  }
>
>  bool
> -nir_opt_if(nir_shader *shader, bool aggressive_last_continue)
> +nir_opt_if(nir_shader *shader, bool aggressive_last_continue,
> +   bool skip_alu_of_phi)
>  {
> bool progress = false;
>
> @@ -1430,7 +1433,7 @@ nir_opt_if(nir_shader *shader, bool 
> aggressive_last_continue)
>
>nir_metadata_require(function->impl, nir_metadata_block_index |
> nir_metadata_dominance);
> -  progress = opt_if_safe_cf_list(&b, &function->impl->body);
> +  progress = opt_if_safe_cf_list(&b, &function->impl->body, 
> skip_alu_of_phi);
>nir_metadata_preserve(function->impl, nir_metadata_block_index |
>  nir_metadata_dominance);
>
> diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
> index 76230e3be50..1bec3c030a9 100644
> --- a/src/freedreno/ir3/ir3_nir.c
> +++ b/src/freedreno/ir3/ir3_nir.c
> @@ -147,7 +147,7 @@ ir3_optimize_loop(nir_shader *s)
> OPT(s, nir_copy_prop);
> OPT(s, nir_opt_dce);
> }
> - 

Re: [Mesa-dev] [PATCH v2] radv: only load 2-dwords for vertex buffers when robustness is disabled

2019-04-22 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Tue, Mar 19, 2019 at 9:42 AM Samuel Pitoiset
 wrote:
>
> This patch requires the typed vertex fetches series.
>
> Totals from affected shaders:
> SGPRS: 445574 -> 452638 (1.59 %)
> VGPRS: 373392 -> 370436 (-0.79 %)
> Spilled SGPRs: 77 -> 14 (-81.82 %)
> Spilled VGPRs: 0 -> 0 (0.00 %)
> Code Size: 14162288 -> 14413036 (1.77 %) bytes
> Max Waves: 11 -> 120509 (0.43 %)
>
> v2: - fix vertex descriptors
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 32 +++
>  src/amd/vulkan/radv_device.c  |  2 ++
>  src/amd/vulkan/radv_nir_to_llvm.c | 21 +++-
>  src/amd/vulkan/radv_private.h |  1 +
>  src/amd/vulkan/radv_shader.c  |  1 +
>  src/amd/vulkan/radv_shader.h  |  1 +
>  6 files changed, 45 insertions(+), 13 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index ae8f50d0348..0c8572bd1e5 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1991,6 +1991,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
> *cmd_buffer,
> cmd_buffer->state.pipeline->num_vertex_bindings &&
> radv_get_shader(cmd_buffer->state.pipeline, 
> MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
> struct radv_vertex_elements_info *velems = 
> &cmd_buffer->state.pipeline->vertex_elements;
> +   unsigned desc_size_bytes = 
> cmd_buffer->device->robust_buffer_access ? 16 : 8;
> unsigned vb_offset;
> void *vb_ptr;
> uint32_t i = 0;
> @@ -1998,12 +1999,13 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
> *cmd_buffer,
> uint64_t va;
>
> /* allocate some descriptor state for vertex buffers */
> -   if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, 256,
> +   if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
> + count * desc_size_bytes, 
> 256,
>   &vb_offset, &vb_ptr))
> return;
>
> for (i = 0; i < count; i++) {
> -   uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
> +   uint32_t *desc = &((uint32_t *)vb_ptr)[i * 
> (desc_size_bytes / 4)];
> uint32_t offset;
> struct radv_buffer *buffer = 
> cmd_buffer->vertex_bindings[i].buffer;
> uint32_t stride = 
> cmd_buffer->state.pipeline->binding_stride[i];
> @@ -2017,16 +2019,22 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
> *cmd_buffer,
> va += offset + buffer->offset;
> desc[0] = va;
> desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 
> S_008F04_STRIDE(stride);
> -   if 
> (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
> -   desc[2] = (buffer->size - offset - 
> velems->format_size[i]) / stride + 1;
> -   else
> -   desc[2] = buffer->size - offset;
> -   desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> - 
> S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
> - 
> S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
> +
> +   if (cmd_buffer->device->robust_buffer_access) {
> +   /* Enable out of bounds checking only when
> +* robust buffer access is requested.
> +*/
> +   if 
> (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
> +   desc[2] = (buffer->size - offset - 
> velems->format_size[i]) / stride + 1;
> +   else
> +   desc[2] = buffer->size - offset;
> +   desc[3] = 
> S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> + 
> S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> +

Re: [Mesa-dev] [PATCH] radv: add VK_NV_compute_shader_derivates support

2019-04-22 Thread Bas Nieuwenhuizen
On Fri, Apr 19, 2019 at 12:37 PM Samuel Pitoiset
 wrote:
>
> Only computeDerivativeGroupLinear is supported for now.

Reviewed-by: Bas Nieuwenhuizen 
>
> All crucible tests pass.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_device.c  | 7 +++
>  src/amd/vulkan/radv_extensions.py | 1 +
>  src/amd/vulkan/radv_shader.c  | 1 +
>  3 files changed, 9 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 774ee5b91df..a31860eabf4 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -927,6 +927,13 @@ void radv_GetPhysicalDeviceFeatures2(
> 
> features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
> break;
> }
> +   case 
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
> +   VkPhysicalDeviceComputeShaderDerivativesFeaturesNV 
> *features =
> +   
> (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
> +   features->computeDerivativeGroupQuads = false;
> +   features->computeDerivativeGroupLinear = true;
> +   break;
> +   }
> default:
> break;
> }
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index 40fc585c503..9743ce1a774 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -133,6 +133,7 @@ EXTENSIONS = [
>  Extension('VK_AMD_shader_trinary_minmax', 1, True),
>  Extension('VK_GOOGLE_decorate_string',1, True),
>  Extension('VK_GOOGLE_hlsl_functionality1',1, True),
> +Extension('VK_NV_compute_shader_derivatives', 1, 
> 'device->rad_info.chip_class >= VI'),
>  ]
>
>  class VkVersion:
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index c802abb0e08..13f1f9aa9dc 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -223,6 +223,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
> const struct spirv_to_nir_options spirv_options = {
> .lower_ubo_ssbo_access_to_offsets = true,
> .caps = {
> +   .derivative_group = true,
> .descriptor_array_dynamic_indexing = true,
> .device_group = true,
> .draw_parameters = true,
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] radv: Support VK_EXT_inline_uniform_block.

2019-04-17 Thread Bas Nieuwenhuizen
So I have trouble making sense of what you changed, but on its own
the patch looks good to me. r-b

On Tue, Apr 16, 2019 at 5:26 PM Samuel Pitoiset
 wrote:
>
> From: Bas Nieuwenhuizen 
>
> Basically just reserve the memory in the descriptor sets.
>
> On the shader side we construct a buffer descriptor, since
> AFAIU VGPR indexing on 32-bit pointers in LLVM is still broken.
>
> This fully supports update after bind and variable descriptor set
> sizes. However, the limits are somewhat arbitrary and are mostly
> about finding a reasonable division of a 2 GiB max memory size over
> the set.
>
> v2: - rebased on top of master (Samuel)
> - remove the loading resources rework (Samuel)
> - only load UBO descriptors if it's a pointer (Samuel)
> - use LLVMBuildPtrToInt to avoid IR failures (Samuel)
> ---
>  src/amd/vulkan/radv_descriptor_set.c | 83 
>  src/amd/vulkan/radv_device.c | 22 +++-
>  src/amd/vulkan/radv_extensions.py|  1 +
>  src/amd/vulkan/radv_nir_to_llvm.c| 31 ++-
>  src/amd/vulkan/radv_private.h|  2 +
>  5 files changed, 124 insertions(+), 15 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_descriptor_set.c 
> b/src/amd/vulkan/radv_descriptor_set.c
> index 68171b5d244..6c6b88a4553 100644
> --- a/src/amd/vulkan/radv_descriptor_set.c
> +++ b/src/amd/vulkan/radv_descriptor_set.c
> @@ -127,6 +127,7 @@ VkResult radv_CreateDescriptorSetLayout(
> uint32_t b = binding->binding;
> uint32_t alignment;
> unsigned binding_buffer_count = 0;
> +   uint32_t descriptor_count = binding->descriptorCount;
>
> switch (binding->descriptorType) {
> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
> @@ -164,6 +165,11 @@ VkResult radv_CreateDescriptorSetLayout(
> set_layout->binding[b].size = 16;
> alignment = 16;
> break;
> +   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
> +   alignment = 16;
> +   set_layout->binding[b].size = descriptor_count;
> +   descriptor_count = 1;
> +   break;
> default:
> unreachable("unknown descriptor type\n");
> break;
> @@ -171,7 +177,7 @@ VkResult radv_CreateDescriptorSetLayout(
>
> set_layout->size = align(set_layout->size, alignment);
> set_layout->binding[b].type = binding->descriptorType;
> -   set_layout->binding[b].array_size = binding->descriptorCount;
> +   set_layout->binding[b].array_size = descriptor_count;
> set_layout->binding[b].offset = set_layout->size;
> set_layout->binding[b].buffer_offset = buffer_count;
> set_layout->binding[b].dynamic_offset_offset = 
> dynamic_offset_count;
> @@ -207,9 +213,9 @@ VkResult radv_CreateDescriptorSetLayout(
> samplers_offset += 4 * sizeof(uint32_t) * 
> binding->descriptorCount;
> }
>
> -   set_layout->size += binding->descriptorCount * 
> set_layout->binding[b].size;
> -   buffer_count += binding->descriptorCount * 
> binding_buffer_count;
> -   dynamic_offset_count += binding->descriptorCount *
> +   set_layout->size += descriptor_count * 
> set_layout->binding[b].size;
> +   buffer_count += descriptor_count * binding_buffer_count;
> +   dynamic_offset_count += descriptor_count *
> set_layout->binding[b].dynamic_offset_count;
> set_layout->shader_stages |= binding->stageFlags;
> }
> @@ -264,6 +270,7 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device,
>
> uint64_t descriptor_size = 0;
> uint64_t descriptor_alignment = 1;
> +   uint32_t descriptor_count = binding->descriptorCount;
> switch (binding->descriptorType) {
> case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
> case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
> @@ -282,7 +289,7 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device,
> descriptor_alignment = 32;
> break;
> case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
> -   if 
> (!has_equal_immutable_samplers(binding->pImmutableSamplers, 
> binding->descriptorCount)) {
> +   if 
> (!has_equal

Re: [Mesa-dev] [PATCH 3/3] radv: add VK_KHR_shader_atomic_int64 but disable it for now

2019-04-17 Thread Bas Nieuwenhuizen
hmm, should work by design if we keep the entry but make it False. Let
me look into it.

On Wed, Apr 17, 2019 at 9:59 PM Samuel Pitoiset
 wrote:
>
>
> On 4/17/19 9:05 PM, Samuel Pitoiset wrote:
> >
> > On 4/17/19 8:52 PM, Bas Nieuwenhuizen wrote:
> >> On Tue, Apr 16, 2019 at 10:35 AM Samuel Pitoiset
> >>  wrote:
> >>> No support for 64-bit compare atomic operations.
> >>>
> >>> Signed-off-by: Samuel Pitoiset 
> >>> ---
> >>>   src/amd/vulkan/radv_device.c  | 10 ++
> >>>   src/amd/vulkan/radv_extensions.py |  1 +
> >>>   src/amd/vulkan/radv_shader.c  |  1 +
> >>>   3 files changed, 12 insertions(+)
> >>>
> >>> diff --git a/src/amd/vulkan/radv_device.c
> >>> b/src/amd/vulkan/radv_device.c
> >>> index 1f77dcadb17..13021a9f2da 100644
> >>> --- a/src/amd/vulkan/radv_device.c
> >>> +++ b/src/amd/vulkan/radv_device.c
> >>> @@ -906,6 +906,16 @@ void radv_GetPhysicalDeviceFeatures2(
> >>>  features->shaderInt8 = true;
> >>>  break;
> >>>  }
> >>> +   case
> >>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
> >>> + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features =
> >>> + (VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext;
> >>> +   /* TODO: Enable this once the driver
> >>> supports 64-bit
> >>> +* compare atomic operations.
> >>> +*/
> >>> +   features->shaderBufferInt64Atomics = false;
> >>> +   features->shaderSharedInt64Atomics = false;
> >>> +   break;
> >>> +   }
> >>>  default:
> >>>  break;
> >>>  }
> >>> diff --git a/src/amd/vulkan/radv_extensions.py
> >>> b/src/amd/vulkan/radv_extensions.py
> >>> index 13fe391e623..2d2d3f0c447 100644
> >>> --- a/src/amd/vulkan/radv_extensions.py
> >>> +++ b/src/amd/vulkan/radv_extensions.py
> >>> @@ -81,6 +81,7 @@ EXTENSIONS = [
> >>>   Extension('VK_KHR_push_descriptor',   1, True),
> >>>   Extension('VK_KHR_relaxed_block_layout',  1, True),
> >>>   Extension('VK_KHR_sampler_mirror_clamp_to_edge',  1, True),
> >>> +Extension('VK_KHR_shader_atomic_int64',   1,
> >>> 'HAVE_LLVM >= 0x900'),
> >> I'd prefer we make this constant False as long as we never enable any of
> >> the features.
> >>
> >> Otherwise r-b for the series.
> > Looks good to me, I will fix before pushing, thanks!
>
> Oh we can't do this, it introduces a compilation error.
>
> src/amd/vulkan/radv_extensions.c:163:4: note: (near initialization for
> ‘radv_device_extensions’)
> src/amd/vulkan/radv_extensions.c: In function
> ‘radv_fill_device_extension_table’:
> src/amd/vulkan/radv_extensions.c:214:11: error: ‘struct
> radv_device_extension_table’ has no member named
> ‘KHR_shader_atomic_int64’; did you mean ‘KHR_shader_float16_int8’?
>  table->KHR_shader_atomic_int64 = false &&
> (instance->enabled_extensions.KHR_get_physical_device_properties2 ||
> instance->apiVersion >= VK_API_VERSION_1_1);
> ^~~
> KHR_shader_float16_int8
>
> >>
> >>> Extension('VK_KHR_shader_draw_parameters',1, True),
> >>>   Extension('VK_KHR_shader_float16_int8',   1, True),
> >>>   Extension('VK_KHR_storage_buffer_storage_class',  1, True),
> >>> diff --git a/src/amd/vulkan/radv_shader.c
> >>> b/src/amd/vulkan/radv_shader.c
> >>> index a9677094772..c802abb0e08 100644
> >>> --- a/src/amd/vulkan/radv_shader.c
> >>> +++ b/src/amd/vulkan/radv_shader.c
> >>> @@ -235,6 +235,7 @@ radv_shader_compile_to_nir(struct radv_device
> >>> *device,
> >>>  .int8 = true,
> >>>  .int16 = true,
> >>>  .int64 = true,
> >>> +   .int64_atomics = true,
> >>>  .multiview = true,
> >>> .physical_storage_buffer_address = true,
> >>>  .runtime_descriptor_array = true,
> >>> --
> >>> 2.21.0
> >>>
> >>> ___
> >>> mesa-dev mailing list
> >>> mesa-dev@lists.freedesktop.org
> >>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] ac: add support for more types with struct/raw LLVM intrinsics

2019-04-17 Thread Bas Nieuwenhuizen
r-b for the series

On Tue, Mar 26, 2019 at 12:36 PM Samuel Pitoiset
 wrote:
>
> LLVM 9+ now supports 8-bit and 16-bit types.
>
> This change requires LLVM r356465.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 51 +++---
>  1 file changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index a816327ce95..88df82dcc54 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1136,6 +1136,7 @@ ac_build_llvm8_buffer_store_common(struct 
> ac_llvm_context *ctx,
>LLVMValueRef voffset,
>LLVMValueRef soffset,
>unsigned num_channels,
> +  LLVMTypeRef base_type,
>bool glc,
>bool slc,
>bool writeonly_memory,
> @@ -1151,21 +1152,22 @@ ac_build_llvm8_buffer_store_common(struct 
> ac_llvm_context *ctx,
> args[idx++] = voffset ? voffset : ctx->i32_0;
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 
> 0);
> -   unsigned func = CLAMP(num_channels, 1, 4) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4);
> +   const char *indexing_kind = structurized ? "struct" : "raw";
> +   char name[256], type_name[8];
>
> -   if (HAVE_LLVM == 0x800 && func == 2)
> -   func = 3; /* Only LLVM 9+ supports vec3 */
> +   if (HAVE_LLVM == 0x800 && func == 3)
> +   func = 4; /* Only LLVM 9+ supports vec3 */
>
> -   const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
> -   const char *indexing_kind = structurized ? "struct" : "raw";
> -   char name[256];
> +   LLVMTypeRef type = func > 1 ? LLVMVectorType(base_type, func) : 
> base_type;
> +   ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
>
> if (use_format) {
> snprintf(name, sizeof(name), 
> "llvm.amdgcn.%s.buffer.store.format.%s",
> -indexing_kind, type_names[func]);
> +indexing_kind, type_name);
> } else {
> snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
> -indexing_kind, type_names[func]);
> +indexing_kind, type_name);
> }
>
> ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
> @@ -1185,8 +1187,8 @@ ac_build_buffer_store_format(struct ac_llvm_context 
> *ctx,
> if (HAVE_LLVM >= 0x800) {
> ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
>voffset, NULL, 
> num_channels,
> -  glc, false, 
> writeonly_memory,
> -  true, true);
> +  ctx->f32, glc, false,
> +  writeonly_memory, true, 
> true);
> } else {
> ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset,
>  num_channels, glc, false,
> @@ -1249,6 +1251,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
>ctx->i32_0,
>voffset, offset,
>num_channels,
> +  ctx->f32,
>glc, slc,
>writeonly_memory,
>false, false);
> @@ -1324,6 +1327,7 @@ ac_build_llvm8_buffer_load_common(struct 
> ac_llvm_context *ctx,
>   LLVMValueRef voffset,
>   LLVMValueRef soffset,
>   unsigned num_channels,
> + LLVMTypeRef base_type,
>   bool glc,
>   bool slc,
>   bool can_speculate,
> @@ -1338,26 +1342,26 @@ ac_build_llvm8_buffer_load_common(struct 
> ac_llvm_context *ctx,
> args[idx++] = voffset ? voffset : ctx->i32_0;
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 
> 0);
> -   unsigned func = CLAMP(num_channels, 1, 4) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4);
>
> -   if (HAVE_LLVM == 0x800 && func == 2)
> -   func = 3; /* Only LLVM 9+ supports vec3 */
> +   if (HAVE_LLVM == 0x800 

Re: [Mesa-dev] [PATCH 3/3] radv: add VK_KHR_shader_atomic_int64 but disable it for now

2019-04-17 Thread Bas Nieuwenhuizen
On Tue, Apr 16, 2019 at 10:35 AM Samuel Pitoiset
 wrote:
>
> No support for 64-bit compare atomic operations.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_device.c  | 10 ++
>  src/amd/vulkan/radv_extensions.py |  1 +
>  src/amd/vulkan/radv_shader.c  |  1 +
>  3 files changed, 12 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 1f77dcadb17..13021a9f2da 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -906,6 +906,16 @@ void radv_GetPhysicalDeviceFeatures2(
> features->shaderInt8 = true;
> break;
> }
> +   case 
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
> +   VkPhysicalDeviceShaderAtomicInt64FeaturesKHR 
> *features =
> +   (VkPhysicalDeviceShaderAtomicInt64FeaturesKHR 
> *)ext;
> +   /* TODO: Enable this once the driver supports 64-bit
> +* compare atomic operations.
> +*/
> +   features->shaderBufferInt64Atomics = false;
> +   features->shaderSharedInt64Atomics = false;
> +   break;
> +   }
> default:
> break;
> }
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index 13fe391e623..2d2d3f0c447 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -81,6 +81,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_push_descriptor',   1, True),
>  Extension('VK_KHR_relaxed_block_layout',  1, True),
>  Extension('VK_KHR_sampler_mirror_clamp_to_edge',  1, True),
> +Extension('VK_KHR_shader_atomic_int64',   1, 'HAVE_LLVM >= 
> 0x900'),

I'd prefer we make this constant False as long as we never enable any of
the features.

Otherwise r-b for the series.

>  Extension('VK_KHR_shader_draw_parameters',1, True),
>  Extension('VK_KHR_shader_float16_int8',   1, True),
>  Extension('VK_KHR_storage_buffer_storage_class',  1, True),
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index a9677094772..c802abb0e08 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -235,6 +235,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
> .int8 = true,
> .int16 = true,
> .int64 = true,
> +   .int64_atomics = true,
> .multiview = true,
> .physical_storage_buffer_address = true,
> .runtime_descriptor_array = true,
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] vulkan/wsi: check if the display_fd given is master

2019-04-17 Thread Bas Nieuwenhuizen
This will not work as-is for radv, as failure to initialize from
wsi_display_init_wsi (->wsi_device_init -> radv_init_wsi) will cause
us to fail initializing the whole device.

On Wed, Apr 17, 2019 at 7:02 PM Emil Velikov  wrote:
>
> From: Emil Velikov 
>
> As effectively required by the extension, we need to ensure we're master
>
> Currently drivers employ vendor specific solutions, which check if the
> device behind the fd is capable*, yet none of them do the master check.
>
> *In the radv case, if acceleration is available.
>
> Instead of duplicating the check in each driver, keep it where it's
> needed and used.
>
> Note this copies libdrm's drmIsMaster() to avoid depending on bleeding
> edge version of the library.
>
> Cc: Keith Packard 
> Cc: Jason Ekstrand 
> Cc: Bas Nieuwenhuizen 
> Cc: Andres Rodriguez 
> Reported-by: Andres Rodriguez 
> Fixes: da997ebec92 ("vulkan: Add KHR_display extension using DRM [v10]")
> Signed-off-by: Emil Velikov 
> ---
>  src/vulkan/wsi/wsi_common_display.c | 30 +
>  1 file changed, 30 insertions(+)
>
> diff --git a/src/vulkan/wsi/wsi_common_display.c 
> b/src/vulkan/wsi/wsi_common_display.c
> index 74ed36ed646..d6b2ae004ce 100644
> --- a/src/vulkan/wsi/wsi_common_display.c
> +++ b/src/vulkan/wsi/wsi_common_display.c
> @@ -1812,6 +1812,30 @@ fail_attr_init:
> return ret;
>  }
>
> +
> +/*
> + * Local version of the libdrm helper. Added to avoid depending on bleeding
> + * edge version of the library.
> + */
> +static int
> +local_drmIsMaster(int fd)
> +{
> +   /* Detect master by attempting something that requires master.
> +*
> +* Authenticating magic tokens requires master and 0 is an
> +* internal kernel detail which we could use. Attempting this on
> +* a master fd would therefore fail with EINVAL because 0
> +* is invalid.
> +*
> +* A non-master fd will fail with EACCES, as the kernel checks
> +* for master before attempting to do anything else.
> +*
> +* Since we don't want to leak implementation details, use
> +* EACCES.
> +*/
> +   return drmAuthMagic(fd, 0) != -EACCES;
> +}
> +
>  VkResult
>  wsi_display_init_wsi(struct wsi_device *wsi_device,
>   const VkAllocationCallbacks *alloc,
> @@ -1826,6 +1850,11 @@ wsi_display_init_wsi(struct wsi_device *wsi_device,
>goto fail;
> }
>
> +   if (!local_drmIsMaster(display_fd)) {
> +  result = VK_ERROR_INITIALIZATION_FAILED;
> +  goto fail_fd;
> +   }
> +
> wsi->fd = display_fd;
> wsi->alloc = alloc;
>
> @@ -1857,6 +1886,7 @@ wsi_display_init_wsi(struct wsi_device *wsi_device,
>  fail_cond:
> pthread_mutex_destroy(&wsi->wait_mutex);
>  fail_mutex:
> +fail_fd:
> vk_free(alloc, wsi);
>  fail:
> return result;
> --
> 2.21.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: enable shaderInt8 on SI and CIK

2019-04-15 Thread Bas Nieuwenhuizen
r-b

On Mon, Apr 15, 2019 at 5:42 PM Samuel Pitoiset
 wrote:
>
> No CTS failures.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_device.c  | 5 ++---
>  src/amd/vulkan/radv_extensions.py | 2 +-
>  2 files changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index c517b56cd0f..998d6f02eb2 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -901,9 +901,8 @@ void radv_GetPhysicalDeviceFeatures2(
> case 
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
> VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
> (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
> -   bool enabled = pdevice->rad_info.chip_class >= VI;
> -   features->shaderFloat16 = enabled && HAVE_LLVM >= 
> 0x0800;
> -   features->shaderInt8 = enabled;
> +   features->shaderFloat16 = 
> pdevice->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800;
> +   features->shaderInt8 = true;
> break;
> }
> default:
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index 3d0a0c4343e..13fe391e623 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -82,7 +82,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_relaxed_block_layout',  1, True),
>  Extension('VK_KHR_sampler_mirror_clamp_to_edge',  1, True),
>  Extension('VK_KHR_shader_draw_parameters',1, True),
> -Extension('VK_KHR_shader_float16_int8',   1, 
> 'device->rad_info.chip_class >= VI'),
> +Extension('VK_KHR_shader_float16_int8',   1, True),
>  Extension('VK_KHR_storage_buffer_storage_class',  1, True),
>  Extension('VK_KHR_surface',  25, 
> 'RADV_HAS_SURFACE'),
>  Extension('VK_KHR_surface_protected_capabilities',1, 
> 'RADV_HAS_SURFACE'),
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: enable GL_EXT_shader_image_load_formatted

2019-04-15 Thread Bas Nieuwenhuizen
I don't see this cap defined anywhere?

If it depends on an earlier series that is not pushed yet, but you
are going to,

Reviewed-by: Bas Nieuwenhuizen 

On Mon, Apr 15, 2019 at 7:19 PM Marek Olšák  wrote:
>
> From: Marek Olšák 
>
> no changes - the driver doesn't use the format
> ---
>  docs/relnotes/19.1.0.html | 1 +
>  src/gallium/drivers/radeonsi/si_get.c | 1 +
>  2 files changed, 2 insertions(+)
>
> diff --git a/docs/relnotes/19.1.0.html b/docs/relnotes/19.1.0.html
> index 1c5f8d034b7..36d6dbe27c3 100644
> --- a/docs/relnotes/19.1.0.html
> +++ b/docs/relnotes/19.1.0.html
> @@ -33,20 +33,21 @@ Compatibility contexts may report a lower version 
> depending on each driver.
>  SHA256 checksums
>  
>  TBD.
>  
>
>
>  New features
>
>  
>  GL_ARB_parallel_shader_compile on all drivers.
> +GL_EXT_shader_image_load_formatted on radeonsi.
>  GL_EXT_texture_compression_s3tc_srgb on Gallium drivers and i965 (ES 
> extension).
>  GL_NV_compute_shader_derivatives on iris and i965.
>  GL_KHR_parallel_shader_compile on all drivers.
>  VK_EXT_buffer_device_address on Intel and RADV.
>  VK_NV_compute_shader_derivatives on Intel.
>  
>
>  Bug fixes
>
>  
> diff --git a/src/gallium/drivers/radeonsi/si_get.c 
> b/src/gallium/drivers/radeonsi/si_get.c
> index 2142d5a33f2..67fbc50998b 100644
> --- a/src/gallium/drivers/radeonsi/si_get.c
> +++ b/src/gallium/drivers/radeonsi/si_get.c
> @@ -154,20 +154,21 @@ static int si_get_param(struct pipe_screen *pscreen, 
> enum pipe_cap param)
> case PIPE_CAP_INT64_DIVMOD:
> case PIPE_CAP_TGSI_CLOCK:
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
> case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
> case PIPE_CAP_TGSI_BALLOT:
> case PIPE_CAP_TGSI_VOTE:
> case PIPE_CAP_TGSI_FS_FBFETCH:
> case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
> +   case PIPE_CAP_IMAGE_LOAD_FORMATTED:
> return 1;
>
> case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
> return !SI_BIG_ENDIAN && sscreen->info.has_userptr;
>
> case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
> return sscreen->info.has_gpu_reset_status_query ||
>sscreen->info.has_gpu_reset_counter_query;
>
> case PIPE_CAP_TEXTURE_MULTISAMPLE:
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: set ACCESS_NON_READABLE on stores for copy/fill/clear meta shaders

2019-04-15 Thread Bas Nieuwenhuizen
r-b

On Mon, Apr 15, 2019 at 6:38 PM Samuel Pitoiset
 wrote:
>
> The compiler will emit GLC=1.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_buffer.c | 2 ++
>  src/amd/vulkan/radv_meta_clear.c  | 1 +
>  2 files changed, 3 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_meta_buffer.c 
> b/src/amd/vulkan/radv_meta_buffer.c
> index b3aed109a5e..3e4f63ad3c1 100644
> --- a/src/amd/vulkan/radv_meta_buffer.c
> +++ b/src/amd/vulkan/radv_meta_buffer.c
> @@ -51,6 +51,7 @@ build_buffer_fill_shader(struct radv_device *dev)
> store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
> store->src[2] = nir_src_for_ssa(offset);
> nir_intrinsic_set_write_mask(store, 0xf);
> +   nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
> store->num_components = 4;
> nir_builder_instr_insert(&b, &store->instr);
>
> @@ -110,6 +111,7 @@ build_buffer_copy_shader(struct radv_device *dev)
> store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
> store->src[2] = nir_src_for_ssa(offset);
> nir_intrinsic_set_write_mask(store, 0xf);
> +   nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
> store->num_components = 4;
> nir_builder_instr_insert(&b, &store->instr);
>
> diff --git a/src/amd/vulkan/radv_meta_clear.c 
> b/src/amd/vulkan/radv_meta_clear.c
> index 4407bd75ee9..101ef4344f4 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -1114,6 +1114,7 @@ build_clear_htile_mask_shader()
> store->src[1] = nir_src_for_ssa(&buf->dest.ssa);
> store->src[2] = nir_src_for_ssa(offset);
> nir_intrinsic_set_write_mask(store, 0xf);
> +   nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
> store->num_components = 4;
> nir_builder_instr_insert(&b, &store->instr);
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: use correct intrinsic type for bindless atomic_{min, max}

2019-04-15 Thread Bas Nieuwenhuizen
r-b

On Mon, Apr 15, 2019 at 2:17 AM Timothy Arceri  wrote:
>
> Coverity: CID 1444664
>
> Fixes: d62d434fe920 ("ac/nir_to_llvm: add image bindless support")
> ---
>  src/amd/common/ac_nir_to_llvm.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 61b33c74e6c..8266e7e9f68 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2549,8 +2549,8 @@ static LLVMValueRef visit_image_atomic(struct 
> ac_nir_context *ctx,
> bool is_unsigned;
> bool is_array;
> if (bindless) {
> -   if (instr->intrinsic == nir_intrinsic_image_atomic_min ||
> -   instr->intrinsic == nir_intrinsic_image_atomic_max) {
> +   if (instr->intrinsic == 
> nir_intrinsic_bindless_image_atomic_min ||
> +   instr->intrinsic == 
> nir_intrinsic_bindless_image_atomic_max) {
> const GLenum format = nir_intrinsic_format(instr);
> assert(format == GL_R32UI || format == GL_R32I);
> is_unsigned = format == GL_R32UI;
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: enable VK_AMD_gpu_shader_half_float

2019-04-09 Thread Bas Nieuwenhuizen
a-b now.

On Mon, Apr 8, 2019 at 9:31 AM Samuel Pitoiset
 wrote:
>
>
> On 3/21/19 11:09 AM, Bas Nieuwenhuizen wrote:
> > Honestly, having zero tests is worrying me. This is a pretty big extension
> > and I have questions like:
> >
> > do 16-bit loads + 16-bit ALU actually work together or have we been
> > silently relying on the fact there is always a ZExt cast after and
> > that did not care about input size?
>
> With the two 16-bit fixes I have just sent, it should be now safe to
> enable it.
>
> VK_KHR_shader_float16_int8 now works too, I'm just waiting for CTS
> bugfixes before enabling it by default.
>
> Can you Rb this one or do you still think it's too fragile?
>
> >
> > On Thu, Mar 21, 2019 at 10:03 AM Samuel Pitoiset
> >  wrote:
> >> Should be safe to enable as all instructions seem to support 16-bit.
> >> Unfortunately, there is no CTS test.
> >>
> >> Signed-off-by: Samuel Pitoiset 
> >> ---
> >>   src/amd/vulkan/radv_extensions.py | 1 +
> >>   1 file changed, 1 insertion(+)
> >>
> >> diff --git a/src/amd/vulkan/radv_extensions.py 
> >> b/src/amd/vulkan/radv_extensions.py
> >> index 421f8b926ea..23106765c2a 100644
> >> --- a/src/amd/vulkan/radv_extensions.py
> >> +++ b/src/amd/vulkan/radv_extensions.py
> >> @@ -122,6 +122,7 @@ EXTENSIONS = [
> >>   Extension('VK_EXT_vertex_attribute_divisor',  3, True),
> >>   Extension('VK_AMD_draw_indirect_count',   1, True),
> >>   Extension('VK_AMD_gcn_shader',1, True),
> >> +Extension('VK_AMD_gpu_shader_half_float', 1, 
> >> 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'),
> >>   Extension('VK_AMD_rasterization_order',   1, 
> >> 'device->has_out_of_order_rast'),
> >>   Extension('VK_AMD_shader_core_properties',1, True),
> >>   Extension('VK_AMD_shader_info',   1, True),
> >> --
> >> 2.21.0
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] ac: add 16-bit support to ac_build_ddxy()

2019-04-09 Thread Bas Nieuwenhuizen
r-b for both.

On Mon, Apr 8, 2019 at 9:22 AM Samuel Pitoiset
 wrote:
>
> From: Rhys Perry 
>
> Signed-off-by: Rhys Perry 
> ---
>  src/amd/common/ac_llvm_build.c | 22 +-
>  1 file changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 54e90288bda..3e3ca5c7fdb 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1862,9 +1862,16 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>   LLVMValueRef val)
>  {
> unsigned tl_lanes[4], trbl_lanes[4];
> +   char name[32], type[8];
> LLVMValueRef tl, trbl;
> +   LLVMTypeRef result_type;
> LLVMValueRef result;
>
> +   result_type = ac_to_float_type(ctx, LLVMTypeOf(val));
> +
> +   if (result_type == ctx->f16)
> +   val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
> +
> for (unsigned i = 0; i < 4; ++i) {
> tl_lanes[i] = i & mask;
> trbl_lanes[i] = (i & mask) + idx;
> @@ -1877,14 +1884,19 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>  trbl_lanes[0], trbl_lanes[1],
>  trbl_lanes[2], trbl_lanes[3]);
>
> -   tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
> -   trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
> +   if (result_type == ctx->f16) {
> +   tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
> +   trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
> +   }
> +
> +   tl = LLVMBuildBitCast(ctx->builder, tl, result_type, "");
> +   trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, "");
> result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
>
> -   result = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32,
> -   &result, 1, 0);
> +   ac_build_type_name_for_intr(result_type, type, sizeof(type));
> +   snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type);
>
> -   return result;
> +   return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0);
>  }
>
>  void
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac/nir: fix intrinsic names for atomic operations with LLVM 9+

2019-04-08 Thread Bas Nieuwenhuizen
r-b

On Mon, Apr 8, 2019 at 12:36 PM Samuel Pitoiset
 wrote:
>
>
> On 4/8/19 12:32 PM, Erik Faye-Lund wrote:
> > On Mon, 2019-04-08 at 11:39 +0200, Samuel Pitoiset wrote:
> >> This fixes the following LLVM error when using RADV_DEBUG=checkir:
> >> Intrinsic name not mangled correctly for type arguments! Should be:
> >> llvm.amdgcn.buffer.atomic.add.i32
> >> i32 (i32, <4 x i32>, i32, i32, i1)* @llvm.amdgcn.buffer.atomic.add
> >>
> >> The cmpswap operation still uses the old intrinsic.
> >>
> >> Signed-off-by: Samuel Pitoiset 
> >> ---
> >>   src/amd/common/ac_nir_to_llvm.c | 32 +
> >> ---
> >>   1 file changed, 21 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/src/amd/common/ac_nir_to_llvm.c
> >> b/src/amd/common/ac_nir_to_llvm.c
> >> index 6739551ca26..cc819286c65 100644
> >> --- a/src/amd/common/ac_nir_to_llvm.c
> >> +++ b/src/amd/common/ac_nir_to_llvm.c
> >> @@ -1679,7 +1679,8 @@ static void visit_store_ssbo(struct
> >> ac_nir_context *ctx,
> >>   static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
> >> const nir_intrinsic_instr
> >> *instr)
> >>   {
> >> -const char *name;
> >> +const char *op;
> >> +char name[64];
> >>  LLVMValueRef params[6];
> >>  int arg_count = 0;
> >>
> >> @@ -1696,39 +1697,48 @@ static LLVMValueRef visit_atomic_ssbo(struct
> >> ac_nir_context *ctx,
> >>
> >>  switch (instr->intrinsic) {
> >>  case nir_intrinsic_ssbo_atomic_add:
> >> -name = "llvm.amdgcn.buffer.atomic.add";
> >> +op = "add";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_imin:
> >> -name = "llvm.amdgcn.buffer.atomic.smin";
> >> +op = "smin";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_umin:
> >> -name = "llvm.amdgcn.buffer.atomic.umin";
> >> +op = "umin";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_imax:
> >> -name = "llvm.amdgcn.buffer.atomic.smax";
> >> +op = "smax";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_umax:
> >> -name = "llvm.amdgcn.buffer.atomic.umax";
> >> +op = "umax";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_and:
> >> -name = "llvm.amdgcn.buffer.atomic.and";
> >> +op = "and";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_or:
> >> -name = "llvm.amdgcn.buffer.atomic.or";
> >> +op = "or";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_xor:
> >> -name = "llvm.amdgcn.buffer.atomic.xor";
> >> +op = "xor";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_exchange:
> >> -name = "llvm.amdgcn.buffer.atomic.swap";
> >> +op = "swap";
> >>  break;
> >>  case nir_intrinsic_ssbo_atomic_comp_swap:
> >> -name = "llvm.amdgcn.buffer.atomic.cmpswap";
> >> +op = "cmpswap";
> >>  break;
> >>  default:
> >>  abort();
> >>  }
> >>
> >> +if (HAVE_LLVM >= 0x900 &&
> >> +instr->intrinsic != nir_intrinsic_ssbo_atomic_comp_swap) {
> >> +snprintf(name, sizeof(name),
> >> + "llvm.amdgcn.buffer.atomic.%s.i32", op);
> > The indention here seems off, compared to the else-case... (tabs vs
> > spaces?)
> Will fix before pushing.
> >
> >> +} else {
> >> +snprintf(name, sizeof(name),
> >> + "llvm.amdgcn.buffer.atomic.%s", op);
> >> +}
> >> +
> >>  return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params,
> >> arg_count, 0);
> >>   }
> >>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] amd/addrlib: fix uninitialized values for Addr2ComputeDccAddrFromCoord

2019-04-03 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Wed, Apr 3, 2019 at 11:17 PM Marek Olšák  wrote:
>
> From: Marek Olšák 
>
> ---
>  src/amd/addrlib/src/gfx9/gfx9addrlib.cpp | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp 
> b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
> index b3cbccf..9be775f 100644
> --- a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
> +++ b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp
> @@ -129,20 +129,22 @@ const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, 
> {4, 4, 8}, {4, 4, 4}, {4, 2
>  
> 
>  */
>  Gfx9Lib::Gfx9Lib(const Client* pClient)
>  :
>  Lib(pClient),
>  m_numEquations(0)
>  {
>  m_class = AI_ADDRLIB;
>  memset(&m_settings, 0, sizeof(m_settings));
>  memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
> +m_metaEqOverrideIndex = 0;
> +memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
>  }
>
>  /**
>  
> 
>  *   Gfx9Lib::~Gfx9Lib
>  *
>  *   @brief
>  *   Destructor
>  
> 
>  */
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] docs: Fix 19.0.x version numbers

2019-04-01 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Mon, Apr 1, 2019 at 5:58 PM Guido Günther  wrote:
>
> The list has 19.0.2 twice.
>
> Signed-off-by: Guido Günther 
> ---
>  docs/release-calendar.html | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/docs/release-calendar.html b/docs/release-calendar.html
> index 0b0bb138b12..35965f68de8 100644
> --- a/docs/release-calendar.html
> +++ b/docs/release-calendar.html
> @@ -64,25 +64,25 @@ if you'd like to nominate a patch in the next stable 
> release.
>  
>  
>  2019-04-23
> -19.0.2
> +19.0.3
>  Dylan Baker
>  
>  
>  
>  2019-05-07
> -19.0.3
> +19.0.4
>  Dylan Baker
>  
>  
>  
>  2019-05-21
> -19.0.4
> +19.0.5
>  Dylan Baker
>  
>  
>  
>  2019-06-04
> -19.0.5
> +19.0.6
>  Dylan Baker
>  Last planned 19.0.x release
>  
> --
> 2.20.1
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/6] radv: partially enable VK_KHR_shader_float16_int8

2019-04-01 Thread Bas Nieuwenhuizen
hmm, okay, r-b then.

On Mon, Apr 1, 2019 at 5:24 PM Samuel Pitoiset
 wrote:
>
>
> On 4/1/19 5:22 PM, Bas Nieuwenhuizen wrote:
> > On Mon, Apr 1, 2019 at 4:15 PM Samuel Pitoiset
> >  wrote:
> >> Only 8-bit integers for now, float16 requires a bit more work.
> >>
> >> Signed-off-by: Samuel Pitoiset 
> >> ---
> >>   src/amd/vulkan/radv_device.c  | 8 
> >>   src/amd/vulkan/radv_extensions.py | 1 +
> >>   src/amd/vulkan/radv_shader.c  | 1 +
> >>   3 files changed, 10 insertions(+)
> >>
> >> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> >> index d26557fd449..ffec7cfdb19 100644
> >> --- a/src/amd/vulkan/radv_device.c
> >> +++ b/src/amd/vulkan/radv_device.c
> >> @@ -902,6 +902,14 @@ void radv_GetPhysicalDeviceFeatures2(
> >>  features->storagePushConstant8 = enabled;
> >>  break;
> >>  }
> >> +   case 
> >> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
> >> +   VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
> >> +   
> >> (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
> >> +   bool enabled = pdevice->rad_info.chip_class >= VI;
> >> +   features->shaderFloat16 = VK_FALSE;
> >> +   features->shaderInt8 = enabled;
> >> +   break;
> >> +   }
> >>  default:
> >>  break;
> >>  }
> >> diff --git a/src/amd/vulkan/radv_extensions.py 
> >> b/src/amd/vulkan/radv_extensions.py
> >> index 79562ff6e97..4b118b4ba82 100644
> >> --- a/src/amd/vulkan/radv_extensions.py
> >> +++ b/src/amd/vulkan/radv_extensions.py
> >> @@ -82,6 +82,7 @@ EXTENSIONS = [
> >>   Extension('VK_KHR_relaxed_block_layout',  1, True),
> >>   Extension('VK_KHR_sampler_mirror_clamp_to_edge',  1, True),
> >>   Extension('VK_KHR_shader_draw_parameters',1, True),
> >> +Extension('VK_KHR_shader_float16_int8',   1, 
> >> 'device->rad_info.chip_class >= VI'),
> > Why only VI+? float16 I understand, but int8 should be universal, no?
>
> Because it's untested, and also because I plan to enable 16bit integer
> for SI/CIK at the same time.
>
> >
> >>   Extension('VK_KHR_storage_buffer_storage_class',  1, True),
> >>   Extension('VK_KHR_surface',  25, 
> >> 'RADV_HAS_SURFACE'),
> >>   Extension('VK_KHR_surface_protected_capabilities',1, 
> >> 'RADV_HAS_SURFACE'),
> >> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> >> index f905dfd9e5b..d3d073d1db8 100644
> >> --- a/src/amd/vulkan/radv_shader.c
> >> +++ b/src/amd/vulkan/radv_shader.c
> >> @@ -251,6 +251,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
> >>  .trinary_minmax = true,
> >>  .variable_pointers = true,
> >>  .storage_8bit = true,
> >> +   .int8 = true,
> >>  },
> >>  .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 
> >> 2),
> >>  .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 
> >> 2),
> >> --
> >> 2.21.0
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/6] ac: add 8-bit and 64-bit support to ac_build_bitfield_reverse()

2019-04-01 Thread Bas Nieuwenhuizen
Patches 1-5 are r-b me.

On Mon, Apr 1, 2019 at 4:15 PM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 8d749cc8eb0..54e90288bda 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -2585,6 +2585,13 @@ LLVMValueRef ac_build_bitfield_reverse(struct 
> ac_llvm_context *ctx,
> bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
>
> switch (bitsize) {
> +   case 64:
> +   result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", 
> ctx->i64,
> +   (LLVMValueRef []) { src0 }, 1,
> +   AC_FUNC_ATTR_READNONE);
> +
> +   result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
> +   break;
> case 32:
> result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", 
> ctx->i32,
> (LLVMValueRef []) { src0 }, 1,
> @@ -2595,6 +2602,13 @@ LLVMValueRef ac_build_bitfield_reverse(struct 
> ac_llvm_context *ctx,
> (LLVMValueRef []) { src0 }, 1,
> AC_FUNC_ATTR_READNONE);
>
> +   result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
> +   break;
> +   case 8:
> +   result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", 
> ctx->i8,
> +   (LLVMValueRef []) { src0 }, 1,
> +   AC_FUNC_ATTR_READNONE);
> +
> result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
> break;
> default:
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/6] radv: partially enable VK_KHR_shader_float16_int8

2019-04-01 Thread Bas Nieuwenhuizen
On Mon, Apr 1, 2019 at 4:15 PM Samuel Pitoiset
 wrote:
>
> Only 8-bit integers for now, float16 requires a bit more work.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_device.c  | 8 
>  src/amd/vulkan/radv_extensions.py | 1 +
>  src/amd/vulkan/radv_shader.c  | 1 +
>  3 files changed, 10 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index d26557fd449..ffec7cfdb19 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -902,6 +902,14 @@ void radv_GetPhysicalDeviceFeatures2(
> features->storagePushConstant8 = enabled;
> break;
> }
> +   case 
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
> +   VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
> +   (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
> +   bool enabled = pdevice->rad_info.chip_class >= VI;
> +   features->shaderFloat16 = VK_FALSE;
> +   features->shaderInt8 = enabled;
> +   break;
> +   }
> default:
> break;
> }
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index 79562ff6e97..4b118b4ba82 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -82,6 +82,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_relaxed_block_layout',  1, True),
>  Extension('VK_KHR_sampler_mirror_clamp_to_edge',  1, True),
>  Extension('VK_KHR_shader_draw_parameters',1, True),
> +Extension('VK_KHR_shader_float16_int8',   1, 
> 'device->rad_info.chip_class >= VI'),

Why only VI+? float16 I understand, but int8 should be universal, no?

>  Extension('VK_KHR_storage_buffer_storage_class',  1, True),
>  Extension('VK_KHR_surface',  25, 
> 'RADV_HAS_SURFACE'),
>  Extension('VK_KHR_surface_protected_capabilities',1, 
> 'RADV_HAS_SURFACE'),
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index f905dfd9e5b..d3d073d1db8 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -251,6 +251,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
> .trinary_minmax = true,
> .variable_pointers = true,
> .storage_8bit = true,
> +   .int8 = true,
> },
> .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
> .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] radv: do not lower 16-bit FMA

2019-04-01 Thread Bas Nieuwenhuizen
This seems weird, do you know which tests are affected?

(I suspect what we really want to do is split into fmuladd and fma,
and only lower fmuladd. If that does not work, something else is
definitely going on)

On Mon, Mar 25, 2019 at 4:10 PM Samuel Pitoiset
 wrote:
>
> The lowering needs to be disabled for sufficient precision to pass
> deqp-vk's 16-bit fma test on radv.
>
> Original patch from Rhys.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_shader.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 63d4147460c..05041b2eae6 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -70,7 +70,6 @@ static const struct nir_shader_compiler_options nir_options 
> = {
> .lower_unpack_unorm_4x8 = true,
> .lower_extract_byte = true,
> .lower_extract_word = true,
> -   .lower_ffma16 = true,
> .lower_ffma32 = true,
> .lower_ffma64 = true,
> .lower_fpow = true,
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/5] ac/nir: fix nir_op_b2i16

2019-04-01 Thread Bas Nieuwenhuizen
r-b for the series.

On Tue, Mar 26, 2019 at 11:31 AM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 2321fed69f3..d74693ddd68 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -344,10 +344,16 @@ static LLVMValueRef emit_b2i(struct ac_llvm_context 
> *ctx,
>  {
> LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, 
> "");
>
> -   if (bitsize == 32)
> +   switch (bitsize) {
> +   case 16:
> +   return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
> +   case 32:
> return result;
> -
> -   return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
> +   case 64:
> +   return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
> +   default:
> +   unreachable("Unsupported bit size.");
> +   }
>  }
>
>  static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not always initialize HTILE in compressed state

2019-03-28 Thread Bas Nieuwenhuizen
r-b

On Thu, Mar 28, 2019 at 4:00 PM Samuel Pitoiset
 wrote:
>
> Especially when performing a transition from UNDEFINED->GENERAL,
> the driver shouldn't initialize HTILE metadata in compressed
> state because it doesn't decompress when the src layout is
> GENERAL.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110259
> Fixes: 3a2e93147f7 ("radv: always initialize HTILE when the src layout is 
> UNDEFINED")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index bed899d686e..7ee5a5ca7dc 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4478,8 +4478,14 @@ static void radv_handle_depth_image_transition(struct 
> radv_cmd_buffer *cmd_buffe
> return;
>
> if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
> -   /* TODO: merge with the clear if applicable */
> -   radv_initialize_htile(cmd_buffer, image, range, 0);
> +   uint32_t clear_value = vk_format_is_stencil(image->vk_format) 
> ? 0xf30f : 0xfffc000f;
> +
> +   if (radv_layout_is_htile_compressed(image, dst_layout,
> +   dst_queue_mask)) {
> +   clear_value = 0;
> +   }
> +
> +   radv_initialize_htile(cmd_buffer, image, range, clear_value);
> } else if (!radv_layout_is_htile_compressed(image, src_layout, 
> src_queue_mask) &&
>radv_layout_is_htile_compressed(image, dst_layout, 
> dst_queue_mask)) {
> uint32_t clear_value = vk_format_is_stencil(image->vk_format) 
> ? 0xf30f : 0xfffc000f;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: skip updating clear/color metadata for conditional rendering

2019-03-28 Thread Bas Nieuwenhuizen
r-b, though technically you may want something older for the fixes tag.

On Thu, Mar 28, 2019 at 12:20 PM Samuel Pitoiset
 wrote:
>
> I don't think we should update metadata when conditional rendering
> is enabled. For some reason, some CTS tests break only on SI.
>
> This fixes the following CTS on SI:
> dEQP-VK.conditional_rendering.draw_clear.clear.depth.*
>
> Fixes: a777c3d7cb0 ("radv: Use correct image view comparison for fast 
> clears.")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index fdf3024147d..bed899d686e 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1275,7 +1275,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer 
> *cmd_buffer,
> if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
> ++reg_count;
>
> -   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
> +   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 
> cmd_buffer->state.predicating));
> radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
> S_370_WR_CONFIRM(1) |
> S_370_ENGINE_SEL(V_370_PFP));
> @@ -1299,7 +1299,7 @@ radv_set_tc_compat_zrange_metadata(struct 
> radv_cmd_buffer *cmd_buffer,
> uint64_t va = radv_buffer_get_va(image->bo);
> va += image->offset + image->tc_compat_zrange_offset;
>
> -   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
> +   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 
> cmd_buffer->state.predicating));
> radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
> S_370_WR_CONFIRM(1) |
> S_370_ENGINE_SEL(V_370_PFP));
> @@ -1493,7 +1493,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer 
> *cmd_buffer,
>
> assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));
>
> -   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
> +   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 
> cmd_buffer->state.predicating));
> radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
> S_370_WR_CONFIRM(1) |
> S_370_ENGINE_SEL(V_370_PFP));
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] radv: enable VK_AMD_gpu_shader_int16

2019-03-28 Thread Bas Nieuwenhuizen
r-b

For series

On Fri, Mar 22, 2019, 2:49 PM Samuel Pitoiset 
wrote:

> This extension adds 16-bit support to Frexp/FrexpStruct.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_extensions.py | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/amd/vulkan/radv_extensions.py
> b/src/amd/vulkan/radv_extensions.py
> index 23106765c2a..e97f320e8a1 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -123,6 +123,7 @@ EXTENSIONS = [
>  Extension('VK_AMD_draw_indirect_count',   1, True),
>  Extension('VK_AMD_gcn_shader',1, True),
>  Extension('VK_AMD_gpu_shader_half_float', 1,
> 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'),
> +Extension('VK_AMD_gpu_shader_int16',  1,
> 'device->rad_info.chip_class >= VI'),
>  Extension('VK_AMD_rasterization_order',   1,
> 'device->has_out_of_order_rast'),
>  Extension('VK_AMD_shader_core_properties',1, True),
>  Extension('VK_AMD_shader_info',   1, True),
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] ac: allow to use vec3 for typed/untyped buffer stores/loads with LLVM 9+

2019-03-28 Thread Bas Nieuwenhuizen
r-b

On Tue, Mar 26, 2019, 10:13 AM Samuel Pitoiset 
wrote:

> 27670 shaders in 14347 tests
> Totals:
> SGPRS: 1231173 -> 1236757 (0.45 %)
> VGPRS: 866056 -> 867488 (0.17 %)
> Spilled SGPRs: 24201 -> 24169 (-0.13 %)
> Code Size: 46134836 -> 46115944 (-0.04 %) bytes
> Max Waves: 232287 -> 232070 (-0.09 %)
>
> Totals from affected shaders:
> SGPRS: 247624 -> 253208 (2.26 %)
> VGPRS: 214952 -> 216384 (0.67 %)
> Spilled SGPRs: 63 -> 31 (-50.79 %)
> Code Size: 7633772 -> 7614880 (-0.25 %) bytes
> Max Waves: 62065 -> 61848 (-0.35 %)
>
> This change requires LLVM r356755.
>
> v2: - fix llvm 8
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 33 +++--
>  src/amd/common/ac_llvm_build.h |  1 +
>  2 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c
> b/src/amd/common/ac_llvm_build.c
> index 1123dce2cc8..a816327ce95 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -83,6 +83,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
> ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
> ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
> ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
> +   ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
> ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
> ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
> ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
> @@ -1150,9 +1151,12 @@ ac_build_llvm8_buffer_store_common(struct
> ac_llvm_context *ctx,
> args[idx++] = voffset ? voffset : ctx->i32_0;
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -   unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4) - 1;
>
> -   const char *type_names[] = {"f32", "v2f32", "v4f32"};
> +   if (HAVE_LLVM == 0x800 && func == 2)
> +   func = 3; /* Only LLVM 9+ supports vec3 */
> +
> +   const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
> const char *indexing_kind = structurized ? "struct" : "raw";
> char name[256];
>
> @@ -1334,10 +1338,13 @@ ac_build_llvm8_buffer_load_common(struct
> ac_llvm_context *ctx,
> args[idx++] = voffset ? voffset : ctx->i32_0;
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -   unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4) - 1;
>
> -   LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
> -   const char *type_names[] = {"f32", "v2f32", "v4f32"};
> +   if (HAVE_LLVM == 0x800 && func == 2)
> +   func = 3; /* Only LLVM 9+ supports vec3 */
> +
> +   LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v3f32,
> ctx->v4f32};
> +   const char *type_names[] = {"f32", "v2f32", "v3f32", "v4f32"};
> const char *indexing_kind = structurized ? "struct" : "raw";
> char name[256];
>
> @@ -1490,10 +1497,13 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context
> *ctx,
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -   unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4) - 1;
>
> -   LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
> -   const char *type_names[] = {"i32", "v2i32", "v4i32"};
> +   if (HAVE_LLVM == 0x800 && func == 2)
> +   func = 3; /* Only LLVM 9+ supports vec3 */
> +
> +   LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v3i32,
> ctx->v4i32};
> +   const char *type_names[] = {"i32", "v2i32", "v3i32", "v4i32"};
> const char *indexing_kind = structurized ? "struct" : "raw";
> char name[256];
>
> @@ -1651,9 +1661,12 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context
> *ctx,
> args[idx++] = soffset ? soffset : ctx->i32_0;
> args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
> args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 :
> 0), 0);
> -   unsigned func = CLAMP(num_channels, 1, 3) - 1;
> +   unsigned func = CLAMP(num_channels, 1, 4) - 1;
>
> -   const char *type_names[] = {"i32", "v2i32", "v4i32"};
> +   if (HAVE_LLVM == 0x800 && func == 2)
> +   func = 3; /* Only LLVM 9+ supports vec3 */
> +
> +   const char *type_names[] = {"i32", "v2i32", "v3i32", "v4i32"};
> const char *indexing_kind = structurized ? "struct" : "raw";
> char name[256];
>
> diff --git a/src/amd/common/ac_llvm_build.h
> b/src/amd/common/ac_llvm_build.h
> index 9151c743bed..d2f8cd5e08b 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -71,6 +71,7 @@ struct 

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Bas Nieuwenhuizen
R-b

Though not sure it really helps given code size increase?

On Wed, Mar 27, 2019, 10:13 AM Samuel Pitoiset 
wrote:

> This helps a few compute shaders, mostly for F12017.
>
> 27670 shaders in 14347 tests
> Totals:
> SGPRS: 1231173 -> 1231173 (0.00 %)
> VGPRS: 866056 -> 865928 (-0.01 %)
> Spilled SGPRs: 24201 -> 24201 (0.00 %)
> Code Size: 46137040 -> 46144868 (0.02 %) bytes
> Max Waves: 232287 -> 232302 (0.01 %)
>
> Totals from affected shaders:
> SGPRS: 24624 -> 24624 (0.00 %)
> VGPRS: 25960 -> 25832 (-0.49 %)
> Spilled SGPRs: 0 -> 0 (0.00 %)
> Code Size: 2922632 -> 2930460 (0.27 %) bytes
> Max Waves: 1216 -> 1231 (1.23 %)
>
> Suggested-by: 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 8 
>  src/amd/vulkan/radv_shader.c| 5 -
>  2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index b25cc6a0a84..c46d98e6dd9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
> *ctx,
> case nir_intrinsic_vulkan_resource_reindex:
> result = visit_vulkan_resource_reindex(ctx, instr);
> break;
> +   case nir_intrinsic_load_vulkan_descriptor: {
> +   LLVMValueRef values[2] = {
> +   get_src(ctx, instr->src[0]),
> +   ctx->ac.i32_0,
> +   };
> +   result = ac_build_gather_values(>ac, values, 2);
> +   break;
> +   }
> case nir_intrinsic_store_ssbo:
> visit_store_ssbo(ctx, instr);
> break;
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 19a807df199..2751302e8b9 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
> }
> }
> const struct spirv_to_nir_options spirv_options = {
> -   .lower_ubo_ssbo_access_to_offsets = true,
> .caps = {
> .descriptor_array_dynamic_indexing = true,
> .device_group = true,
> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device *device,
> NIR_PASS_V(nir, nir_lower_system_values);
> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
> NIR_PASS_V(nir, nir_lower_frexp);
> +
> +   NIR_PASS_V(nir, nir_lower_explicit_io,
> + nir_var_mem_ubo | nir_var_mem_ssbo,
> + nir_address_format_32bit_index_offset);
> }
>
> /* Vulkan uses the separate-shader linking model */
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac: use llvm.amdgcn.fmed3 intrinsic for nir_op_fmed3

2019-03-27 Thread Bas Nieuwenhuizen
r-b

On Mon, Mar 25, 2019 at 1:34 PM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c  | 27 +++
>  src/amd/common/ac_llvm_build.h  |  4 
>  src/amd/common/ac_nir_to_llvm.c | 13 +
>  3 files changed, 36 insertions(+), 8 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 1123dce2cc8..04d4b377fd1 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -2430,6 +2430,33 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, 
> unsigned simm16)
>ctx->voidt, args, 1, 0);
>  }
>
> +LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
> +   LLVMValueRef src1, LLVMValueRef src2,
> +   unsigned bitsize)
> +{
> +   LLVMTypeRef type;
> +   char *intr;
> +
> +   if (bitsize == 16) {
> +   intr = "llvm.amdgcn.fmed3.f16";
> +   type = ctx->f16;
> +   } else if (bitsize == 32) {
> +   intr = "llvm.amdgcn.fmed3.f32";
> +   type = ctx->f32;
> +   } else {
> +   intr = "llvm.amdgcn.fmed3.f64";
> +   type = ctx->f64;
> +   }
> +
> +   LLVMValueRef params[] = {
> +   src0,
> +   src1,
> +   src2,
> +   };
> +   return ac_build_intrinsic(ctx, intr, type, params, 3,
> + AC_FUNC_ATTR_READNONE);
> +}
> +
>  LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
> unsigned bitsize)
>  {
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 9151c743bed..14c1c56522b 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -549,6 +549,10 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, 
> unsigned simm16);
>  LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
>unsigned bitsize);
>
> +LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
> +   LLVMValueRef src1, LLVMValueRef src2,
> +   unsigned bitsize);
> +
>  LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
> unsigned bitsize);
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 675623cbfeb..7fd6437049a 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -,14 +,11 @@ static void visit_alu(struct ac_nir_context *ctx, 
> const nir_alu_instr *instr)
> result = emit_minmax_int(>ac, LLVMIntSGT, result, 
> src[2]);
> break;
> case nir_op_fmed3: {
> -   LLVMValueRef tmp1 = emit_intrin_2f_param(>ac, 
> "llvm.minnum",
> -   ac_to_float_type(>ac, 
> def_type), src[0], src[1]);
> -   LLVMValueRef tmp2 = emit_intrin_2f_param(>ac, 
> "llvm.maxnum",
> -   ac_to_float_type(>ac, 
> def_type), src[0], src[1]);
> -   tmp2 = emit_intrin_2f_param(>ac, "llvm.minnum",
> -   ac_to_float_type(>ac, 
> def_type), tmp2, src[2]);
> -   result = emit_intrin_2f_param(>ac, "llvm.maxnum",
> -   ac_to_float_type(>ac, 
> def_type), tmp1, tmp2);
> +   src[0] = ac_to_float(>ac, src[0]);
> +   src[1] = ac_to_float(>ac, src[1]);
> +   src[2] = ac_to_float(>ac, src[2]);
> +   result = ac_build_fmed3(>ac, src[0], src[1], src[2],
> +   instr->dest.dest.ssa.bit_size);
> break;
> }
> case nir_op_imed3: {
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac: fix incorrect argument type for tbuffer.{load, store} with LLVM 7

2019-03-21 Thread Bas Nieuwenhuizen
r-b

On Thu, Mar 21, 2019 at 12:41 PM Samuel Pitoiset
 wrote:
>
> GLC/SLC are boolean.
>
> This fixes the following LLVM error when checkir is set:
> Intrinsic has incorrect argument type!
> void (i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32)* 
> @llvm.amdgcn.tbuffer.store.i32
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 8701d2ca43a..1123dce2cc8 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1537,8 +1537,8 @@ ac_build_tbuffer_load(struct ac_llvm_context *ctx,
> immoffset,
> LLVMConstInt(ctx->i32, dfmt, false),
> LLVMConstInt(ctx->i32, nfmt, false),
> -   LLVMConstInt(ctx->i32, glc, false),
> -   LLVMConstInt(ctx->i32, slc, false),
> +   LLVMConstInt(ctx->i1, glc, false),
> +   LLVMConstInt(ctx->i1, slc, false),
> };
> unsigned func = CLAMP(num_channels, 1, 3) - 1;
> LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
> @@ -1699,8 +1699,8 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx,
> immoffset,
> LLVMConstInt(ctx->i32, dfmt, false),
> LLVMConstInt(ctx->i32, nfmt, false),
> -   LLVMConstInt(ctx->i32, glc, false),
> -   LLVMConstInt(ctx->i32, slc, false),
> +   LLVMConstInt(ctx->i1, glc, false),
> +   LLVMConstInt(ctx->i1, slc, false),
> };
> unsigned func = CLAMP(num_channels, 1, 3) - 1;
> const char *type_names[] = {"i32", "v2i32", "v4i32"};
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac: fix 16-bit shifts

2019-03-21 Thread Bas Nieuwenhuizen
On Thu, Mar 21, 2019 at 11:59 AM Samuel Pitoiset
 wrote:
>
> This fixes the following LLVM error when checkir is set:
> Type too small for ZExt
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 33 +
>  1 file changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 3681c07c364..925073efb85 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -671,22 +671,31 @@ static void visit_alu(struct ac_nir_context *ctx, const 
> nir_alu_instr *instr)
> result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
> break;
> case nir_op_ishl:
> -   result = LLVMBuildShl(ctx->ac.builder, src[0],
> - LLVMBuildZExt(ctx->ac.builder, src[1],
> -   LLVMTypeOf(src[0]), ""),
> - "");
> +   if (ac_get_elem_bits(>ac, LLVMTypeOf(src[1])) < 
> ac_get_elem_bits(>ac, LLVMTypeOf(src[0])))
> +   src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
> +  LLVMTypeOf(src[0]), "");
> +   else

Maybe make this an "else if" with the reverse check so we don't do
anything in the == case, for all three instructions?

Otherwise, r-b
> +   src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
> +   LLVMTypeOf(src[0]), "");
> +   result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
> break;
> case nir_op_ishr:
> -   result = LLVMBuildAShr(ctx->ac.builder, src[0],
> -  LLVMBuildZExt(ctx->ac.builder, src[1],
> -LLVMTypeOf(src[0]), ""),
> -  "");
> +   if (ac_get_elem_bits(>ac, LLVMTypeOf(src[1])) < 
> ac_get_elem_bits(>ac, LLVMTypeOf(src[0])))
> +   src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
> +  LLVMTypeOf(src[0]), "");
> +   else
> +   src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
> +   LLVMTypeOf(src[0]), "");
> +   result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
> break;
> case nir_op_ushr:
> -   result = LLVMBuildLShr(ctx->ac.builder, src[0],
> -  LLVMBuildZExt(ctx->ac.builder, src[1],
> -LLVMTypeOf(src[0]), ""),
> -  "");
> +   if (ac_get_elem_bits(>ac, LLVMTypeOf(src[1])) < 
> ac_get_elem_bits(>ac, LLVMTypeOf(src[0])))
> +   src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
> +  LLVMTypeOf(src[0]), "");
> +   else
> +   src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
> +   LLVMTypeOf(src[0]), "");
> +   result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
> break;
> case nir_op_ilt32:
> result = emit_int_cmp(>ac, LLVMIntSLT, src[0], src[1]);
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: enable VK_AMD_gpu_shader_half_float

2019-03-21 Thread Bas Nieuwenhuizen
Honestly, the lack of tests is worrying me. This is a pretty big extension
and I have questions like:

do 16-bit loads + 16-bit ALU actually work together, or have we been
silently relying on the fact that there is always a ZExt cast afterwards
that did not care about the input size?

On Thu, Mar 21, 2019 at 10:03 AM Samuel Pitoiset
 wrote:
>
> Should be safe to enable as all instructions seem to support 16-bit.
> Unfortunately, there is no CTS test.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_extensions.py | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index 421f8b926ea..23106765c2a 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -122,6 +122,7 @@ EXTENSIONS = [
>  Extension('VK_EXT_vertex_attribute_divisor',  3, True),
>  Extension('VK_AMD_draw_indirect_count',   1, True),
>  Extension('VK_AMD_gcn_shader',1, True),
> +Extension('VK_AMD_gpu_shader_half_float', 1, 
> 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'),
>  Extension('VK_AMD_rasterization_order',   1, 
> 'device->has_out_of_order_rast'),
>  Extension('VK_AMD_shader_core_properties',1, True),
>  Extension('VK_AMD_shader_info',   1, True),
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] ac: add f16_0 and f16_1 constants

2019-03-21 Thread Bas Nieuwenhuizen
r-b for both.

On Thu, Mar 21, 2019 at 10:11 AM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 2 ++
>  src/amd/common/ac_llvm_build.h | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 2c6fecb91db..608f16a78f8 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -95,6 +95,8 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
> ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
> ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
> ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
> +   ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
> +   ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
> ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
> ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
> ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index aa6e913c6c6..9151c743bed 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -82,6 +82,8 @@ struct ac_llvm_context {
> LLVMValueRef i32_1;
> LLVMValueRef i64_0;
> LLVMValueRef i64_1;
> +   LLVMValueRef f16_0;
> +   LLVMValueRef f16_1;
> LLVMValueRef f32_0;
> LLVMValueRef f32_1;
> LLVMValueRef f64_0;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac: add 16-bit support to fract

2019-03-21 Thread Bas Nieuwenhuizen
r-b

On Thu, Mar 21, 2019 at 9:16 AM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index b5bb399eef1..2c6fecb91db 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -2434,7 +2434,10 @@ LLVMValueRef ac_build_fract(struct ac_llvm_context 
> *ctx, LLVMValueRef src0,
> LLVMTypeRef type;
> char *intr;
>
> -   if (bitsize == 32) {
> +   if (bitsize == 16) {
> +   intr = "llvm.amdgcn.fract.f16";
> +   type = ctx->f16;
> +   } else if (bitsize == 32) {
> intr = "llvm.amdgcn.fract.f32";
> type = ctx->f32;
> } else {
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/8] radv: VK_KHR_8bit_storage

2019-03-20 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series.

On Tue, Mar 19, 2019 at 9:28 AM Samuel Pitoiset
 wrote:
>
> Hi,
>
> This series implements VK_KHR_8bit_storage for RADV. Original work
> is from Rhys Perry, I did rebase, update some patches and test.
>
> Please review,
> thanks!
>
> Rhys Perry (5):
>   ac/nir: implement 8-bit push constant, ssbo and ubo loads
>   ac/nir: implement 8-bit ssbo stores
>   ac/nir: add 8-bit types to glsl_base_to_llvm_type
>   ac/nir: implement 8-bit conversions
>   radv: enable VK_KHR_8bit_storage
>
> Samuel Pitoiset (3):
>   ac: add various int8 definitions
>   ac: add ac_build_tbuffer_load_byte() helper
>   ac: add ac_build_tbuffer_store_byte() helper
>
>  docs/features.txt |  2 +-
>  src/amd/common/ac_llvm_build.c| 47 +-
>  src/amd/common/ac_llvm_build.h| 19 
>  src/amd/common/ac_nir_to_llvm.c   | 81 ++-
>  src/amd/vulkan/radv_device.c  |  9 
>  src/amd/vulkan/radv_extensions.py |  1 +
>  src/amd/vulkan/radv_shader.c  |  1 +
>  7 files changed, 145 insertions(+), 15 deletions(-)
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/8] ac/nir: implement 8-bit ssbo stores

2019-03-19 Thread Bas Nieuwenhuizen
On Tue, Mar 19, 2019 at 9:28 AM Samuel Pitoiset
 wrote:
>
> From: Rhys Perry 
>
> Signed-off-by: Rhys Perry 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 34c4e2a69fa..f3e8f89ba9b 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1553,7 +1553,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
>
> LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
> get_src(ctx, instr->src[1]), true);
> -   LLVMValueRef base_data = ac_to_float(&ctx->ac, src_data);
> +   LLVMValueRef base_data = src_data;

Does this work with LLVM 7? (I have a vague recollection that the
earlier intrinsics only handled floats.)

> base_data = ac_trim_vector(>ac, base_data, 
> instr->num_components);
> LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
>
> @@ -1591,7 +1591,12 @@ static void visit_store_ssbo(struct ac_nir_context 
> *ctx,
> offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
>   LLVMConstInt(ctx->ac.i32, start * 
> elem_size_bytes, false), "");
>
> -   if (num_bytes == 2) {
> +   if (num_bytes == 1) {
> +   ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data,
> +   offset, ctx->ac.i32_0,
> +   cache_policy & ac_glc,
> +   writeonly_memory);
> +   } else if (num_bytes == 2) {
> ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
>  offset, ctx->ac.i32_0,
>  cache_policy & ac_glc,
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac: use llvm.amdgcn.fract intrinsic for nir_op_ffract

2019-03-19 Thread Bas Nieuwenhuizen
r-b

On Tue, Mar 19, 2019 at 11:37 PM Samuel Pitoiset
 wrote:
>
> Noticed with a Doom shader.
>
> 29077 shaders in 15096 tests
> Totals:
> SGPRS: 1282125 -> 1282133 (0.00 %)
> VGPRS: 908716 -> 908616 (-0.01 %)
> Spilled SGPRs: 24811 -> 24779 (-0.13 %)
> Code Size: 49048176 -> 48936488 (-0.23 %) bytes
> Max Waves: 244232 -> 244226 (-0.00 %)
>
> Totals from affected shaders:
> SGPRS: 229584 -> 229592 (0.00 %)
> VGPRS: 163268 -> 163168 (-0.06 %)
> Spilled SGPRs: 8682 -> 8650 (-0.37 %)
> Code Size: 12819572 -> 12707884 (-0.87 %) bytes
> Max Waves: 24398 -> 24392 (-0.02 %)
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 0cab4d4a9b5..9cf1c6f8792 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -2155,19 +2155,18 @@ LLVMValueRef ac_build_fract(struct ac_llvm_context 
> *ctx, LLVMValueRef src0,
> char *intr;
>
> if (bitsize == 32) {
> -   intr = "llvm.floor.f32";
> +   intr = "llvm.amdgcn.fract.f32";
> type = ctx->f32;
> } else {
> -   intr = "llvm.floor.f64";
> +   intr = "llvm.amdgcn.fract.f64";
> type = ctx->f64;
> }
>
> LLVMValueRef params[] = {
> src0,
> };
> -   LLVMValueRef floor = ac_build_intrinsic(ctx, intr, type, params, 1,
> -   AC_FUNC_ATTR_READNONE);
> -   return LLVMBuildFSub(ctx->builder, src0, floor, "");
> +   return ac_build_intrinsic(ctx, intr, type, params, 1,
> + AC_FUNC_ATTR_READNONE);
>  }
>
>  LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 11/11] ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()

2019-03-19 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

FYI since the new intrinsics don't merge voffset and soffset anymore,
you can remove the tbuffer variants for LLVM8+.

On Wed, Mar 13, 2019 at 5:38 PM Samuel Pitoiset
 wrote:
>
> New buffer intrinsics have a separate soffset parameter.
>
> v3: - use ac_build_raw_tbuffer_store()
> v2: - use the raw version as vindex is not used
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 66 ++
>  1 file changed, 26 insertions(+), 40 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index c86a4f98864..541ad75c877 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1227,59 +1227,45 @@ ac_build_buffer_store_dword(struct ac_llvm_context 
> *ctx,
> if (!swizzle_enable_hint) {
> LLVMValueRef offset = soffset;
>
> -   static const char *types[] = {"f32", "v2f32", "v4f32"};
> -
> if (inst_offset)
> offset = LLVMBuildAdd(ctx->builder, offset,
>   LLVMConstInt(ctx->i32, 
> inst_offset, 0), "");
> -   if (voffset)
> -   offset = LLVMBuildAdd(ctx->builder, offset, voffset, 
> "");
> -
> -   LLVMValueRef args[] = {
> -   ac_to_float(ctx, vdata),
> -   LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
> -   ctx->i32_0,
> -   offset,
> -   LLVMConstInt(ctx->i1, glc, 0),
> -   LLVMConstInt(ctx->i1, slc, 0),
> -   };
> -
> -   char name[256];
> -   snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
> -types[CLAMP(num_channels, 1, 3) - 1]);
>
> -   ac_build_intrinsic(ctx, name, ctx->voidt,
> -  args, ARRAY_SIZE(args),
> -  
> ac_get_store_intr_attribs(writeonly_memory));
> +   if (HAVE_LLVM >= 0x800) {
> +   ac_build_llvm8_buffer_store_common(ctx, rsrc,
> +  ac_to_float(ctx, 
> vdata),
> +  ctx->i32_0,
> +  voffset, offset,
> +  num_channels,
> +  glc, slc,
> +  writeonly_memory,
> +  false, false);
> +   } else {
> +   if (voffset)
> +   offset = LLVMBuildAdd(ctx->builder, offset, 
> voffset, "");
> +
> +   ac_build_buffer_store_common(ctx, rsrc,
> +ac_to_float(ctx, vdata),
> +ctx->i32_0, offset,
> +num_channels, glc, slc,
> +writeonly_memory, false);
> +   }
> return;
> }
>
> -   static const unsigned dfmt[] = {
> +   static const unsigned dfmts[] = {
> V_008F0C_BUF_DATA_FORMAT_32,
> V_008F0C_BUF_DATA_FORMAT_32_32,
> V_008F0C_BUF_DATA_FORMAT_32_32_32,
> V_008F0C_BUF_DATA_FORMAT_32_32_32_32
> };
> -   static const char *types[] = {"i32", "v2i32", "v4i32"};
> -   LLVMValueRef args[] = {
> -   vdata,
> -   LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
> -   ctx->i32_0,
> -   voffset ? voffset : ctx->i32_0,
> -   soffset,
> -   LLVMConstInt(ctx->i32, inst_offset, 0),
> -   LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
> -   LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
> -   LLVMConstInt(ctx->i1, glc, 0),
> -   LLVMConstInt(ctx->i1, slc, 0),
> -   };
> -   char name[256];
> -   snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
> -types[CLAMP(num_channels, 1, 3) - 1]);
> +   unsigned dfmt = dfmts[num_channels - 1];
> +   unsigned nfmt = V_008F0C_BUF_

Re: [Mesa-dev] [PATCH v3 06/11] ac/nir: use ac_build_buffer_load() for SSBO load operations

2019-03-19 Thread Bas Nieuwenhuizen
On Wed, Mar 13, 2019 at 5:38 PM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 35 ++---
>  1 file changed, 6 insertions(+), 29 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 7f63b506b93..949a7a74834 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1704,7 +1704,6 @@ static LLVMValueRef visit_load_buffer(struct 
> ac_nir_context *ctx,
> int num_components = instr->num_components;
> enum gl_access_qualifier access = nir_intrinsic_access(instr);
> unsigned cache_policy = get_cache_policy(ctx, access, false, false);
> -   LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : 
> ctx->ac.i1false;
>
> LLVMValueRef offset = get_src(ctx, instr->src[1]);
> LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
> @@ -1734,34 +1733,12 @@ static LLVMValueRef visit_load_buffer(struct 
> ac_nir_context *ctx,
>   immoffset,
>   cache_policy & 
> ac_glc);
> } else {
> -   const char *load_name;
> -   LLVMTypeRef data_type;
> -   switch (load_bytes) {
> -   case 16:
> -   case 12:
> -   load_name = "llvm.amdgcn.buffer.load.v4f32";
> -   data_type = ctx->ac.v4f32;
> -   break;
> -   case 8:
> -   case 6:
> -   load_name = "llvm.amdgcn.buffer.load.v2f32";
> -   data_type = ctx->ac.v2f32;
> -   break;
> -   case 4:
> -   load_name = "llvm.amdgcn.buffer.load.f32";
> -   data_type = ctx->ac.f32;
> -   break;
> -   default:
> -   unreachable("Malformed load buffer.");
> -   }
> -   LLVMValueRef params[] = {
> -   rsrc,
> -   vindex,
> -   LLVMBuildAdd(ctx->ac.builder, offset, 
> immoffset, ""),
> -   glc,
> -   ctx->ac.i1false,
> -   };
> -   ret = ac_build_intrinsic(>ac, load_name, 
> data_type, params, 5, 0);
> +   int num_channels = util_next_power_of_two(load_bytes) 
> / 4;
> +
> +   ret = ac_build_buffer_load(>ac, rsrc, 
> num_channels,
> +  vindex, offset, immoffset, 
> 0,

immoffset is constant, right? Why do we use soffset instead of
inst_offset? (Not blocking anything since this is pre-existing; just curious.)

> +  cache_policy & ac_glc, 
> false,
> +  false, false);

Change the slc arg to 0? (since it is unsigned)

> }
>
> LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, 
> ac_get_type_size(LLVMTypeOf(ret)));
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] radv: remove unnecessary FLUSH_AND_INV_CB when initializing DCC

2019-03-19 Thread Bas Nieuwenhuizen
Isn't the fact that the clear does not use the CB caches exactly why we
need to flush them, to make sure the CB data is not left in the CB cache?

On Tue, Mar 19, 2019 at 12:15 PM Samuel Pitoiset
 wrote:
>
> The clear operation (ie. compute) doesn't use the CB caches.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 5bb3b51684e..b6035dfbbc5 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4583,8 +4583,7 @@ static void radv_init_color_image_metadata(struct 
> radv_cmd_buffer *cmd_buffer,
>
> state->flush_bits |= radv_clear_dcc(cmd_buffer, image, value);
>
> -   state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
> -RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
> +   state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
>
> radv_update_fce_metadata(cmd_buffer, image,
>  need_decompress_pass);
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: fix the NUM_RECORDS field for vertex bindings on GFX6/GFX7

2019-03-18 Thread Bas Nieuwenhuizen
I think this needs to be modified to get the Vulkan out-of-bounds behavior.

In particular whether a VS input is out of bounds or not can differ
between attributes from the same buffer with the same index, and that
is something not handled here.

I guess we could fix that by only using the offset in the shader and
calculating the offset by adding index * stride to it, but that is
extra instructions. Is doing the typed loads still a win then?

(Aside, don't we need to do the NUM_RECORDS logic for Vega too?)

On Wed, Mar 13, 2019 at 5:09 PM Samuel Pitoiset
 wrote:
>
> Since the driver now uses typed buffer loads, we don't have to
> account for the format.
>
> This fixes few CTS regressions on SI.
>
> Fixes: a66b186bebf ("radv: use typed buffer loads for vertex input fetches")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c |  3 +--
>  src/amd/vulkan/radv_pipeline.c   | 12 
>  src/amd/vulkan/radv_private.h|  6 --
>  3 files changed, 1 insertion(+), 20 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 06806ed6fce..d14bb1093c5 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1990,7 +1990,6 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
> *cmd_buffer,
> (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
> cmd_buffer->state.pipeline->num_vertex_bindings &&
> radv_get_shader(cmd_buffer->state.pipeline, 
> MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
> -   struct radv_vertex_elements_info *velems = 
> &cmd_buffer->state.pipeline->vertex_elements;
> unsigned vb_offset;
> void *vb_ptr;
> uint32_t i = 0;
> @@ -2018,7 +2017,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
> *cmd_buffer,
> desc[0] = va;
> desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 
> S_008F04_STRIDE(stride);
> if 
> (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
> -   desc[2] = (buffer->size - offset - 
> velems->format_size[i]) / stride + 1;
> +   desc[2] = (buffer->size - offset) / stride + 
> 1;
> else
> desc[2] = buffer->size - offset;
> desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 7f2f96c540a..793508d15d6 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -3531,18 +3531,6 @@ radv_compute_vertex_input_state(struct radv_pipeline 
> *pipeline,
>  {
> const VkPipelineVertexInputStateCreateInfo *vi_info =
> pCreateInfo->pVertexInputState;
> -   struct radv_vertex_elements_info *velems = &pipeline->vertex_elements;
> -
> -   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; 
> i++) {
> -   const VkVertexInputAttributeDescription *desc =
> -   &vi_info->pVertexAttributeDescriptions[i];
> -   unsigned loc = desc->location;
> -   const struct vk_format_description *format_desc;
> -
> -   format_desc = vk_format_description(desc->format);
> -
> -   velems->format_size[loc] = format_desc->block.bits / 8;
> -   }
>
> for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) 
> {
> const VkVertexInputBindingDescription *desc =
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 39fa6110fde..5c6258a2952 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1341,10 +1341,6 @@ struct radv_prim_vertex_count {
> uint8_t incr;
>  };
>
> -struct radv_vertex_elements_info {
> -   uint32_t format_size[MAX_VERTEX_ATTRIBS];
> -};
> -
>  struct radv_ia_multi_vgt_param_helpers {
> uint32_t base;
> bool partial_es_wave;
> @@ -1371,8 +1367,6 @@ struct radv_pipeline {
> uint32_t  ctx_cs_hash;
> struct radeon_cmdbuf  ctx_cs;
>
> -   struct radv_vertex_elements_info vertex_elements;
> -
> uint32_t binding_stride[MAX_VBS];
> uint8_t  num_vertex_bindings;
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac/nir_to_llvm: add assert to emit_bcsel()

2019-03-17 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Sun, Mar 17, 2019 at 11:04 AM Timothy Arceri  wrote:
>
> nir to llvm assumes we have already split vectors to scalars via
> nir_lower_alu_to_scalar().
> ---
>  src/amd/common/ac_nir_to_llvm.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 18297ed99b1..0ca3f83a248 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -268,6 +268,8 @@ static LLVMValueRef emit_intrin_3f_param(struct 
> ac_llvm_context *ctx,
>  static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
>LLVMValueRef src0, LLVMValueRef src1, 
> LLVMValueRef src2)
>  {
> +   assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
> +
> LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
>ctx->i32_0, "");
> return LLVMBuildSelect(ctx->builder, v,
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Building error in android-x86 due to recent mesa commit

2019-03-16 Thread Bas Nieuwenhuizen
Should be fixed when

https://gitlab.freedesktop.org/mesa/mesa/merge_requests/456

is merged.

On Sat, Mar 16, 2019 at 10:16 PM Mauro Rossi  wrote:
>
> Hi Marek,
>
> I'm getting the following building error after commit [1]
> but I don't understand why.
>
> Mauro
>
> external/mesa/src/gallium/drivers/radeonsi/si_compute.c:807:8: error:
> initializing 'uint *' (aka 'unsigned int *') with an expression of
> type 'uint const[3]' discards qualifiers
> [-Werror,-Wincompatible-pointer-types-discards-qualifiers]
> uint *last_block = info->last_block;
>   ^
> 1 error generated.
>
>
> [1] 
> https://cgit.freedesktop.org/mesa/mesa/commit/?id=b9e02fe138ef181f02fd739129517fbe70604af6
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: always initialize HTILE when the src layout is UNDEFINED

2019-03-14 Thread Bas Nieuwenhuizen
r-b

On Thu, Mar 14, 2019 at 2:24 PM Samuel Pitoiset
 wrote:
>
> HTILE should always be initialized when transitioning from
> VK_IMAGE_LAYOUT_UNDEFINED to other image layouts. Otherwise,
> if an app does a transition from UNDEFINED to GENERAL, the
> driver doesn't initialize HTILE and it tries to decompress
> the depth surface. For some reasons, this results in VM faults.
>
> Cc: mesa-sta...@lists.freedesktop.org
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107563
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 06806ed6fce..ae8f50d0348 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4477,8 +4477,7 @@ static void radv_handle_depth_image_transition(struct 
> radv_cmd_buffer *cmd_buffe
> if (!radv_image_has_htile(image))
> return;
>
> -   if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
> -  radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
> +   if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
> /* TODO: merge with the clear if applicable */
> radv_initialize_htile(cmd_buffer, image, range, 0);
> } else if (!radv_layout_is_htile_compressed(image, src_layout, 
> src_queue_mask) &&
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 01/12] ac: do not force enable IDXEN for 16-bit SSBO loads

2019-03-13 Thread Bas Nieuwenhuizen
NAK. The whole reason we now have both structurized and raw intrinsics
is that an index can be in use and still happen to be constant 0, so
the index being constant 0 is not a sign that the raw intrinsics can be
used. Don't just introduce that mistake again.

On Wed, Mar 13, 2019 at 11:47 AM Samuel Pitoiset
 wrote:
>
> The struct version enables IDXEN, while the raw one disables it.
> When vindex is unused, the raw version is enough.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index bc64f0bb7e3..8960b5ad4ff 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1383,11 +1383,13 @@ ac_build_tbuffer_load_short(struct ac_llvm_context 
> *ctx,
> LLVMValueRef res;
>
> if (HAVE_LLVM >= 0x0800) {
> +   bool structurized = vindex && vindex != ctx->i32_0;
> +
> voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
>
> res = ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
>   soffset, 1, dfmt, nfmt, glc,
> - false, true, true);
> + false, true, structurized);
> } else {
> const char *name = "llvm.amdgcn.tbuffer.load.i32";
> LLVMTypeRef type = ctx->i32;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] RFC: Workaround for pthread_setaffinity_np() seccomp filtering

2019-03-12 Thread Bas Nieuwenhuizen
On Tue, Mar 12, 2019 at 9:59 AM Marc-André Lureau
 wrote:
>
> Hi
>
> On Fri, Mar 1, 2019 at 12:13 PM Mathias Fröhlich
>  wrote:
> >
> > On Friday, 1 March 2019 12:15:08 CET Eero Tamminen wrote:
> > > Hi,
> > >
> > > On 1.3.2019 11.12, Michel Dänzer wrote:
> > > > On 2019-02-28 8:41 p.m., Marek Olšák wrote:
> > > >>> On Thu, Feb 28, 2019 at 1:37 PM Eero Tamminen 
> > > >>> 
> > >  Why distro versions of Qemu filter sched_setaffinity() syscall?
> > > >>>
> > > >>> (https://bugs.launchpad.net/ubuntu/+source/qemu/+bug/1815889)
> > > >>>
> > > >>> Daniel Berrange (berrange) wrote on 2019-02-27: #19
> > > >>>
> > > >>> "IMHO that mesa change is not valid. It is settings its affinity to
> > > >>> run on all threads which is definitely *NOT* something we want to be
> > > >>> allowed. Management applications want to control which CPUs QEMU runs
> > > >>> on, and as such Mesa should honour the CPU placement that the QEMU
> > > >>> process has.
> > > >>>
> > > >>> This is a great example of why QEMU wants to use seccomp to block
> > > >>> affinity changes to prevent something silently trying to use more CPUs
> > > >>> than are assigned to this QEMU."
> > > >>>
> > > >>
> > > >> Mesa uses thread affinity to optimize memory access performance on some
> > > >> CPUs (see util_pin_thread_to_L3). Other places in Mesa need to restore 
> > > >> the
> > > >> original thread affinity for some child threads. Additionally, if games
> > > >> limit the thread affinity, Mesa needs to restore the full thread 
> > > >> affinity
> > > >> for some of its child threads.
> > > >
> > > > The last part sounds like Mesa clearly overstepping its authority.
> > > >
> > > >
> > > >> In essence, the thread affinity should only be considered a hint for 
> > > >> the
> > > >> kernel for optimal performance. There is no reason to kill the process 
> > > >> if
> > > >> it's disallowed. Just ignore the call or modify the thread mask to 
> > > >> make it
> > > >> legal.
> > > >
> > > > The fundamental issue here is that Mesa is using the thread affinity API
> > > > for something else than it's intended for. If there was an API for what
> > > > Mesa wants (encouraging certain sets of threads to run on topologically
> > > > close cores), there should be no need to block that.
> > >
> > > Why such process needs to be killed instead the request being masked
> > > suitably, is there some program that breaks subtly if affinity request
> > > is masked (and that being worse than the program being killed)?
> >
> > But that is still a situation that could be nicely handled with a
> > EPERM error return. Way better than just kill a process.
> > That 'badly affected' program still can call abort then.
> > But nicely working programs don't get just killed then!!
>
>
> Returning an error seems less secure that prohibiting it completely.
> And it may lead to subtle bugs in rarely tested code paths.
>
> It's legitimate that QEMU and management layers want to prevent
> arbitrary code from changing resource allocation etc.
>
> There are no easy way I can think of for mesa (and other libraries) to
> probe the seccomp filters and associated action.
>
> So we need a way to tell mesa not to call setaffinity() (and other
> syscalls). MESA_NO_THREAD_AFFINITY or MESA_NO_SYSCALLS=setaffinity,...
> seem like a relatively easy way to go.

I strongly believe we should not be going the route of adding another
environment variable.

Primarily because this adds a big pitfall for users whose software
does not work and who then have to spend significant effort to
figure out the environment variable and get things to work (as well as
the associated costs of some users not getting that far and filing bugs or
proclaiming on other sites that this thing is buggy for them).

As such, I'd strongly appreciate it if people looked further than the
immediate crash and figured out a way to make graceful degradation
happen without user intervention.

Is there really no way to figure out that calling setaffinity is going
to kill the process? If not, what would be needed to add such a method?

>
> thanks
>
>
> --
> Marc-André Lureau
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] radv: use typed buffer loads for vertex input fetches

2019-03-12 Thread Bas Nieuwenhuizen
r-b for the series

On Tue, Feb 26, 2019 at 1:39 PM Samuel Pitoiset
 wrote:
>
> This drastically reduces the number of SGPRs because the driver
> now uses descriptors per vertex binding, instead of per vertex
> attribute format.
>
> 29077 shaders in 15096 tests
> Totals:
> SGPRS: 1354285 -> 1282109 (-5.33 %)
> VGPRS: 909896 -> 908800 (-0.12 %)
> Spilled SGPRs: 24840 -> 24811 (-0.12 %)
> Code Size: 49221144 -> 48986628 (-0.48 %) bytes
> Max Waves: 243930 -> 244229 (0.12 %)
>
> Totals from affected shaders:
> SGPRS: 390648 -> 318472 (-18.48 %)
> VGPRS: 288432 -> 287336 (-0.38 %)
> Spilled SGPRs: 94 -> 65 (-30.85 %)
> Code Size: 11548412 -> 11313896 (-2.03 %) bytes
> Max Waves: 86460 -> 86759 (0.35 %)
>
> This gives a really tiny boost.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 21 +-
>  src/amd/vulkan/radv_nir_to_llvm.c | 47 +--
>  src/amd/vulkan/radv_pipeline.c| 37 ++--
>  src/amd/vulkan/radv_private.h |  5 +---
>  4 files changed, 57 insertions(+), 53 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index ad0b934ddfc..5ab93d11d68 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1985,13 +1985,13 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
> *cmd_buffer,
>  {
> if ((pipeline_is_dirty ||
> (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
> -   cmd_buffer->state.pipeline->vertex_elements.count &&
> +   cmd_buffer->state.pipeline->num_vertex_bindings &&
> radv_get_shader(cmd_buffer->state.pipeline, 
> MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
> struct radv_vertex_elements_info *velems = 
> _buffer->state.pipeline->vertex_elements;
> unsigned vb_offset;
> void *vb_ptr;
> uint32_t i = 0;
> -   uint32_t count = velems->count;
> +   uint32_t count = 
> cmd_buffer->state.pipeline->num_vertex_bindings;
> uint64_t va;
>
> /* allocate some descriptor state for vertex buffers */
> @@ -2002,13 +2002,15 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
> *cmd_buffer,
> for (i = 0; i < count; i++) {
> uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
> uint32_t offset;
> -   int vb = velems->binding[i];
> -   struct radv_buffer *buffer = 
> cmd_buffer->vertex_bindings[vb].buffer;
> -   uint32_t stride = 
> cmd_buffer->state.pipeline->binding_stride[vb];
> +   struct radv_buffer *buffer = 
> cmd_buffer->vertex_bindings[i].buffer;
> +   uint32_t stride = 
> cmd_buffer->state.pipeline->binding_stride[i];
> +
> +   if (!buffer)
> +   continue;
>
> va = radv_buffer_get_va(buffer->bo);
>
> -   offset = cmd_buffer->vertex_bindings[vb].offset + 
> velems->offset[i];
> +   offset = cmd_buffer->vertex_bindings[i].offset;
> va += offset + buffer->offset;
> desc[0] = va;
> desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 
> S_008F04_STRIDE(stride);
> @@ -2016,7 +2018,12 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
> *cmd_buffer,
> desc[2] = (buffer->size - offset - 
> velems->format_size[i]) / stride + 1;
> else
> desc[2] = buffer->size - offset;
> -   desc[3] = velems->rsrc_word3[i];
> +   desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
> + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
> + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
> + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
> + 
> S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
> + 
> S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
> }
>
> va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index 36f499be212..e6c8f3ecb92 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2008,6 +2008,8 @@ adjust_vertex_fetch_alpha(struct radv_shader_context 
> *ctx,
>
> LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
>
> +   alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
> +
> if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
> alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, 
> "");
> else
> @@ -2035,7 +2037,7 @@ 

Re: [Mesa-dev] [PATCH] radv: fix pointSizeRange limits

2019-03-11 Thread Bas Nieuwenhuizen
r-b

On Mon, Mar 11, 2019 at 10:23 AM Samuel Pitoiset
 wrote:
>
> The values should match the ones that are emitted.
>
> This fixes new CTS dEQP-VK.rasterization.primitive_size.points.*.
>
> Fixes: f4e499ec791 ("radv: add initial non-conformant radv vulkan driver")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index fc04de21025..83d218fb6bf 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -1016,7 +1016,7 @@ void radv_GetPhysicalDeviceProperties(
> .maxCullDistances = 8,
> .maxCombinedClipAndCullDistances  = 8,
> .discreteQueuePriorities  = 2,
> -   .pointSizeRange   = { 0.125, 255.875 
> },
> +   .pointSizeRange   = { 0.0, 8192.0 },
> .lineWidthRange   = { 0.0, 7.9921875 
> },
> .pointSizeGranularity = (1.0 / 8.0),
> .lineWidthGranularity = (1.0 / 128.0),
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: fix binding transform feedback buffers

2019-03-11 Thread Bas Nieuwenhuizen
Hmm, is there nothing to disable them again?

Reviewed-by: Bas Nieuwenhuizen 

On Tue, Mar 5, 2019 at 6:06 PM Samuel Pitoiset
 wrote:
>
> The mask should be accumulated if two calls are used for
> binding two buffers at different indexes. Otherwise, the
> driver only accounts for the last one.
>
> Noticed while glancing at this code.
>
> Cc: 18.3 19.0 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 5b66930d137..b8d8583c1b0 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4987,7 +4987,7 @@ void radv_CmdBindTransformFeedbackBuffersEXT(
> enabled_mask |= 1 << idx;
> }
>
> -   cmd_buffer->state.streamout.enabled_mask = enabled_mask;
> +   cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
>
> cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
>  }
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Revert "radv: execute external subpass barriers after ending subpasses"

2019-03-08 Thread Bas Nieuwenhuizen
oh, also add a Fixes tag please.

On Fri, Mar 8, 2019 at 2:50 PM Bas Nieuwenhuizen
 wrote:
>
> I actually think it is partially right, but let's indeed revert for now
>
> Reviewed-by: Bas Nieuwenhuizen 
>
> On Fri, Mar 8, 2019 at 2:48 PM Samuel Pitoiset
>  wrote:
> >
> > This change is actually wrong because we have to sync
> > before doing image layout transitions.
> >
> > This fixes rendering issues in Batman, Path of Exile and
> > probably more titles.
> >
> > This reverts commit 76c17cfd8da017ebd19be33ba6cef888957a6758.
> >
> > Cc: 19.0 
> > Signed-off-by: Samuel Pitoiset 
> > ---
> >  src/amd/vulkan/radv_cmd_buffer.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> > b/src/amd/vulkan/radv_cmd_buffer.c
> > index 5b66930d137..d8aceb8b082 100644
> > --- a/src/amd/vulkan/radv_cmd_buffer.c
> > +++ b/src/amd/vulkan/radv_cmd_buffer.c
> > @@ -4395,10 +4395,10 @@ void radv_CmdEndRenderPass(
> >  {
> > RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
> >
> > -   radv_cmd_buffer_end_subpass(cmd_buffer);
> > -
> > radv_subpass_barrier(cmd_buffer, 
> > > &cmd_buffer->state.pass->end_barrier);
> >
> > +   radv_cmd_buffer_end_subpass(cmd_buffer);
> > +
> > > vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
> >
> > cmd_buffer->state.pass = NULL;
> > --
> > 2.21.0
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Revert "radv: execute external subpass barriers after ending subpasses"

2019-03-08 Thread Bas Nieuwenhuizen
I actually think it is partially right, but let's indeed revert for now

Reviewed-by: Bas Nieuwenhuizen 

On Fri, Mar 8, 2019 at 2:48 PM Samuel Pitoiset
 wrote:
>
> This change is actually wrong because we have to sync
> before doing image layout transitions.
>
> This fixes rendering issues in Batman, Path of Exile and
> probably more titles.
>
> This reverts commit 76c17cfd8da017ebd19be33ba6cef888957a6758.
>
> Cc: 19.0 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 5b66930d137..d8aceb8b082 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4395,10 +4395,10 @@ void radv_CmdEndRenderPass(
>  {
> RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
>
> -   radv_cmd_buffer_end_subpass(cmd_buffer);
> -
> radv_subpass_barrier(cmd_buffer, 
> &cmd_buffer->state.pass->end_barrier);
>
> +   radv_cmd_buffer_end_subpass(cmd_buffer);
> +
> vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
>
> cmd_buffer->state.pass = NULL;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: allocate enough space in cmdbuf when starting a subpass

2019-03-05 Thread Bas Nieuwenhuizen
On Tue, Mar 5, 2019 at 10:42 AM Samuel Pitoiset
 wrote:
>
> This fixes some CTS crashes with:
> dEQP-VK.renderpass2.suballocation.attachment_write_mask.attachment_count_8.start_index_*
>
> Ideally, we should check cmd_buffer->cs->max_dw because there is
> likely enough space (the internal clear draws allocate space), but
> keep it that way for consistency.

Isn't this what check_space does?

Reviewed-by: Bas Nieuwenhuizen 
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index ad0b934ddfc..3e652018499 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -3446,7 +3446,7 @@ radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer 
> *cmd_buffer,
> struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
>
> MAYBE_UNUSED unsigned cdw_max = 
> radeon_check_space(cmd_buffer->device->ws,
> -  cmd_buffer->cs, 
> 2048);
> +  cmd_buffer->cs, 
> 4096);
>
> radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
>
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac/nir: implement emit_{imul, umul}_2x32_64 opcodes

2019-03-05 Thread Bas Nieuwenhuizen
On Tue, Mar 5, 2019 at 10:30 AM Samuel Pitoiset
 wrote:
>
> Fixes: 58bcebd987b ("spirv: Allow [i/u]mulExtended to use new nir opcode")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 36 +
>  1 file changed, 36 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index af7a95137c2..74ae690e845 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -423,6 +423,32 @@ static LLVMValueRef emit_imul_high(struct 
> ac_llvm_context *ctx,
> return result;
>  }
>
> +static LLVMValueRef emit_umul_2x32_64(struct ac_llvm_context *ctx,
> + LLVMValueRef src0, LLVMValueRef src1)
> +{
> +   LLVMValueRef result[2];
> +
> +   result[0] = LLVMBuildMul(ctx->builder, src0, src1, "");
> +   result[1] = emit_umul_high(ctx, src0, src1);
> +
> +   LLVMValueRef tmp = LLVMGetUndef(ctx->v2i32);

This tmp assignment is dead?
> +   tmp = ac_build_gather_values(ctx, result, 2);
> +   return LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
> +}
> +
> +static LLVMValueRef emit_imul_2x32_64(struct ac_llvm_context *ctx,
> + LLVMValueRef src0, LLVMValueRef src1)
> +{
> +   LLVMValueRef result[2];
> +
> +   result[0] = LLVMBuildMul(ctx->builder, src0, src1, "");
> +   result[1] = emit_imul_high(ctx, src0, src1);

If we do this lowering, why not just set options->lower_mul_2x32_64?

does it result in better code from LLVM if we convert both args to 64
bit and do a 64-bit mul?

> +
> +   LLVMValueRef tmp = LLVMGetUndef(ctx->v2i32);

This tmp assignment is dead?
> +   tmp = ac_build_gather_values(ctx, result, 2);
> +   return LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
> +}
> +
>  static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
>   bool is_signed,
>   const LLVMValueRef srcs[3])
> @@ -977,6 +1003,16 @@ static void visit_alu(struct ac_nir_context *ctx, const 
> nir_alu_instr *instr)
> src[1] = ac_to_integer(&ctx->ac, src[1]);
> result = emit_imul_high(&ctx->ac, src[0], src[1]);
> break;
> +   case nir_op_umul_2x32_64:
> +   src[0] = ac_to_integer(&ctx->ac, src[0]);
> +   src[1] = ac_to_integer(&ctx->ac, src[1]);
> +   result = emit_umul_2x32_64(&ctx->ac, src[0], src[1]);
> +   break;
> +   case nir_op_imul_2x32_64:
> +   src[0] = ac_to_integer(&ctx->ac, src[0]);
> +   src[1] = ac_to_integer(&ctx->ac, src[1]);
> +   result = emit_imul_2x32_64(&ctx->ac, src[0], src[1]);
> +   break;
> case nir_op_pack_half_2x16:
> result = emit_pack_half_2x16(&ctx->ac, src[0]);
> break;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] radv: properly align the fence and EOP bug VA on GFX9

2019-03-05 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Mon, Mar 4, 2019 at 2:22 PM Samuel Pitoiset
 wrote:
>
> If alignment is 0, offsets returned by
> radv_cmd_buffer_upload_alloc() are always 0. These two
> virtual addresses were pointing at the same location.
>
> v2: - add an assertion that checks if alignment is a power of two
>
> Cc: 18.3 19.0 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index ad0b934ddfc..7cc7b3b9195 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -338,14 +338,15 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer 
> *cmd_buffer)
> unsigned fence_offset, eop_bug_offset;
> void *fence_ptr;
>
> -   radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0, &fence_offset,
> +   radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset,
>  &fence_ptr);
> +
> cmd_buffer->gfx9_fence_va =
> radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> cmd_buffer->gfx9_fence_va += fence_offset;
>
> /* Allocate a buffer for the EOP bug on GFX9. */
> -   radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
> +   radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8,
>  &eop_bug_offset, &fence_ptr);
> cmd_buffer->gfx9_eop_bug_va =
> radv_buffer_get_va(cmd_buffer->upload.upload_bo);
> @@ -416,6 +417,8 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer 
> *cmd_buffer,
>  unsigned *out_offset,
>  void **ptr)
>  {
> +   assert(util_is_power_of_two_nonzero(alignment));
> +
> uint64_t offset = align(cmd_buffer->upload.offset, alignment);
> if (offset + size > cmd_buffer->upload.size) {
> if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: use 32_AR instead of 32_ABGR when alpha coverage is required

2019-03-04 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Mar 1, 2019 at 6:25 PM Samuel Pitoiset
 wrote:
>
> This export format is faster. Seems to improve performance in
> Wreckfest.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_pipeline.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 30c3f60790e..7f2f96c540a 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -529,7 +529,7 @@ radv_pipeline_compute_spi_color_formats(struct 
> radv_pipeline *pipeline,
>  * alpha channel of MRT0 when alpha coverage is enabled 
> because
>  * the depth attachment needs it.
>  */
> -   col_format |= V_028714_SPI_SHADER_32_ABGR;
> +   col_format |= V_028714_SPI_SHADER_32_AR;
> }
>
> /* If the i-th target format is set, all previous target formats must
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir/builder: Don't emit no-op swizzles

2019-02-22 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Sat, Feb 23, 2019 at 1:14 AM Ian Romanick  wrote:
>
> Reviewed-by: Ian Romanick 
>
> On 2/22/19 4:03 PM, Jason Ekstrand wrote:
> > The nir_swizzle helper is used some on its own but it's also called by
> > nir_channel and nir_channels which are used everywhere.  It's pretty
> > quick to check while we're walking the swizzle anyway whether or not
> > it's an identity swizzle.  If it is, we now don't bother emitting the
> > instruction.  Sure, copy-prop will clean it up for us but there's no
> > sense making more work for the optimizer than we have to.
> > ---
> >  src/compiler/nir/nir_builder.h | 10 +-
> >  1 file changed, 9 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
> > index c6e80e729a8..253ca5941cb 100644
> > --- a/src/compiler/nir/nir_builder.h
> > +++ b/src/compiler/nir/nir_builder.h
> > @@ -497,8 +497,16 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, 
> > const unsigned *swiz,
> > assert(num_components <= NIR_MAX_VEC_COMPONENTS);
> > nir_alu_src alu_src = { NIR_SRC_INIT };
> > alu_src.src = nir_src_for_ssa(src);
> > -   for (unsigned i = 0; i < num_components && i < NIR_MAX_VEC_COMPONENTS; 
> > i++)
> > +
> > +   bool is_identity_swizzle = true;
> > +   for (unsigned i = 0; i < num_components && i < NIR_MAX_VEC_COMPONENTS; 
> > i++) {
> > +  if (swiz[i] != i)
> > + is_identity_swizzle = false;
> >alu_src.swizzle[i] = swiz[i];
> > +   }
> > +
> > +   if (num_components == src->num_components && is_identity_swizzle)
> > +  return src;
> >
> > return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
> >   nir_imov_alu(build, alu_src, num_components);
> >
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: Sync ETC2 whitelisted devices.

2019-02-19 Thread Bas Nieuwenhuizen
Fixes: 4bb6c49375e "radv: Allow ETC2 on RAVEN and VEGA10 instead of all GFX9."
---
 src/amd/vulkan/radv_device.c  |  3 +--
 src/amd/vulkan/radv_formats.c | 12 +---
 src/amd/vulkan/radv_private.h |  1 +
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 190de86b2cd..53f2c05fc42 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -739,8 +739,7 @@ void radv_GetPhysicalDeviceFeatures(
.alphaToOne   = true,
.multiViewport= true,
.samplerAnisotropy= true,
-   .textureCompressionETC2   = 
pdevice->rad_info.chip_class >= GFX9 ||
-   
pdevice->rad_info.family == CHIP_STONEY,
+   .textureCompressionETC2   = 
radv_device_supports_etc(pdevice),
.textureCompressionASTC_LDR   = false,
.textureCompressionBC = true,
.occlusionQueryPrecise= true,
diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index 499d94befeb..0a3ff9ebbd9 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -595,6 +595,14 @@ static bool 
radv_is_filter_minmax_format_supported(VkFormat format)
}
 }
 
+bool
+radv_device_supports_etc(struct radv_physical_device *physical_device)
+{
+   return physical_device->rad_info.family == CHIP_VEGA10 ||
+  physical_device->rad_info.family == CHIP_RAVEN ||
+  physical_device->rad_info.family == CHIP_STONEY;
+}
+
 static void
 radv_physical_device_get_format_properties(struct radv_physical_device 
*physical_device,
   VkFormat format,
@@ -612,9 +620,7 @@ radv_physical_device_get_format_properties(struct 
radv_physical_device *physical
}
 
if (desc->layout == VK_FORMAT_LAYOUT_ETC &&
-   physical_device->rad_info.family != CHIP_VEGA10 &&
-   physical_device->rad_info.family != CHIP_RAVEN &&
-   physical_device->rad_info.family != CHIP_STONEY) {
+   !radv_device_supports_etc(physical_device)) {
out_properties->linearTilingFeatures = linear;
out_properties->optimalTilingFeatures = tiled;
out_properties->bufferFeatures = buffer;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ddabcedc958..27b5a9e77cd 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1465,6 +1465,7 @@ bool radv_format_pack_clear_color(VkFormat format,
 bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable);
 bool radv_dcc_formats_compatible(VkFormat format1,
  VkFormat format2);
+bool radv_device_supports_etc(struct radv_physical_device *physical_device);
 
 struct radv_fmask_info {
uint64_t offset;
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] ac: use new LLVM 8 intrinsic when loading 16-bit values

2019-02-18 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for both.

On Thu, Feb 14, 2019 at 2:39 PM Samuel Pitoiset
 wrote:
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_llvm_build.c | 41 ++
>  1 file changed, 27 insertions(+), 14 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 3acf41728ac..867a13622f9 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -1347,20 +1347,33 @@ ac_build_tbuffer_load_short(struct ac_llvm_context 
> *ctx,
> LLVMValueRef immoffset,
> LLVMValueRef glc)
>  {
> -   const char *name = "llvm.amdgcn.tbuffer.load.i32";
> -   LLVMTypeRef type = ctx->i32;
> -   LLVMValueRef params[] = {
> -   rsrc,
> -   vindex,
> -   voffset,
> -   soffset,
> -   immoffset,
> -   LLVMConstInt(ctx->i32, 
> V_008F0C_BUF_DATA_FORMAT_16, false),
> -   LLVMConstInt(ctx->i32, 
> V_008F0C_BUF_NUM_FORMAT_UINT, false),
> -   glc,
> -   ctx->i1false,
> -   };
> -   LLVMValueRef res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
> +   unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
> +   unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
> +   LLVMValueRef res;
> +
> +   if (HAVE_LLVM >= 0x0800) {
> +   voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
> +
> +   res = ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
> + soffset, 1, dfmt, nfmt, glc,
> + false, true, true);
> +   } else {
> +   const char *name = "llvm.amdgcn.tbuffer.load.i32";
> +   LLVMTypeRef type = ctx->i32;
> +   LLVMValueRef params[] = {
> +   rsrc,
> +   vindex,
> +   voffset,
> +   soffset,
> +   immoffset,
> +   LLVMConstInt(ctx->i32, dfmt, false),
> +   LLVMConstInt(ctx->i32, nfmt, false),
> +   glc,
> +   ctx->i1false,
> +   };
> +   res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
> +   }
> +
> return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
>  }
>
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: write the alpha channel of MRT0 when alpha coverage is enabled

2019-02-18 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 15, 2019 at 6:00 PM Samuel Pitoiset
 wrote:
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109597
> Cc: 18.3 19.0 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_pipeline.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 9745a1f2aa7..6b54da2e31b 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -511,6 +511,13 @@ radv_pipeline_compute_spi_color_formats(struct 
> radv_pipeline *pipeline,
>
> if (subpass->color_attachments[i].attachment == 
> VK_ATTACHMENT_UNUSED) {
> cf = V_028714_SPI_SHADER_ZERO;
> +
> +   if (blend->need_src_alpha & (1 << i)) {
> +   /* Write the alpha channel of MRT0 when alpha 
> coverage is
> +* enabled because the depth attachment needs 
> it.
> +*/
> +   col_format |= V_028714_SPI_SHADER_32_ABGR;
> +   }
> } else {
> struct radv_render_pass_attachment *attachment = 
> pass->attachments + subpass->color_attachments[i].attachment;
> bool blend_enable =
> @@ -689,6 +696,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline 
> *pipeline,
>
> if (vkms && vkms->alphaToCoverageEnable) {
> blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
> +   blend.need_src_alpha |= 0x1;
> }
>
> blend.cb_target_mask = 0;
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: fix invalid element type when filling vertex input default values

2019-02-15 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 15, 2019 at 3:57 PM Samuel Pitoiset
 wrote:
>
> The elements added into a vector should have the same type as the
> first one, otherwise this hits an assertion in LLVM.
>
> Fixes: 4b3549c0846 ("radv: reduce the number of loaded channels for vertex 
> input fetches")
> reported-by: Philip Rebohle 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_nir_to_llvm.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index f1fc392292a..28221b2889a 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -2089,8 +2089,10 @@ radv_fixup_vertex_input_fetches(struct 
> radv_shader_context *ctx,
> elemtype = LLVMTypeOf(value);
> }
>
> -   for (unsigned i = num_channels; i < 4; i++)
> +   for (unsigned i = num_channels; i < 4; i++) {
> chan[i] = i == 3 ? one : zero;
> +   chan[i] = ac_to_float(&ctx->ac, chan[i]);
> +   }
>
> return ac_build_gather_values(&ctx->ac, chan, 4);
>  }
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] radv: reduce the number of loaded channels for vertex input fetches

2019-02-13 Thread Bas Nieuwenhuizen
On Tue, Feb 12, 2019 at 3:07 PM Samuel Pitoiset
 wrote:
>
> It's unnecessary to load more channels than the vertex attribute
> format. The remaining channels are filled with 0 for y and z,
> and 1 for w.
>
> 29077 shaders in 15096 tests
> Totals:
> SGPRS: 1321605 -> 1318869 (-0.21 %)
> VGPRS: 935236 -> 932252 (-0.32 %)
> Spilled SGPRs: 24860 -> 24776 (-0.34 %)
> Code Size: 49832348 -> 49819464 (-0.03 %) bytes
> Max Waves: 242101 -> 242611 (0.21 %)
>
> Totals from affected shaders:
> SGPRS: 93675 -> 90939 (-2.92 %)
> VGPRS: 58016 -> 55032 (-5.14 %)
> Spilled SGPRs: 172 -> 88 (-48.84 %)
> Code Size: 2862740 -> 2849856 (-0.45 %) bytes
> Max Waves: 15474 -> 15984 (3.30 %)
>
> This mostly helps Croteam games (Talos/Sam2017).
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_nir_to_llvm.c | 83 ++-
>  1 file changed, 81 insertions(+), 2 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index 7f74678d5f1..b1e0c64e4e1 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -1967,6 +1967,72 @@ adjust_vertex_fetch_alpha(struct radv_shader_context 
> *ctx,
> return alpha;
>  }
>
> +static unsigned
> +get_num_channels_from_data_format(unsigned data_format)
> +{
> +   switch (data_format) {
> +   case V_008F0C_BUF_DATA_FORMAT_8:
> +   case V_008F0C_BUF_DATA_FORMAT_16:
> +   case V_008F0C_BUF_DATA_FORMAT_32:
> +   return 1;
> +   case V_008F0C_BUF_DATA_FORMAT_8_8:
> +   case V_008F0C_BUF_DATA_FORMAT_16_16:
> +   case V_008F0C_BUF_DATA_FORMAT_32_32:
> +   return 2;
> +   case V_008F0C_BUF_DATA_FORMAT_10_11_11:
> +   case V_008F0C_BUF_DATA_FORMAT_11_11_10:
> +   case V_008F0C_BUF_DATA_FORMAT_32_32_32:
> +   return 3;
> +   case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
> +   case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
> +   case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
> +   case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
> +   case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
> +   return 4;
> +   default:
> +   break;
> +   }
> +
> +   return 4;
> +}
> +
> +static LLVMValueRef
> +radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
> +   LLVMValueRef value,
> +   unsigned num_channels,
> +   bool is_float)
> +{
> +   LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0;
> +   LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1;
> +   LLVMTypeRef elemtype;
> +   LLVMValueRef chan[4];
> +
> +   if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
> +   unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
> +
> +   if (num_channels == 4 && vec_size == 4)
> +   return value;

Just num_channels == vec_size ?

> +
> +   num_channels = MIN2(num_channels, vec_size);
> +
> +   for (unsigned i = 0; i < num_channels; i++)
> +   chan[i] = ac_llvm_extract_elem(&ctx->ac, value, i);
> +
> +   elemtype = LLVMGetElementType(LLVMTypeOf(value));
> +   } else {
> +   if (num_channels) {
> +   assert(num_channels == 1);
> +   chan[0] = value;
> +   }
> +   elemtype = LLVMTypeOf(value);
> +   }
> +
> +   for (unsigned i = num_channels; i < 4; i++)
> +   chan[i] = i == 3 ? one : zero;
> +
> +   return ac_build_gather_values(&ctx->ac, chan, 4);
> +}
> +
>  static void
>  handle_vs_input_decl(struct radv_shader_context *ctx,
>  struct nir_variable *variable)
> @@ -1979,7 +2045,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
> unsigned attrib_count = glsl_count_attribute_slots(variable->type, 
> true);
> uint8_t input_usage_mask =
> 
> ctx->shader_info->info.vs.input_usage_mask[variable->data.location];
> -   unsigned num_channels = util_last_bit(input_usage_mask);
> +   unsigned num_input_channels = util_last_bit(input_usage_mask);
>
> variable->data.driver_location = variable->data.location * 4;
>
> @@ -1987,6 +2053,10 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
> for (unsigned i = 0; i < attrib_count; ++i) {
> LLVMValueRef output[4];
> unsigned attrib_index = variable->data.location + i - 
> VERT_ATTRIB_GENERIC0;
> +   unsigned attrib_format = 
> ctx->options->key.vs.vertex_attribute_formats[attrib_index];
> +   unsigned data_format = attrib_format & 0x0f;
> +   unsigned num_format = (attrib_format >> 4) & 0x07;
> +   bool is_float = num_format == V_008F0C_BUF_NUM_FORMAT_FLOAT;
>
> if (ctx->options->key.vs.instance_rate_inputs & (1u << 
> attrib_index)) {
>

Re: [Mesa-dev] [PATCH] radv: always export gl_SampleMask when the fragment shader uses it

2019-02-13 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

I'm assuming the real fix is for the shader to not write the sample
mask if we have 1 sample?

On Tue, Feb 12, 2019 at 6:52 PM Samuel Pitoiset
 wrote:
>
> For some reason, this breaks tree rendering in Project Cars.
>
> Fixes: 85010585cde ("radv: only enable gl_SampleMask if MSAA is enabled too")
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109401
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_pipeline.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index fb6c61cf3f0..a9df2b94b93 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -3183,11 +3183,11 @@ radv_compute_db_shader_control(const struct 
> radv_device *device,
> bool disable_rbplus = device->physical_device->has_rbplus &&
>   !device->physical_device->rbplus_allowed;
>
> -   /* Do not enable the gl_SampleMask fragment shader output if MSAA is
> -* disabled.
> +   /* It shouldn't be needed to export gl_SampleMask when MSAA is 
> disabled
> +* but this appears to break Project Cars (DXVK). See
> +* https://bugs.freedesktop.org/show_bug.cgi?id=109401
>  */
> -   bool mask_export_enable = ms->num_samples > 1 &&
> - ps->info.info.ps.writes_sample_mask;
> +   bool mask_export_enable = ps->info.info.ps.writes_sample_mask;
>
> return  S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
> 
> S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: fix compiler issues with GCC 9

2019-02-12 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Mon, Feb 11, 2019 at 1:16 PM Gustaw Smolarczyk  wrote:
>
> FWIW,
>
> Reviewed-by: Gustaw Smolarczyk 
>
> pon., 11 lut 2019 o 10:15 Samuel Pitoiset 
> napisał(a):
> >
> > "The C standard says that compound literals which occur inside of
> > the body of a function have automatic storage duration associated
> > with the enclosing block. Older GCC releases were putting such
> > compound literals into the scope of the whole function, so their
> > lifetime actually ended at the end of containing function. This
> > has been fixed in GCC 9. Code that relied on this extended lifetime
> > needs to be fixed, move the compound literals to whatever scope
> > they need to accessible in."
> >
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109543
> > Cc: 
> > Signed-off-by: Samuel Pitoiset 
> > ---
> >  src/amd/vulkan/radv_meta_blit.c | 90 ++---
> >  1 file changed, 48 insertions(+), 42 deletions(-)
> >
> > diff --git a/src/amd/vulkan/radv_meta_blit.c 
> > b/src/amd/vulkan/radv_meta_blit.c
> > index a2ba7e45022..5af9c4a303f 100644
> > --- a/src/amd/vulkan/radv_meta_blit.c
> > +++ b/src/amd/vulkan/radv_meta_blit.c
> > @@ -849,54 +849,60 @@ build_pipeline(struct radv_device *device,
> > .subpass = 0,
> > };
> >
> > -   switch(aspect) {
> > -   case VK_IMAGE_ASPECT_COLOR_BIT:
> > -   vk_pipeline_info.pColorBlendState = 
> > &(VkPipelineColorBlendStateCreateInfo) {
> > -   .sType = 
> > VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
> > -   .attachmentCount = 1,
> > -   .pAttachments = 
> > (VkPipelineColorBlendAttachmentState []) {
> > -   { .colorWriteMask =
> > -   VK_COLOR_COMPONENT_A_BIT |
> > -   VK_COLOR_COMPONENT_R_BIT |
> > -   VK_COLOR_COMPONENT_G_BIT |
> > -   VK_COLOR_COMPONENT_B_BIT },
> > +   VkPipelineColorBlendStateCreateInfo color_blend_info = {
> > +   .sType = 
> > VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
> > +   .attachmentCount = 1,
> > +   .pAttachments = (VkPipelineColorBlendAttachmentState []) {
> > +   {
> > +   .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
> > + VK_COLOR_COMPONENT_R_BIT |
> > + VK_COLOR_COMPONENT_G_BIT |
> > + VK_COLOR_COMPONENT_B_BIT 
> > },
> > }
> > };
> > +
> > +   VkPipelineDepthStencilStateCreateInfo depth_info = {
> > +   .sType = 
> > VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
> > +   .depthTestEnable = true,
> > +   .depthWriteEnable = true,
> > +   .depthCompareOp = VK_COMPARE_OP_ALWAYS,
> > +   };
> > +
> > +   VkPipelineDepthStencilStateCreateInfo stencil_info = {
> > +   .sType = 
> > VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
> > +   .depthTestEnable = false,
> > +   .depthWriteEnable = false,
> > +   .stencilTestEnable = true,
> > +   .front = {
> > +   .failOp = VK_STENCIL_OP_REPLACE,
> > +   .passOp = VK_STENCIL_OP_REPLACE,
> > +   .depthFailOp = VK_STENCIL_OP_REPLACE,
> > +   .compareOp = VK_COMPARE_OP_ALWAYS,
> > +   .compareMask = 0xff,
> > +   .writeMask = 0xff,
> > +   .reference = 0
> > +   },
> > +   .back = {
> > +   .failOp = VK_STENCIL_OP_REPLACE,
> > +   .passOp = VK_STENCIL_OP_REPLACE,
> > +   .depthFailOp = VK_STENCIL_OP_REPLACE,
> > +   .compareOp = VK_COMPARE_OP_ALWAYS,
> > +   .compareMask = 0xff,
> > +   .writeMask = 0xff,
> > +   .reference = 0
> > +   },
> > +   .depthCompareOp = VK_COMPARE_OP_ALWAYS,
> > +   };
> > +
> > +   switch(

Re: [Mesa-dev] [PATCH] radv/llvm: initialise passes static member.

2019-02-08 Thread Bas Nieuwenhuizen
The variable is not static? (initializing a static member in the
constructor would be nonsense ...)

With that word removed from the title:

Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 8, 2019 at 6:26 AM Dave Airlie  wrote:
>
> From: Dave Airlie 
>
> Fixes coverity warning
> ---
>  src/amd/vulkan/radv_llvm_helper.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_llvm_helper.cpp 
> b/src/amd/vulkan/radv_llvm_helper.cpp
> index f651593ca62..d1e1e376f90 100644
> --- a/src/amd/vulkan/radv_llvm_helper.cpp
> +++ b/src/amd/vulkan/radv_llvm_helper.cpp
> @@ -29,7 +29,7 @@ class radv_llvm_per_thread_info {
>  public:
> radv_llvm_per_thread_info(enum radeon_family arg_family,
> enum ac_target_machine_options arg_tm_options)
> -   : family(arg_family), tm_options(arg_tm_options) {}
> +   : family(arg_family), tm_options(arg_tm_options), 
> passes(NULL) {}
>
> ~radv_llvm_per_thread_info()
> {
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 4/4] radv: add support for push constants inlining when possible

2019-02-05 Thread Bas Nieuwenhuizen
On Tue, Feb 5, 2019 at 11:07 AM Samuel Pitoiset
 wrote:
>
>
> On 2/5/19 10:58 AM, Bas Nieuwenhuizen wrote:
> > On Fri, Jan 25, 2019 at 5:27 PM Samuel Pitoiset
> >  wrote:
> >> This removes some scalar loads from shaders, but it increases
> >> the number of SET_SH_REG packets. This is currently basic but
> >> it could be improved if needed. Inlining dynamic offsets might
> >> also help.
> >>
> >> Original idea from Dave Airlie.
> >>
> >> 29164 shaders in 15096 tests
> >> Totals:
> >> SGPRS: 1336072 -> 1365241 (2.18 %)
> >> VGPRS: 937784 -> 934592 (-0.34 %)
> >> Spilled SGPRs: 24751 -> 24796 (0.18 %)
> >> Code Size: 50001672 -> 49815524 (-0.37 %) bytes
> >> Max Waves: 208755 -> 208830 (0.04 %)
> >>
> >> Totals from affected shaders:
> >> SGPRS: 295018 -> 324187 (9.89 %)
> >> VGPRS: 243108 -> 239916 (-1.31 %)
> >> Spilled SGPRs: 1464 -> 1509 (3.07 %)
> >> Code Size: 8028188 -> 7842040 (-2.32 %) bytes
> >> Max Waves: 69580 -> 69655 (0.11 %)
> >>
> >> Signed-off-by: Samuel Pitoiset 
> >> ---
> >>   src/amd/common/ac_nir_to_llvm.c   | 23 +++--
> >>   src/amd/common/ac_shader_abi.h|  4 ++
> >>   src/amd/vulkan/radv_cmd_buffer.c  | 78 ++-
> >>   src/amd/vulkan/radv_nir_to_llvm.c | 54 +
> >>   src/amd/vulkan/radv_shader.h  | 10 ++--
> >>   src/amd/vulkan/radv_shader_info.c |  4 ++
> >>   6 files changed, 145 insertions(+), 28 deletions(-)
> >>
> >> diff --git a/src/amd/common/ac_nir_to_llvm.c 
> >> b/src/amd/common/ac_nir_to_llvm.c
> >> index f509fc31dff..db1574b5b35 100644
> >> --- a/src/amd/common/ac_nir_to_llvm.c
> >> +++ b/src/amd/common/ac_nir_to_llvm.c
> >> @@ -1392,10 +1392,27 @@ static LLVMValueRef 
> >> visit_load_push_constant(struct ac_nir_context *ctx,
> >>nir_intrinsic_instr *instr)
> >>   {
> >>  LLVMValueRef ptr, addr;
> >> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> >> +   unsigned index = nir_intrinsic_base(instr);
> >>
> >> -   addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
> >> -   addr = LLVMBuildAdd(ctx->ac.builder, addr,
> >> -   get_src(ctx, instr->src[0]), "");
> >> +   addr = LLVMConstInt(ctx->ac.i32, index, 0);
> >> +   addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
> >> +
> >> +   /* Load constant values from user SGPRS when possible, otherwise
> >> +* fallback to the default path that loads directly from memory.
> >> +*/
> >> +   if (LLVMIsConstant(src0) &&
> >> +   index == 0 &&
> >> +   instr->dest.ssa.bit_size == 32) {
> >> +   unsigned offset = LLVMConstIntGetZExtValue(src0) / 4;
> >> +   unsigned count = instr->dest.ssa.num_components;
> >> +
> >> +   if (offset + count <= ctx->abi->num_inline_push_consts) {
> >> +   return ac_build_gather_values(&ctx->ac,
> >> + 
> >> ctx->abi->inline_push_consts + offset,
> >> + count);
> >> +   }
> >> +   }
> >>
> >>  ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);
> >>
> >> diff --git a/src/amd/common/ac_shader_abi.h 
> >> b/src/amd/common/ac_shader_abi.h
> >> index ee18e6c1923..704c3d107c2 100644
> >> --- a/src/amd/common/ac_shader_abi.h
> >> +++ b/src/amd/common/ac_shader_abi.h
> >> @@ -32,6 +32,8 @@ struct nir_variable;
> >>
> >>   #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
> >>
> >> +#define AC_MAX_INLINE_PUSH_CONSTS 8
> >> +
> >>   enum ac_descriptor_type {
> >>  AC_DESC_IMAGE,
> >>  AC_DESC_FMASK,
> >> @@ -66,6 +68,8 @@ struct ac_shader_abi {
> >>
> >>  /* Vulkan only */
> >>  LLVMValueRef push_constants;
> >> +   LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
> >> +   unsigned num_inline_push_consts;
> >>  LLVMValueRef view_index;
> >>
> >>  LLVMValueRef outputs[AC_LLVM_MAX_OUT

Re: [Mesa-dev] [RFC PATCH 4/4] radv: add support for push constants inlining when possible

2019-02-05 Thread Bas Nieuwenhuizen
On Fri, Jan 25, 2019 at 5:27 PM Samuel Pitoiset
 wrote:
>
> This removes some scalar loads from shaders, but it increases
> the number of SET_SH_REG packets. This is currently basic but
> it could be improved if needed. Inlining dynamic offsets might
> also help.
>
> Original idea from Dave Airlie.
>
> 29164 shaders in 15096 tests
> Totals:
> SGPRS: 1336072 -> 1365241 (2.18 %)
> VGPRS: 937784 -> 934592 (-0.34 %)
> Spilled SGPRs: 24751 -> 24796 (0.18 %)
> Code Size: 50001672 -> 49815524 (-0.37 %) bytes
> Max Waves: 208755 -> 208830 (0.04 %)
>
> Totals from affected shaders:
> SGPRS: 295018 -> 324187 (9.89 %)
> VGPRS: 243108 -> 239916 (-1.31 %)
> Spilled SGPRs: 1464 -> 1509 (3.07 %)
> Code Size: 8028188 -> 7842040 (-2.32 %) bytes
> Max Waves: 69580 -> 69655 (0.11 %)
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c   | 23 +++--
>  src/amd/common/ac_shader_abi.h|  4 ++
>  src/amd/vulkan/radv_cmd_buffer.c  | 78 ++-
>  src/amd/vulkan/radv_nir_to_llvm.c | 54 +
>  src/amd/vulkan/radv_shader.h  | 10 ++--
>  src/amd/vulkan/radv_shader_info.c |  4 ++
>  6 files changed, 145 insertions(+), 28 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index f509fc31dff..db1574b5b35 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1392,10 +1392,27 @@ static LLVMValueRef visit_load_push_constant(struct 
> ac_nir_context *ctx,
>   nir_intrinsic_instr *instr)
>  {
> LLVMValueRef ptr, addr;
> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> +   unsigned index = nir_intrinsic_base(instr);
>
> -   addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
> -   addr = LLVMBuildAdd(ctx->ac.builder, addr,
> -   get_src(ctx, instr->src[0]), "");
> +   addr = LLVMConstInt(ctx->ac.i32, index, 0);
> +   addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
> +
> +   /* Load constant values from user SGPRS when possible, otherwise
> +* fallback to the default path that loads directly from memory.
> +*/
> +   if (LLVMIsConstant(src0) &&
> +   index == 0 &&
> +   instr->dest.ssa.bit_size == 32) {
> +   unsigned offset = LLVMConstIntGetZExtValue(src0) / 4;
> +   unsigned count = instr->dest.ssa.num_components;
> +
> +   if (offset + count <= ctx->abi->num_inline_push_consts) {
> +   return ac_build_gather_values(&ctx->ac,
> + 
> ctx->abi->inline_push_consts + offset,
> + count);
> +   }
> +   }
>
> ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);
>
> diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
> index ee18e6c1923..704c3d107c2 100644
> --- a/src/amd/common/ac_shader_abi.h
> +++ b/src/amd/common/ac_shader_abi.h
> @@ -32,6 +32,8 @@ struct nir_variable;
>
>  #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
>
> +#define AC_MAX_INLINE_PUSH_CONSTS 8
> +
>  enum ac_descriptor_type {
> AC_DESC_IMAGE,
> AC_DESC_FMASK,
> @@ -66,6 +68,8 @@ struct ac_shader_abi {
>
> /* Vulkan only */
> LLVMValueRef push_constants;
> +   LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
> +   unsigned num_inline_push_consts;
> LLVMValueRef view_index;
>
> LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index aae90290841..f80e2078da0 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -627,6 +627,23 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
> *cmd_buffer,
> }
>  }
>
> +static void
> +radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer,
> +struct radv_pipeline *pipeline,
> +gl_shader_stage stage,
> +int idx, int count, uint32_t *values)
> +{
> +   struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, 
> stage, idx);
> +   uint32_t base_reg = pipeline->user_data_0[stage];
> +   if (loc->sgpr_idx == -1)
> +   return;
> +
> +   assert(loc->num_sgprs == count);
> +
> +   radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 
> count);
> +   radeon_emit_array(cmd_buffer->cs, values, count);
> +}
> +
>  static void
>  radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
>   struct radv_pipeline *pipeline)
> @@ -1900,6 +1917,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
> radv_get_descriptors_state(cmd_buffer, bind_point);
> struct radv_pipeline_layout *layout = pipeline->layout;
> 

Re: [Mesa-dev] [PATCH 19/19] radv: don't flush src stages when dstStageMask == BOTTOM_OF_PIPE

2019-02-01 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series except 09 where I had comments.

On Tue, Jan 29, 2019 at 10:17 PM Samuel Pitoiset
 wrote:
>
> Original patch by Fredrik Höglund.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 16 +++-
>  src/amd/vulkan/radv_pass.c   |  6 --
>  2 files changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 93982c1232e..3b215b4b103 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4646,6 +4646,7 @@ struct radv_barrier_info {
> uint32_t eventCount;
> const VkEvent *pEvents;
> VkPipelineStageFlags srcStageMask;
> +   VkPipelineStageFlags dstStageMask;
>  };
>
>  static void
> @@ -4697,7 +4698,19 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer,
> image);
> }
>
> -   radv_stage_flush(cmd_buffer, info->srcStageMask);
> +   /* The Vulkan spec 1.1.98 says:
> +*
> +* "An execution dependency with only
> +*  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
> +*  will only prevent that stage from executing in subsequently
> +*  submitted commands. As this stage does not perform any actual
> +*  execution, this is not observable - in effect, it does not delay
> +*  processing of subsequent commands. Similarly an execution 
> dependency
> +*  with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage 
> mask
> +*  will effectively not wait for any prior commands to complete."
> +*/
> +   if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
> +   radv_stage_flush(cmd_buffer, info->srcStageMask);
> cmd_buffer->state.flush_bits |= src_flush_bits;
>
> for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
> @@ -4738,6 +4751,7 @@ void radv_CmdPipelineBarrier(
> info.eventCount = 0;
> info.pEvents = NULL;
> info.srcStageMask = srcStageMask;
> +   info.dstStageMask = destStageMask;
>
> radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
>  bufferMemoryBarrierCount, pBufferMemoryBarriers,
> diff --git a/src/amd/vulkan/radv_pass.c b/src/amd/vulkan/radv_pass.c
> index 08ea2454750..7a724dc2da5 100644
> --- a/src/amd/vulkan/radv_pass.c
> +++ b/src/amd/vulkan/radv_pass.c
> @@ -47,11 +47,13 @@ radv_render_pass_add_subpass_dep(struct radv_render_pass 
> *pass,
> dst = 0;
>
> if (dst == VK_SUBPASS_EXTERNAL) {
> -   pass->end_barrier.src_stage_mask |= dep->srcStageMask;
> +   if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
> +   pass->end_barrier.src_stage_mask |= dep->srcStageMask;
> pass->end_barrier.src_access_mask |= dep->srcAccessMask;
> pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
> } else {
> -   pass->subpasses[dst].start_barrier.src_stage_mask |= 
> dep->srcStageMask;
> +   if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
> +   pass->subpasses[dst].start_barrier.src_stage_mask |= 
> dep->srcStageMask;
> pass->subpasses[dst].start_barrier.src_access_mask |= 
> dep->srcAccessMask;
> pass->subpasses[dst].start_barrier.dst_access_mask |= 
> dep->dstAccessMask;
> }
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: take LDS into account for compute shader occupancy stats

2019-02-01 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 1, 2019 at 12:07 PM Timothy Arceri  wrote:
>
> Ported from d205faeb6c96.
> ---
>  src/amd/vulkan/radv_nir_to_llvm.c |  6 +++---
>  src/amd/vulkan/radv_private.h |  3 +++
>  src/amd/vulkan/radv_shader.c  | 10 --
>  3 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index e80938527e5..d90a4c0de1e 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -3372,9 +3372,9 @@ ac_setup_rings(struct radv_shader_context *ctx)
> }
>  }
>
> -static unsigned
> -ac_nir_get_max_workgroup_size(enum chip_class chip_class,
> - const struct nir_shader *nir)
> +unsigned
> +radv_nir_get_max_workgroup_size(enum chip_class chip_class,
> +   const struct nir_shader *nir)
>  {
> switch (nir->info.stage) {
> case MESA_SHADER_TESS_CTRL:
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 85c18906f84..e5b8286ea62 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1934,6 +1934,9 @@ void radv_compile_nir_shader(struct ac_llvm_compiler 
> *ac_llvm,
>  int nir_count,
>  const struct radv_nir_compiler_options *options);
>
> +unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
> +const struct nir_shader *nir);
> +
>  /* radv_shader_info.h */
>  struct radv_shader_info;
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 07450ff236b..a7fce02ee83 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -744,7 +744,8 @@ generate_shader_stats(struct radv_device *device,
>   gl_shader_stage stage,
>   struct _mesa_string_buffer *buf)
>  {
> -   unsigned lds_increment = device->physical_device->rad_info.chip_class 
> >= CIK ? 512 : 256;
> +   enum chip_class chip_class = 
> device->physical_device->rad_info.chip_class;
> +   unsigned lds_increment = chip_class >= CIK ? 512 : 256;
> struct ac_shader_config *conf;
> unsigned max_simd_waves;
> unsigned lds_per_wave = 0;
> @@ -757,12 +758,17 @@ generate_shader_stats(struct radv_device *device,
> lds_per_wave = conf->lds_size * lds_increment +
>align(variant->info.fs.num_interp * 48,
>  lds_increment);
> +   } else if (stage == MESA_SHADER_COMPUTE) {
> +   unsigned max_workgroup_size =
> +   ac_nir_get_max_workgroup_size(chip_class, 
> variant->nir);
> +   lds_per_wave = (conf->lds_size * lds_increment) /
> +  DIV_ROUND_UP(max_workgroup_size, 64);
> }
>
> if (conf->num_sgprs)
> max_simd_waves =
> MIN2(max_simd_waves,
> -
> ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class) / 
> conf->num_sgprs);
> +ac_get_num_physical_sgprs(chip_class) / 
> conf->num_sgprs);
>
> if (conf->num_vgprs)
> max_simd_waves =
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/19] radv: use the new attachments array in CmdEndRenderPass()

2019-02-01 Thread Bas Nieuwenhuizen
On Fri, Feb 1, 2019 at 8:52 AM Samuel Pitoiset
 wrote:
>
>
> On 1/31/19 11:16 AM, Bas Nieuwenhuizen wrote:
> > On Tue, Jan 29, 2019 at 10:16 PM Samuel Pitoiset
> >  wrote:
> >> That shouldn't change anything as we check if the last
> >> subpass id is the final subpass.
> >>
> >> Signed-off-by: Samuel Pitoiset 
> >> ---
> >>   src/amd/vulkan/radv_cmd_buffer.c | 16 +---
> >>   1 file changed, 13 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> >> b/src/amd/vulkan/radv_cmd_buffer.c
> >> index 914ff0055d9..b0b453d76e9 100644
> >> --- a/src/amd/vulkan/radv_cmd_buffer.c
> >> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> >> @@ -4333,15 +4333,25 @@ void radv_CmdEndRenderPass(
> >>  VkCommandBuffer commandBuffer)
> >>   {
> >>  RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
> >> +   struct radv_cmd_state *state = &cmd_buffer->state;
> >> +   const struct radv_subpass *subpass = state->subpass;
> >> +   uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
> >>
> >>  radv_subpass_barrier(cmd_buffer, 
> >> &cmd_buffer->state.pass->end_barrier);
> >>
> >>  radv_cmd_buffer_resolve_subpass(cmd_buffer);
> >>
> >> -   for (unsigned i = 0; i < 
> >> cmd_buffer->state.framebuffer->attachment_count; ++i) {
> >> -   VkImageLayout layout = 
> >> cmd_buffer->state.pass->attachments[i].final_layout;
> >> +   for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
> >> +   const uint32_t a = subpass->attachments[i].attachment;
> >> +   if (a == VK_ATTACHMENT_UNUSED)
> >> +   continue;
> >> +
> >> +   if (state->pass->attachments[a].last_subpass_idx != 
> >> subpass_id)
> >> +   continue;
> > I don't think we can add this without adding logic that if the
> > attachment is not in the final subpass, we transition it to the final
> > layout earlier?
>
> I don't see the problem. If the attachment isn't in the final subpass,
> the transition should happen in the previous subpass?

It should if we did the right thing, but we did not implement doing
the final transition before the final subpass yet right?

>
> >
> >> +
> >> +   VkImageLayout layout = 
> >> state->pass->attachments[a].final_layout;
> >>  radv_handle_subpass_image_transition(cmd_buffer,
> >> - (struct radv_subpass_attachment){i, 
> >> layout});
> >> + (struct radv_subpass_attachment){a, 
> >> layout});
> >>  }
> >>
> >>  vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
> >> --
> >> 2.20.1
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/19] radv: use the new attachments array in CmdEndRenderPass()

2019-01-31 Thread Bas Nieuwenhuizen
On Tue, Jan 29, 2019 at 10:16 PM Samuel Pitoiset
 wrote:
>
> That shouldn't change anything as we check if the last
> subpass id is the final subpass.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 16 +---
>  1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 914ff0055d9..b0b453d76e9 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4333,15 +4333,25 @@ void radv_CmdEndRenderPass(
> VkCommandBuffer commandBuffer)
>  {
> RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
> +   struct radv_cmd_state *state = &cmd_buffer->state;
> +   const struct radv_subpass *subpass = state->subpass;
> +   uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
>
> radv_subpass_barrier(cmd_buffer, 
> &cmd_buffer->state.pass->end_barrier);
>
> radv_cmd_buffer_resolve_subpass(cmd_buffer);
>
> -   for (unsigned i = 0; i < 
> cmd_buffer->state.framebuffer->attachment_count; ++i) {
> -   VkImageLayout layout = 
> cmd_buffer->state.pass->attachments[i].final_layout;
> +   for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
> +   const uint32_t a = subpass->attachments[i].attachment;
> +   if (a == VK_ATTACHMENT_UNUSED)
> +   continue;
> +
> +   if (state->pass->attachments[a].last_subpass_idx != 
> subpass_id)
> +   continue;

I don't think we can add this without adding logic that if the
attachment is not in the final subpass, we transition it to the final
layout earlier?

> +
> +   VkImageLayout layout = 
> state->pass->attachments[a].final_layout;
> radv_handle_subpass_image_transition(cmd_buffer,
> - (struct radv_subpass_attachment){i, 
> layout});
> + (struct radv_subpass_attachment){a, 
> layout});
> }
>
> vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 1/4] radv: gather more info about push constants

2019-01-28 Thread Bas Nieuwenhuizen
On Fri, Jan 25, 2019 at 5:27 PM Samuel Pitoiset
 wrote:
>
> This is needed in order to inline some push constants when possible.
> This also adds a new helper for initializing the pass.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_nir_to_llvm.c |  2 ++
>  src/amd/vulkan/radv_private.h |  2 ++
>  src/amd/vulkan/radv_shader.h  |  4 
>  src/amd/vulkan/radv_shader_info.c | 32 ++-
>  4 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index 5048d9d2493..b655e2c2e2c 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -3439,6 +3439,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct 
> ac_llvm_compiler *ac_llvm,
>
> memset(shader_info, 0, sizeof(*shader_info));
>
> +   radv_nir_shader_info_init(&shader_info->info);
> +
> for(int i = 0; i < shader_count; ++i)
> radv_nir_shader_info_pass(shaders[i], options, 
> &shader_info->info);
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 85c18906f84..4c76521a045 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1941,6 +1941,8 @@ void radv_nir_shader_info_pass(const struct nir_shader 
> *nir,
>const struct radv_nir_compiler_options 
> *options,
>struct radv_shader_info *info);
>
> +void radv_nir_shader_info_init(struct radv_shader_info *info);
> +
>  struct radeon_winsys_sem;
>
>  #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)\
> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
> index 3652a811e80..0f049f9a528 100644
> --- a/src/amd/vulkan/radv_shader.h
> +++ b/src/amd/vulkan/radv_shader.h
> @@ -162,6 +162,10 @@ struct radv_streamout_info {
>
>  struct radv_shader_info {
> bool loads_push_constants;
> +   uint8_t min_push_constant_used;
> +   uint8_t max_push_constant_used;

can be 0-256 + an "invalid" value, so need to be uint16_t?

> +   bool has_32bit_push_constants;
> +   bool has_indirect_push_constants;
> uint32_t desc_set_used_mask;
> bool needs_multiview_view_index;
> bool uses_invocation_id;
> diff --git a/src/amd/vulkan/radv_shader_info.c 
> b/src/amd/vulkan/radv_shader_info.c
> index 7e5a3789af2..c9cd5fddc53 100644
> --- a/src/amd/vulkan/radv_shader_info.c
> +++ b/src/amd/vulkan/radv_shader_info.c
> @@ -190,6 +190,30 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
> }
>  }
>
> +static void
> +gather_push_constant_info(const nir_shader *nir,
> + const nir_intrinsic_instr *instr,
> + struct radv_shader_info *info)
> +{
> +   nir_const_value *cval = nir_src_as_const_value(instr->src[0]);
> +
> +   if (!cval)
> +   info->has_indirect_push_constants = true;
> +
> +   if (instr->dest.ssa.bit_size == 32)
> +   info->has_32bit_push_constants = true;
> +
> +   int base = nir_intrinsic_base(instr);
> +   int range = nir_intrinsic_range(instr);
> +
> +   if (base + range > info->max_push_constant_used)
> +   info->max_push_constant_used = base + range;
> +   if (base < info->min_push_constant_used)
> +   info->min_push_constant_used = base;

Use MIN2 and MAX2?

> +
> +   info->loads_push_constants = true;
> +}
> +
>  static void
>  gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr 
> *instr,
>   struct radv_shader_info *info)
> @@ -243,7 +267,7 @@ gather_intrinsic_info(const nir_shader *nir, const 
> nir_intrinsic_instr *instr,
> info->uses_prim_id = true;
> break;
> case nir_intrinsic_load_push_constant:
> -   info->loads_push_constants = true;
> +   gather_push_constant_info(nir, instr, info);
> break;
> case nir_intrinsic_vulkan_resource_index:
> info->desc_set_used_mask |= (1 << 
> nir_intrinsic_desc_set(instr));
> @@ -504,6 +528,12 @@ gather_xfb_info(const nir_shader *nir, struct 
> radv_shader_info *info)
> ralloc_free(xfb);
>  }
>
> +void
> +radv_nir_shader_info_init(struct radv_shader_info *info)
> +{
> +   info->min_push_constant_used = -1;

Can you use one of the appropriate MAX macros from IIRC stddef.h?
> +}
> +
>  void
>  radv_nir_shader_info_pass(const struct nir_shader *nir,
>   const struct radv_nir_compiler_options *options,
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: re-enable fast depth clears for 16-bit surfaces on VI

2019-01-28 Thread Bas Nieuwenhuizen
Well, if it looks good to you and you could reproduce before

Reviewed-by: Bas Nieuwenhuizen 

On Mon, Jan 28, 2019 at 5:38 PM Samuel Pitoiset
 wrote:
>
> This has been disabled some months ago because it introduced
> rendering issues with Shadow Of Warrier II (DXVK). This game is
> no longer affected, I wonder if 824cfc1ee5e ("radv: rework the
> TC-compat HTILE hardware bug with COND_EXEC") fixed the problem.
> I checked The Forest on my Polaris, and it renders fine too.
>
> According to Phillip, this gives +5.5% with Rise Of The Tomb
> Raider and DXVK. This is because DXVK  uses 16-bit depth surfaces
> while the native port from Feral uses 32-bit depth surfaces.
>
> Unfortunately, Shadow Of The Tomb Raider isn't affected because
> it clears each layer of a D16 array texture individually. So it
> doesn't hit the fast clear path.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_clear.c | 8 
>  1 file changed, 8 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_clear.c 
> b/src/amd/vulkan/radv_meta_clear.c
> index b61345b9241..8805f0435e1 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -899,14 +899,6 @@ radv_image_can_fast_clear(struct radv_device *device,  
> struct radv_image *image)
> } else {
> if (!radv_image_has_htile(image))
> return false;
> -
> -   /* GFX8 only supports 32-bit depth surfaces but we can enable
> -* TC-compat HTILE for 16-bit surfaces if no Z planes are
> -* compressed. Though, fast HTILE clears don't seem to work.
> -*/
> -   if (device->physical_device->rad_info.chip_class == VI &&
> -   image->vk_format == VK_FORMAT_D16_UNORM)
> -   return false;
> }
>
> /* Do not fast clears 3D images. */
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


<    1   2   3   4   5   6   7   8   9   10   >