Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-06-24 Thread Samuel Pitoiset

Thanks for the heads-up, Marek; I will update once I have hardware access.

On 6/21/19 7:33 PM, Marek Olšák wrote:
Gfx10 remembers sample positions in compressed Z/S memory, so the hw 
doesn't need the decompress pass for shader loads.


Marek

On Wed, May 22, 2019 at 4:20 PM Marek Olšák wrote:


The depth decompress pass needs to know the sample locations.

If shader loads read from compressed depth, the texture hardware
will always use the standard locations for decompression.

Marek

On Tue, May 21, 2019 at 8:17 PM Bas Nieuwenhuizen
<b...@basnieuwenhuizen.nl> wrote:

So this does not seem to use the sample locations during
layout transitions?

AFAIK those are needed for e.g. HTILE decompression as it is
based on
equations somehow.

On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
<samuel.pitoi...@gmail.com>
wrote:
>
> Basically, this extension allows applications to use custom
> sample locations. It doesn't support variable sample locations
> during subpass. Note that we don't have to upload the user
> sample locations because the spec doesn't allow this.
>
> Only enabled on VI+ because it's untested on older chips.
>
> v2: - change sampleLocationCoordinateRange[1] to 0.9375
>     - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
>     - rebased on top of master
>     - some cleanups
>
> Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 223
++
>  src/amd/vulkan/radv_device.c      |  27 
>  src/amd/vulkan/radv_extensions.py |   1 +
>  src/amd/vulkan/radv_pipeline.c    |  30 
>  src/amd/vulkan/radv_private.h     |  26 +++-
>  5 files changed, 300 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
b/src/amd/vulkan/radv_cmd_buffer.c
> index 4f592bc7f68..fb79c1c6713 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct
radv_cmd_buffer *cmd_buffer,
>         dest->viewport.count = src->viewport.count;
>         dest->scissor.count = src->scissor.count;
>         dest->discard_rectangle.count =
src->discard_rectangle.count;
> +       dest->sample_location.count =
src->sample_location.count;
>
>         if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
>                 if (memcmp(>viewport.viewports,
>viewport.viewports,
> @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct
radv_cmd_buffer *cmd_buffer,
>                 }
>         }
>
> +       if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> +               if (dest->sample_location.per_pixel !=
src->sample_location.per_pixel ||
> +  dest->sample_location.grid_size.width !=
src->sample_location.grid_size.width ||
> +  dest->sample_location.grid_size.height !=
src->sample_location.grid_size.height ||
> +  memcmp(>sample_location.locations,
> + >sample_location.locations,
> + src->sample_location.count * sizeof(VkSampleLocationEXT))) {
> +  dest->sample_location.per_pixel =
src->sample_location.per_pixel;
> +  dest->sample_location.grid_size =
src->sample_location.grid_size;
> +  typed_memcpy(dest->sample_location.locations,
> + src->sample_location.locations,
> + src->sample_location.count);
> +                       dest_mask |=
RADV_DYNAMIC_SAMPLE_LOCATIONS;
> +               }
> +       }
> +
>         cmd_buffer->state.dirty |= dest_mask;
>  }
>
> @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct
radv_cmd_buffer *cmd_buffer,
>         }
>  }
>
> +/**
> + * Convert the user sample locations to hardware sample
locations (the values
> + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> + */
> +static void
> +radv_convert_user_sample_locs(struct
radv_sample_locations_state *state,
> +                             uint32_t x, uint32_t y,
VkOffset2D *sample_locs)
> +{
> +       uint32_t x_offset = x % state->grid_size.width;
> +       uint32_t y_offset = y % state->grid_size.height;
> +       uint32_t num_samples = (uint32_t)state->per_pixel;
> +       VkSampleLocationEXT *user_locs;
> +       uint32_t pixel_offset;
> +
> +       pixel_offset = (x_offset + y_offset *

Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-06-21 Thread Marek Olšák
Gfx10 remembers sample positions in compressed Z/S memory, so the hw
doesn't need the decompress pass for shader loads.

Marek

On Wed, May 22, 2019 at 4:20 PM Marek Olšák  wrote:

> The depth decompress pass needs to know the sample locations.
>
> If shader loads read from compressed depth, the texture hardware will
> always use the standard locations for decompression.
>
> Marek
>
> On Tue, May 21, 2019 at 8:17 PM Bas Nieuwenhuizen 
> wrote:
>
>> So this does not seem to use the sample locations during layout
>> transitions?
>>
>> AFAIK those are needed for e.g. HTILE decompression as it is based on
>> equations somehow.
>>
>> On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
>>  wrote:
>> >
>> > Basically, this extension allows applications to use custom
>> > sample locations. It doesn't support variable sample locations
>> > during subpass. Note that we don't have to upload the user
>> > sample locations because the spec doesn't allow this.
>> >
>> > Only enabled on VI+ because it's untested on older chips.
>> >
>> > v2: - change sampleLocationCoordinateRange[1] to 0.9375
>> > - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
>> > - rebased on top of master
>> > - some cleanups
>> >
>> > Signed-off-by: Samuel Pitoiset 
>> > ---
>> >  src/amd/vulkan/radv_cmd_buffer.c  | 223 ++
>> >  src/amd/vulkan/radv_device.c  |  27 
>> >  src/amd/vulkan/radv_extensions.py |   1 +
>> >  src/amd/vulkan/radv_pipeline.c|  30 
>> >  src/amd/vulkan/radv_private.h |  26 +++-
>> >  5 files changed, 300 insertions(+), 7 deletions(-)
>> >
>> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c
>> b/src/amd/vulkan/radv_cmd_buffer.c
>> > index 4f592bc7f68..fb79c1c6713 100644
>> > --- a/src/amd/vulkan/radv_cmd_buffer.c
>> > +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> > @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer
>> *cmd_buffer,
>> > dest->viewport.count = src->viewport.count;
>> > dest->scissor.count = src->scissor.count;
>> > dest->discard_rectangle.count = src->discard_rectangle.count;
>> > +   dest->sample_location.count = src->sample_location.count;
>> >
>> > if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
>> > if (memcmp(>viewport.viewports,
>> >viewport.viewports,
>> > @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer
>> *cmd_buffer,
>> > }
>> > }
>> >
>> > +   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
>> > +   if (dest->sample_location.per_pixel !=
>> src->sample_location.per_pixel ||
>> > +   dest->sample_location.grid_size.width !=
>> src->sample_location.grid_size.width ||
>> > +   dest->sample_location.grid_size.height !=
>> src->sample_location.grid_size.height ||
>> > +   memcmp(>sample_location.locations,
>> > +  >sample_location.locations,
>> > +  src->sample_location.count *
>> sizeof(VkSampleLocationEXT))) {
>> > +   dest->sample_location.per_pixel =
>> src->sample_location.per_pixel;
>> > +   dest->sample_location.grid_size =
>> src->sample_location.grid_size;
>> > +   typed_memcpy(dest->sample_location.locations,
>> > +src->sample_location.locations,
>> > +src->sample_location.count);
>> > +   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
>> > +   }
>> > +   }
>> > +
>> > cmd_buffer->state.dirty |= dest_mask;
>> >  }
>> >
>> > @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct
>> radv_cmd_buffer *cmd_buffer,
>> > }
>> >  }
>> >
>> > +/**
>> > + * Convert the user sample locations to hardware sample locations (the
>> values
>> > + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
>> > + */
>> > +static void
>> > +radv_convert_user_sample_locs(struct radv_sample_locations_state
>> *state,
>> > + uint32_t x, uint32_t y, VkOffset2D
>> *sample_locs)
>> > +{
>> > +   uint32_t x_offset = x % state->grid_size.width;
>> > +   uint32_t y_offset = y % state->grid_size.height;
>> > +   uint32_t num_samples = (uint32_t)state->per_pixel;
>> > +   VkSampleLocationEXT *user_locs;
>> > +   uint32_t pixel_offset;
>> > +
>> > +   pixel_offset = (x_offset + y_offset * state->grid_size.width) *
>> num_samples;
>> > +
>> > +   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
>> > +   user_locs = >locations[pixel_offset];
>> > +
>> > +   for (uint32_t i = 0; i < num_samples; i++) {
>> > +   float shifted_pos_x = user_locs[i].x - 0.5;
>> > +   float shifted_pos_y = user_locs[i].y - 0.5;
>> > +
>> > +   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
>> > +   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
>> > +
>> > +   sample_locs[i].x = 

Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-05-29 Thread Bas Nieuwenhuizen
R-b, assuming you disable the ext and add todos for the htile stuff.

On Fri, May 24, 2019, 9:28 AM Samuel Pitoiset 
wrote:

> It's a bit difficult and invasive to support variable sample locations
> during layout transitions actually.
>
> So for now, I disabled HTILE for depth/stencil images that might require
> sample locations.
> On 5/22/19 10:20 PM, Marek Olšák wrote:
>
> The depth decompress pass needs to know the sample locations.
>
> If shader loads read from compressed depth, the texture hardware will
> always use the standard locations for decompression.
>
> Marek
>
> On Tue, May 21, 2019 at 8:17 PM Bas Nieuwenhuizen 
> wrote:
>
>> So this does not seem to use the sample locations during layout
>> transitions?
>>
>> AFAIK those are needed for e.g. HTILE decompression as it is based on
>> equations somehow.
>>
>> On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
>>  wrote:
>> >
>> > Basically, this extension allows applications to use custom
>> > sample locations. It doesn't support variable sample locations
>> > during subpass. Note that we don't have to upload the user
>> > sample locations because the spec doesn't allow this.
>> >
>> > Only enabled on VI+ because it's untested on older chips.
>> >
>> > v2: - change sampleLocationCoordinateRange[1] to 0.9375
>> > - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
>> > - rebased on top of master
>> > - some cleanups
>> >
>> > Signed-off-by: Samuel Pitoiset 
>> > ---
>> >  src/amd/vulkan/radv_cmd_buffer.c  | 223 ++
>> >  src/amd/vulkan/radv_device.c  |  27 
>> >  src/amd/vulkan/radv_extensions.py |   1 +
>> >  src/amd/vulkan/radv_pipeline.c|  30 
>> >  src/amd/vulkan/radv_private.h |  26 +++-
>> >  5 files changed, 300 insertions(+), 7 deletions(-)
>> >
>> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c
>> b/src/amd/vulkan/radv_cmd_buffer.c
>> > index 4f592bc7f68..fb79c1c6713 100644
>> > --- a/src/amd/vulkan/radv_cmd_buffer.c
>> > +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> > @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer
>> *cmd_buffer,
>> > dest->viewport.count = src->viewport.count;
>> > dest->scissor.count = src->scissor.count;
>> > dest->discard_rectangle.count = src->discard_rectangle.count;
>> > +   dest->sample_location.count = src->sample_location.count;
>> >
>> > if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
>> > if (memcmp(>viewport.viewports,
>> >viewport.viewports,
>> > @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer
>> *cmd_buffer,
>> > }
>> > }
>> >
>> > +   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
>> > +   if (dest->sample_location.per_pixel !=
>> src->sample_location.per_pixel ||
>> > +   dest->sample_location.grid_size.width !=
>> src->sample_location.grid_size.width ||
>> > +   dest->sample_location.grid_size.height !=
>> src->sample_location.grid_size.height ||
>> > +   memcmp(>sample_location.locations,
>> > +  >sample_location.locations,
>> > +  src->sample_location.count *
>> sizeof(VkSampleLocationEXT))) {
>> > +   dest->sample_location.per_pixel =
>> src->sample_location.per_pixel;
>> > +   dest->sample_location.grid_size =
>> src->sample_location.grid_size;
>> > +   typed_memcpy(dest->sample_location.locations,
>> > +src->sample_location.locations,
>> > +src->sample_location.count);
>> > +   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
>> > +   }
>> > +   }
>> > +
>> > cmd_buffer->state.dirty |= dest_mask;
>> >  }
>> >
>> > @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct
>> radv_cmd_buffer *cmd_buffer,
>> > }
>> >  }
>> >
>> > +/**
>> > + * Convert the user sample locations to hardware sample locations (the
>> values
>> > + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
>> > + */
>> > +static void
>> > +radv_convert_user_sample_locs(struct radv_sample_locations_state
>> *state,
>> > + uint32_t x, uint32_t y, VkOffset2D
>> *sample_locs)
>> > +{
>> > +   uint32_t x_offset = x % state->grid_size.width;
>> > +   uint32_t y_offset = y % state->grid_size.height;
>> > +   uint32_t num_samples = (uint32_t)state->per_pixel;
>> > +   VkSampleLocationEXT *user_locs;
>> > +   uint32_t pixel_offset;
>> > +
>> > +   pixel_offset = (x_offset + y_offset * state->grid_size.width) *
>> num_samples;
>> > +
>> > +   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
>> > +   user_locs = >locations[pixel_offset];
>> > +
>> > +   for (uint32_t i = 0; i < num_samples; i++) {
>> > +   float shifted_pos_x = user_locs[i].x - 0.5;
>> > +   float shifted_pos_y = user_locs[i].y - 0.5;
>> 

Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-05-24 Thread Samuel Pitoiset
It's a bit difficult and invasive to support variable sample locations 
during layout transitions actually.


So for now, I disabled HTILE for depth/stencil images that might require 
sample locations.


On 5/22/19 10:20 PM, Marek Olšák wrote:

The depth decompress pass needs to know the sample locations.

If shader loads read from compressed depth, the texture hardware will 
always use the standard locations for decompression.


Marek

On Tue, May 21, 2019 at 8:17 PM Bas Nieuwenhuizen 
<b...@basnieuwenhuizen.nl> wrote:


So this does not seem to use the sample locations during layout
transitions?

AFAIK those are needed for e.g. HTILE decompression as it is based on
equations somehow.

On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
>
> Basically, this extension allows applications to use custom
> sample locations. It doesn't support variable sample locations
> during subpass. Note that we don't have to upload the user
> sample locations because the spec doesn't allow this.
>
> Only enabled on VI+ because it's untested on older chips.
>
> v2: - change sampleLocationCoordinateRange[1] to 0.9375
>     - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
>     - rebased on top of master
>     - some cleanups
>
> Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 223
++
>  src/amd/vulkan/radv_device.c      |  27 
>  src/amd/vulkan/radv_extensions.py |   1 +
>  src/amd/vulkan/radv_pipeline.c    |  30 
>  src/amd/vulkan/radv_private.h     |  26 +++-
>  5 files changed, 300 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
b/src/amd/vulkan/radv_cmd_buffer.c
> index 4f592bc7f68..fb79c1c6713 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct
radv_cmd_buffer *cmd_buffer,
>         dest->viewport.count = src->viewport.count;
>         dest->scissor.count = src->scissor.count;
>         dest->discard_rectangle.count =
src->discard_rectangle.count;
> +       dest->sample_location.count = src->sample_location.count;
>
>         if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
>                 if (memcmp(>viewport.viewports,
>viewport.viewports,
> @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct
radv_cmd_buffer *cmd_buffer,
>                 }
>         }
>
> +       if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> +               if (dest->sample_location.per_pixel !=
src->sample_location.per_pixel ||
> +  dest->sample_location.grid_size.width !=
src->sample_location.grid_size.width ||
> +  dest->sample_location.grid_size.height !=
src->sample_location.grid_size.height ||
> +  memcmp(>sample_location.locations,
> + >sample_location.locations,
> + src->sample_location.count * sizeof(VkSampleLocationEXT))) {
> +  dest->sample_location.per_pixel = src->sample_location.per_pixel;
> +  dest->sample_location.grid_size = src->sample_location.grid_size;
> +  typed_memcpy(dest->sample_location.locations,
> + src->sample_location.locations,
> + src->sample_location.count);
> +                       dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> +               }
> +       }
> +
>         cmd_buffer->state.dirty |= dest_mask;
>  }
>
> @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct
radv_cmd_buffer *cmd_buffer,
>         }
>  }
>
> +/**
> + * Convert the user sample locations to hardware sample
locations (the values
> + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> + */
> +static void
> +radv_convert_user_sample_locs(struct
radv_sample_locations_state *state,
> +                             uint32_t x, uint32_t y, VkOffset2D
*sample_locs)
> +{
> +       uint32_t x_offset = x % state->grid_size.width;
> +       uint32_t y_offset = y % state->grid_size.height;
> +       uint32_t num_samples = (uint32_t)state->per_pixel;
> +       VkSampleLocationEXT *user_locs;
> +       uint32_t pixel_offset;
> +
> +       pixel_offset = (x_offset + y_offset *
state->grid_size.width) * num_samples;
> +
> +       assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> +       user_locs = >locations[pixel_offset];
> +
> +       for (uint32_t i = 0; i < num_samples; i++) {
> +               float shifted_pos_x = user_locs[i].x - 0.5;
> +               float shifted_pos_y = user_locs[i].y - 0.5;
> +
> +               int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> +               int32_t scaled_pos_y = floor(shifted_pos_y * 16);
> +
> +               

Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-05-23 Thread Samuel Pitoiset

Thanks for pointing this out, I will implement.

On 5/22/19 10:20 PM, Marek Olšák wrote:

The depth decompress pass needs to know the sample locations.

If shader loads read from compressed depth, the texture hardware will 
always use the standard locations for decompression.


Marek

On Tue, May 21, 2019 at 8:17 PM Bas Nieuwenhuizen 
<b...@basnieuwenhuizen.nl> wrote:


So this does not seem to use the sample locations during layout
transitions?

AFAIK those are needed for e.g. HTILE decompression as it is based on
equations somehow.

On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
>
> Basically, this extension allows applications to use custom
> sample locations. It doesn't support variable sample locations
> during subpass. Note that we don't have to upload the user
> sample locations because the spec doesn't allow this.
>
> Only enabled on VI+ because it's untested on older chips.
>
> v2: - change sampleLocationCoordinateRange[1] to 0.9375
>     - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
>     - rebased on top of master
>     - some cleanups
>
> Signed-off-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 223
++
>  src/amd/vulkan/radv_device.c      |  27 
>  src/amd/vulkan/radv_extensions.py |   1 +
>  src/amd/vulkan/radv_pipeline.c    |  30 
>  src/amd/vulkan/radv_private.h     |  26 +++-
>  5 files changed, 300 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
b/src/amd/vulkan/radv_cmd_buffer.c
> index 4f592bc7f68..fb79c1c6713 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct
radv_cmd_buffer *cmd_buffer,
>         dest->viewport.count = src->viewport.count;
>         dest->scissor.count = src->scissor.count;
>         dest->discard_rectangle.count =
src->discard_rectangle.count;
> +       dest->sample_location.count = src->sample_location.count;
>
>         if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
>                 if (memcmp(>viewport.viewports,
>viewport.viewports,
> @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct
radv_cmd_buffer *cmd_buffer,
>                 }
>         }
>
> +       if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> +               if (dest->sample_location.per_pixel !=
src->sample_location.per_pixel ||
> +  dest->sample_location.grid_size.width !=
src->sample_location.grid_size.width ||
> +  dest->sample_location.grid_size.height !=
src->sample_location.grid_size.height ||
> +  memcmp(>sample_location.locations,
> + >sample_location.locations,
> + src->sample_location.count * sizeof(VkSampleLocationEXT))) {
> +  dest->sample_location.per_pixel = src->sample_location.per_pixel;
> +  dest->sample_location.grid_size = src->sample_location.grid_size;
> +  typed_memcpy(dest->sample_location.locations,
> + src->sample_location.locations,
> + src->sample_location.count);
> +                       dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> +               }
> +       }
> +
>         cmd_buffer->state.dirty |= dest_mask;
>  }
>
> @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct
radv_cmd_buffer *cmd_buffer,
>         }
>  }
>
> +/**
> + * Convert the user sample locations to hardware sample
locations (the values
> + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> + */
> +static void
> +radv_convert_user_sample_locs(struct
radv_sample_locations_state *state,
> +                             uint32_t x, uint32_t y, VkOffset2D
*sample_locs)
> +{
> +       uint32_t x_offset = x % state->grid_size.width;
> +       uint32_t y_offset = y % state->grid_size.height;
> +       uint32_t num_samples = (uint32_t)state->per_pixel;
> +       VkSampleLocationEXT *user_locs;
> +       uint32_t pixel_offset;
> +
> +       pixel_offset = (x_offset + y_offset *
state->grid_size.width) * num_samples;
> +
> +       assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> +       user_locs = >locations[pixel_offset];
> +
> +       for (uint32_t i = 0; i < num_samples; i++) {
> +               float shifted_pos_x = user_locs[i].x - 0.5;
> +               float shifted_pos_y = user_locs[i].y - 0.5;
> +
> +               int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> +               int32_t scaled_pos_y = floor(shifted_pos_y * 16);
> +
> +               sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
> +               sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
> +       }
> +}
> +
> 

Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-05-22 Thread Marek Olšák
The depth decompress pass needs to know the sample locations.

If shader loads read from compressed depth, the texture hardware will
always use the standard locations for decompression.

Marek

On Tue, May 21, 2019 at 8:17 PM Bas Nieuwenhuizen 
wrote:

> So this does not seem to use the sample locations during layout
> transitions?
>
> AFAIK those are needed for e.g. HTILE decompression as it is based on
> equations somehow.
>
> On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
>  wrote:
> >
> > Basically, this extension allows applications to use custom
> > sample locations. It doesn't support variable sample locations
> > during subpass. Note that we don't have to upload the user
> > sample locations because the spec doesn't allow this.
> >
> > Only enabled on VI+ because it's untested on older chips.
> >
> > v2: - change sampleLocationCoordinateRange[1] to 0.9375
> > - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
> > - rebased on top of master
> > - some cleanups
> >
> > Signed-off-by: Samuel Pitoiset 
> > ---
> >  src/amd/vulkan/radv_cmd_buffer.c  | 223 ++
> >  src/amd/vulkan/radv_device.c  |  27 
> >  src/amd/vulkan/radv_extensions.py |   1 +
> >  src/amd/vulkan/radv_pipeline.c|  30 
> >  src/amd/vulkan/radv_private.h |  26 +++-
> >  5 files changed, 300 insertions(+), 7 deletions(-)
> >
> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c
> b/src/amd/vulkan/radv_cmd_buffer.c
> > index 4f592bc7f68..fb79c1c6713 100644
> > --- a/src/amd/vulkan/radv_cmd_buffer.c
> > +++ b/src/amd/vulkan/radv_cmd_buffer.c
> > @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer
> *cmd_buffer,
> > dest->viewport.count = src->viewport.count;
> > dest->scissor.count = src->scissor.count;
> > dest->discard_rectangle.count = src->discard_rectangle.count;
> > +   dest->sample_location.count = src->sample_location.count;
> >
> > if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
> > if (memcmp(>viewport.viewports,
> >viewport.viewports,
> > @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer
> *cmd_buffer,
> > }
> > }
> >
> > +   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> > +   if (dest->sample_location.per_pixel !=
> src->sample_location.per_pixel ||
> > +   dest->sample_location.grid_size.width !=
> src->sample_location.grid_size.width ||
> > +   dest->sample_location.grid_size.height !=
> src->sample_location.grid_size.height ||
> > +   memcmp(>sample_location.locations,
> > +  >sample_location.locations,
> > +  src->sample_location.count *
> sizeof(VkSampleLocationEXT))) {
> > +   dest->sample_location.per_pixel =
> src->sample_location.per_pixel;
> > +   dest->sample_location.grid_size =
> src->sample_location.grid_size;
> > +   typed_memcpy(dest->sample_location.locations,
> > +src->sample_location.locations,
> > +src->sample_location.count);
> > +   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> > +   }
> > +   }
> > +
> > cmd_buffer->state.dirty |= dest_mask;
> >  }
> >
> > @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct
> radv_cmd_buffer *cmd_buffer,
> > }
> >  }
> >
> > +/**
> > + * Convert the user sample locations to hardware sample locations (the
> values
> > + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> > + */
> > +static void
> > +radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
> > + uint32_t x, uint32_t y, VkOffset2D
> *sample_locs)
> > +{
> > +   uint32_t x_offset = x % state->grid_size.width;
> > +   uint32_t y_offset = y % state->grid_size.height;
> > +   uint32_t num_samples = (uint32_t)state->per_pixel;
> > +   VkSampleLocationEXT *user_locs;
> > +   uint32_t pixel_offset;
> > +
> > +   pixel_offset = (x_offset + y_offset * state->grid_size.width) *
> num_samples;
> > +
> > +   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> > +   user_locs = >locations[pixel_offset];
> > +
> > +   for (uint32_t i = 0; i < num_samples; i++) {
> > +   float shifted_pos_x = user_locs[i].x - 0.5;
> > +   float shifted_pos_y = user_locs[i].y - 0.5;
> > +
> > +   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> > +   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
> > +
> > +   sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
> > +   sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
> > +   }
> > +}
> > +
> > +/**
> > + * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware
> sample
> > + * locations.
> > + */
> > +static void
> > +radv_compute_sample_locs_pixel(uint32_t 

Re: [Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-05-21 Thread Bas Nieuwenhuizen
So this does not seem to use the sample locations during layout transitions?

AFAIK those are needed for e.g. HTILE decompression as it is based on
equations somehow.

On Thu, May 16, 2019 at 11:51 AM Samuel Pitoiset
 wrote:
>
> Basically, this extension allows applications to use custom
> sample locations. It doesn't support variable sample locations
> during subpass. Note that we don't have to upload the user
> sample locations because the spec doesn't allow this.
>
> Only enabled on VI+ because it's untested on older chips.
>
> v2: - change sampleLocationCoordinateRange[1] to 0.9375
> - compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
> - rebased on top of master
> - some cleanups
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 223 ++
>  src/amd/vulkan/radv_device.c  |  27 
>  src/amd/vulkan/radv_extensions.py |   1 +
>  src/amd/vulkan/radv_pipeline.c|  30 
>  src/amd/vulkan/radv_private.h |  26 +++-
>  5 files changed, 300 insertions(+), 7 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 4f592bc7f68..fb79c1c6713 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer 
> *cmd_buffer,
> dest->viewport.count = src->viewport.count;
> dest->scissor.count = src->scissor.count;
> dest->discard_rectangle.count = src->discard_rectangle.count;
> +   dest->sample_location.count = src->sample_location.count;
>
> if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
> if (memcmp(>viewport.viewports, 
> >viewport.viewports,
> @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer 
> *cmd_buffer,
> }
> }
>
> +   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> +   if (dest->sample_location.per_pixel != 
> src->sample_location.per_pixel ||
> +   dest->sample_location.grid_size.width != 
> src->sample_location.grid_size.width ||
> +   dest->sample_location.grid_size.height != 
> src->sample_location.grid_size.height ||
> +   memcmp(>sample_location.locations,
> +  >sample_location.locations,
> +  src->sample_location.count * 
> sizeof(VkSampleLocationEXT))) {
> +   dest->sample_location.per_pixel = 
> src->sample_location.per_pixel;
> +   dest->sample_location.grid_size = 
> src->sample_location.grid_size;
> +   typed_memcpy(dest->sample_location.locations,
> +src->sample_location.locations,
> +src->sample_location.count);
> +   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> +   }
> +   }
> +
> cmd_buffer->state.dirty |= dest_mask;
>  }
>
> @@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
> *cmd_buffer,
> }
>  }
>
> +/**
> + * Convert the user sample locations to hardware sample locations (the values
> + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> + */
> +static void
> +radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
> + uint32_t x, uint32_t y, VkOffset2D *sample_locs)
> +{
> +   uint32_t x_offset = x % state->grid_size.width;
> +   uint32_t y_offset = y % state->grid_size.height;
> +   uint32_t num_samples = (uint32_t)state->per_pixel;
> +   VkSampleLocationEXT *user_locs;
> +   uint32_t pixel_offset;
> +
> +   pixel_offset = (x_offset + y_offset * state->grid_size.width) * 
> num_samples;
> +
> +   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> +   user_locs = >locations[pixel_offset];
> +
> +   for (uint32_t i = 0; i < num_samples; i++) {
> +   float shifted_pos_x = user_locs[i].x - 0.5;
> +   float shifted_pos_y = user_locs[i].y - 0.5;
> +
> +   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> +   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
> +
> +   sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
> +   sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
> +   }
> +}
> +
> +/**
> + * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
> + * locations.
> + */
> +static void
> +radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
> +  uint32_t *sample_locs_pixel)
> +{
> +   for (uint32_t i = 0; i < num_samples; i++) {
> +   uint32_t sample_reg_idx = i / 4;
> +   uint32_t sample_loc_idx = i % 4;
> +   int32_t pos_x = sample_locs[i].x;
> +   int32_t pos_y = sample_locs[i].y;
> +
> +   uint32_t shift_x = 8 * sample_loc_idx;
> +   uint32_t shift_y = 

[Mesa-dev] [PATCH v2] radv: implement VK_EXT_sample_locations

2019-05-16 Thread Samuel Pitoiset
Basically, this extension allows applications to use custom
sample locations. It doesn't support variable sample locations
during subpass. Note that we don't have to upload the user
sample locations because the spec doesn't allow this.

Only enabled on VI+ because it's untested on older chips.

v2: - change sampleLocationCoordinateRange[1] to 0.9375
- compute and emit PA_SC_CENTROID_PRIORITY_{0,1}
- rebased on top of master
- some cleanups

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c  | 223 ++
 src/amd/vulkan/radv_device.c  |  27 
 src/amd/vulkan/radv_extensions.py |   1 +
 src/amd/vulkan/radv_pipeline.c|  30 
 src/amd/vulkan/radv_private.h |  26 +++-
 5 files changed, 300 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 4f592bc7f68..fb79c1c6713 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
dest->viewport.count = src->viewport.count;
dest->scissor.count = src->scissor.count;
dest->discard_rectangle.count = src->discard_rectangle.count;
+   dest->sample_location.count = src->sample_location.count;
 
if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
@@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
}
}
 
+   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+   if (dest->sample_location.per_pixel != 
src->sample_location.per_pixel ||
+   dest->sample_location.grid_size.width != 
src->sample_location.grid_size.width ||
+   dest->sample_location.grid_size.height != 
src->sample_location.grid_size.height ||
+   memcmp(&dest->sample_location.locations,
+  &src->sample_location.locations,
+  src->sample_location.count * 
sizeof(VkSampleLocationEXT))) {
+   dest->sample_location.per_pixel = 
src->sample_location.per_pixel;
+   dest->sample_location.grid_size = 
src->sample_location.grid_size;
+   typed_memcpy(dest->sample_location.locations,
+src->sample_location.locations,
+src->sample_location.count);
+   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+   }
+   }
+
cmd_buffer->state.dirty |= dest_mask;
 }
 
@@ -632,6 +649,190 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
*cmd_buffer,
}
 }
 
+/**
+ * Convert the user sample locations to hardware sample locations (the values
+ * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
+ */
+static void
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
+ uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+{
+   uint32_t x_offset = x % state->grid_size.width;
+   uint32_t y_offset = y % state->grid_size.height;
+   uint32_t num_samples = (uint32_t)state->per_pixel;
+   VkSampleLocationEXT *user_locs;
+   uint32_t pixel_offset;
+
+   pixel_offset = (x_offset + y_offset * state->grid_size.width) * 
num_samples;
+
+   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
+   user_locs = &state->locations[pixel_offset];
+
+   for (uint32_t i = 0; i < num_samples; i++) {
+   float shifted_pos_x = user_locs[i].x - 0.5;
+   float shifted_pos_y = user_locs[i].y - 0.5;
+
+   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
+   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+
+   sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+   sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+   }
+}
+
+/**
+ * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
+ * locations.
+ */
+static void
+radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
+  uint32_t *sample_locs_pixel)
+{
+   for (uint32_t i = 0; i < num_samples; i++) {
+   uint32_t sample_reg_idx = i / 4;
+   uint32_t sample_loc_idx = i % 4;
+   int32_t pos_x = sample_locs[i].x;
+   int32_t pos_y = sample_locs[i].y;
+
+   uint32_t shift_x = 8 * sample_loc_idx;
+   uint32_t shift_y = shift_x + 4;
+
+   sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
+   sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
+   }
+}
+
+/**
+ * Compute the PA_SC_CENTROID_PRIORITY_* mask based on the top left hardware
+ * sample locations.
+ */
+static uint64_t
+radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer,
+  VkOffset2D *sample_locs,
+