Re: [Mesa-dev] [PATCH] radv: implement VK_EXT_sample_locations

2018-12-10 Thread Samuel Pitoiset



On 12/8/18 7:03 PM, Rhys Perry wrote:

A small number of questions/concerns:

- sampleLocationCoordinateRange[1] should probably be set to 0.9375,
   because of how the sample locations are encoded


That doesn't matter much but we can update it yeah.


- gl_SamplePosition doesn't seem like it would return the new sample
   locations


Oh right, and that's a huge pain to change...


- R_028BD4_PA_SC_CENTROID_PRIORITY_{0,1} isn't updated. I'm not sure if
   this is required, but it's probably best to do so.


Not sure if that's really important to set.


- I think it can pointlessly call radv_cayman_emit_msaa_sample_locs()
   before radv_emit_sample_locations()


That's an optimization.


- unlike AMDVLK, this doesn't seem to make use of sample location
   information during layout transitions?


No variableSampleLocations support.



You said that this implements the bare minimum, so you might already know
about some of these though (unless you were just talking about the
variableSampleLocations thing).
On Fri, 7 Dec 2018 at 16:19, Samuel Pitoiset  wrote:


Basically, this extension allows applications to use custom
sample locations. This only implements the barely minimum.
It doesn't support variable sample locations during subpass.

Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
CTS now pass.

Only enabled on VI+ because it's untested on older chips.

Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_cmd_buffer.c  | 177 +-
  src/amd/vulkan/radv_device.c  |  27 +
  src/amd/vulkan/radv_extensions.py |   1 +
  src/amd/vulkan/radv_pipeline.c|  30 +
  src/amd/vulkan/radv_private.h |  26 +++--
  5 files changed, 253 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b4aea5bc898..c4bebeda0ce 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
 dest->viewport.count = src->viewport.count;
 dest->scissor.count = src->scissor.count;
 dest->discard_rectangle.count = src->discard_rectangle.count;
+   dest->sample_location.count = src->sample_location.count;

 if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
 if (memcmp(>viewport.viewports, >viewport.viewports,
@@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
 }
 }

+   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+   if (dest->sample_location.per_pixel != 
src->sample_location.per_pixel ||
+   dest->sample_location.grid_size.width != 
src->sample_location.grid_size.width ||
+   dest->sample_location.grid_size.height != 
src->sample_location.grid_size.height ||
+   memcmp(>sample_location.locations,
+  >sample_location.locations,
+  src->sample_location.count * 
sizeof(VkSampleLocationEXT))) {
+   dest->sample_location.per_pixel = 
src->sample_location.per_pixel;
+   dest->sample_location.grid_size = 
src->sample_location.grid_size;
+   typed_memcpy(dest->sample_location.locations,
+src->sample_location.locations,
+src->sample_location.count);
+   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+   }
+   }
+
 cmd_buffer->state.dirty |= dest_mask;
  }

@@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
*cmd_buffer,
 }
  }

+/**
+ * Convert the user sample locations to hardware sample locations (the values
+ * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
+ */
+static void
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
+ uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+{
+   uint32_t x_offset = x % state->grid_size.width;
+   uint32_t y_offset = y % state->grid_size.height;
+   uint32_t num_samples = (uint32_t)state->per_pixel;
+   VkSampleLocationEXT *user_locs;
+   uint32_t pixel_offset;
+
+   pixel_offset = (x_offset + y_offset * state->grid_size.width) * 
num_samples;
+
+   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
+   user_locs = >locations[pixel_offset];
+
+   for (uint32_t i = 0; i < num_samples; i++) {
+   float shifted_pos_x = user_locs[i].x - 0.5;
+   float shifted_pos_y = user_locs[i].y - 0.5;
+
+   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
+   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+
+   sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+   sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+   }
+}
+
+/**
+ * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
+ * locations.
+ */

Re: [Mesa-dev] [PATCH] radv: implement VK_EXT_sample_locations

2018-12-08 Thread Bas Nieuwenhuizen
On Sat, Dec 8, 2018 at 7:03 PM Rhys Perry  wrote:
>
> A small number of questions/concerns:
>
> - sampleLocationCoordinateRange[1] should probably be set to 0.9375,
>   because of how the sample locations are encoded
> - gl_SamplePosition doesn't seem like it would return the new sample
>   locations
> - R_028BD4_PA_SC_CENTROID_PRIORITY_{0,1} isn't updated. I'm not sure if
>   this is required, but it's probably best to do so.
> - I think it can pointlessly call radv_cayman_emit_msaa_sample_locs()
>   before radv_emit_sample_locations()
> - unlike AMDVLK, this doesn't seem to make use of sample location
>   information during layout transitions?

AFAIU having the correct sample locations might be necessary during
HTILE decompression.

>
> You said that this implements the bare minimum, so you might already know
> about some of these though (unless you were just talking about the
> variableSampleLocations thing).
> On Fri, 7 Dec 2018 at 16:19, Samuel Pitoiset  
> wrote:
> >
> > Basically, this extension allows applications to use custom
> > sample locations. This only implements the barely minimum.
> > It doesn't support variable sample locations during subpass.
> >
> > Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
> > CTS now pass.
> >
> > Only enabled on VI+ because it's untested on older chips.
> >
> > Signed-off-by: Samuel Pitoiset 
> > ---
> >  src/amd/vulkan/radv_cmd_buffer.c  | 177 +-
> >  src/amd/vulkan/radv_device.c  |  27 +
> >  src/amd/vulkan/radv_extensions.py |   1 +
> >  src/amd/vulkan/radv_pipeline.c|  30 +
> >  src/amd/vulkan/radv_private.h |  26 +++--
> >  5 files changed, 253 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> > b/src/amd/vulkan/radv_cmd_buffer.c
> > index b4aea5bc898..c4bebeda0ce 100644
> > --- a/src/amd/vulkan/radv_cmd_buffer.c
> > +++ b/src/amd/vulkan/radv_cmd_buffer.c
> > @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer 
> > *cmd_buffer,
> > dest->viewport.count = src->viewport.count;
> > dest->scissor.count = src->scissor.count;
> > dest->discard_rectangle.count = src->discard_rectangle.count;
> > +   dest->sample_location.count = src->sample_location.count;
> >
> > if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
> > if (memcmp(>viewport.viewports, 
> > >viewport.viewports,
> > @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer 
> > *cmd_buffer,
> > }
> > }
> >
> > +   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> > +   if (dest->sample_location.per_pixel != 
> > src->sample_location.per_pixel ||
> > +   dest->sample_location.grid_size.width != 
> > src->sample_location.grid_size.width ||
> > +   dest->sample_location.grid_size.height != 
> > src->sample_location.grid_size.height ||
> > +   memcmp(>sample_location.locations,
> > +  >sample_location.locations,
> > +  src->sample_location.count * 
> > sizeof(VkSampleLocationEXT))) {
> > +   dest->sample_location.per_pixel = 
> > src->sample_location.per_pixel;
> > +   dest->sample_location.grid_size = 
> > src->sample_location.grid_size;
> > +   typed_memcpy(dest->sample_location.locations,
> > +src->sample_location.locations,
> > +src->sample_location.count);
> > +   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> > +   }
> > +   }
> > +
> > cmd_buffer->state.dirty |= dest_mask;
> >  }
> >
> > @@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
> > *cmd_buffer,
> > }
> >  }
> >
> > +/**
> > + * Convert the user sample locations to hardware sample locations (the 
> > values
> > + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> > + */
> > +static void
> > +radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
> > + uint32_t x, uint32_t y, VkOffset2D 
> > *sample_locs)
> > +{
> > +   uint32_t x_offset = x % state->grid_size.width;
> > +   uint32_t y_offset = y % state->grid_size.height;
> > +   uint32_t num_samples = (uint32_t)state->per_pixel;
> > +   VkSampleLocationEXT *user_locs;
> > +   uint32_t pixel_offset;
> > +
> > +   pixel_offset = (x_offset + y_offset * state->grid_size.width) * 
> > num_samples;
> > +
> > +   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> > +   user_locs = >locations[pixel_offset];
> > +
> > +   for (uint32_t i = 0; i < num_samples; i++) {
> > +   float shifted_pos_x = user_locs[i].x - 0.5;
> > +   float shifted_pos_y = user_locs[i].y - 0.5;
> > +
> > +   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> > +   int32_t scaled_pos_y 

Re: [Mesa-dev] [PATCH] radv: implement VK_EXT_sample_locations

2018-12-08 Thread Rhys Perry
A small number of questions/concerns:

- sampleLocationCoordinateRange[1] should probably be set to 0.9375,
  because of how the sample locations are encoded
- gl_SamplePosition doesn't seem like it would return the new sample
  locations
- R_028BD4_PA_SC_CENTROID_PRIORITY_{0,1} isn't updated. I'm not sure if
  this is required, but it's probably best to do so.
- I think it can pointlessly call radv_cayman_emit_msaa_sample_locs()
  before radv_emit_sample_locations()
- unlike AMDVLK, this doesn't seem to make use of sample location
  information during layout transitions?

You said that this implements the bare minimum, so you might already know
about some of these though (unless you were just talking about the
variableSampleLocations thing).
On Fri, 7 Dec 2018 at 16:19, Samuel Pitoiset  wrote:
>
> Basically, this extension allows applications to use custom
> sample locations. This only implements the barely minimum.
> It doesn't support variable sample locations during subpass.
>
> Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
> CTS now pass.
>
> Only enabled on VI+ because it's untested on older chips.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 177 +-
>  src/amd/vulkan/radv_device.c  |  27 +
>  src/amd/vulkan/radv_extensions.py |   1 +
>  src/amd/vulkan/radv_pipeline.c|  30 +
>  src/amd/vulkan/radv_private.h |  26 +++--
>  5 files changed, 253 insertions(+), 8 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index b4aea5bc898..c4bebeda0ce 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer 
> *cmd_buffer,
> dest->viewport.count = src->viewport.count;
> dest->scissor.count = src->scissor.count;
> dest->discard_rectangle.count = src->discard_rectangle.count;
> +   dest->sample_location.count = src->sample_location.count;
>
> if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
> if (memcmp(>viewport.viewports, 
> >viewport.viewports,
> @@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer 
> *cmd_buffer,
> }
> }
>
> +   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
> +   if (dest->sample_location.per_pixel != 
> src->sample_location.per_pixel ||
> +   dest->sample_location.grid_size.width != 
> src->sample_location.grid_size.width ||
> +   dest->sample_location.grid_size.height != 
> src->sample_location.grid_size.height ||
> +   memcmp(>sample_location.locations,
> +  >sample_location.locations,
> +  src->sample_location.count * 
> sizeof(VkSampleLocationEXT))) {
> +   dest->sample_location.per_pixel = 
> src->sample_location.per_pixel;
> +   dest->sample_location.grid_size = 
> src->sample_location.grid_size;
> +   typed_memcpy(dest->sample_location.locations,
> +src->sample_location.locations,
> +src->sample_location.count);
> +   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
> +   }
> +   }
> +
> cmd_buffer->state.dirty |= dest_mask;
>  }
>
> @@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
> *cmd_buffer,
> }
>  }
>
> +/**
> + * Convert the user sample locations to hardware sample locations (the values
> + * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
> + */
> +static void
> +radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
> + uint32_t x, uint32_t y, VkOffset2D *sample_locs)
> +{
> +   uint32_t x_offset = x % state->grid_size.width;
> +   uint32_t y_offset = y % state->grid_size.height;
> +   uint32_t num_samples = (uint32_t)state->per_pixel;
> +   VkSampleLocationEXT *user_locs;
> +   uint32_t pixel_offset;
> +
> +   pixel_offset = (x_offset + y_offset * state->grid_size.width) * 
> num_samples;
> +
> +   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
> +   user_locs = >locations[pixel_offset];
> +
> +   for (uint32_t i = 0; i < num_samples; i++) {
> +   float shifted_pos_x = user_locs[i].x - 0.5;
> +   float shifted_pos_y = user_locs[i].y - 0.5;
> +
> +   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
> +   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
> +
> +   sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
> +   sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
> +   }
> +}
> +
> +/**
> + * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
> + * locations.
> + */
> +static void
> +radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D 

[Mesa-dev] [PATCH] radv: implement VK_EXT_sample_locations

2018-12-07 Thread Samuel Pitoiset
Basically, this extension allows applications to use custom
sample locations. This only implements the barely minimum.
It doesn't support variable sample locations during subpass.

Most of the dEQP-VK.pipeline.multisample.sample_locations_ext.*
CTS now pass.

Only enabled on VI+ because it's untested on older chips.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c  | 177 +-
 src/amd/vulkan/radv_device.c  |  27 +
 src/amd/vulkan/radv_extensions.py |   1 +
 src/amd/vulkan/radv_pipeline.c|  30 +
 src/amd/vulkan/radv_private.h |  26 +++--
 5 files changed, 253 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index b4aea5bc898..c4bebeda0ce 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -105,6 +105,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
dest->viewport.count = src->viewport.count;
dest->scissor.count = src->scissor.count;
dest->discard_rectangle.count = src->discard_rectangle.count;
+   dest->sample_location.count = src->sample_location.count;
 
if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
if (memcmp(>viewport.viewports, >viewport.viewports,
@@ -192,6 +193,22 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
}
}
 
+   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+   if (dest->sample_location.per_pixel != 
src->sample_location.per_pixel ||
+   dest->sample_location.grid_size.width != 
src->sample_location.grid_size.width ||
+   dest->sample_location.grid_size.height != 
src->sample_location.grid_size.height ||
+   memcmp(>sample_location.locations,
+  >sample_location.locations,
+  src->sample_location.count * 
sizeof(VkSampleLocationEXT))) {
+   dest->sample_location.per_pixel = 
src->sample_location.per_pixel;
+   dest->sample_location.grid_size = 
src->sample_location.grid_size;
+   typed_memcpy(dest->sample_location.locations,
+src->sample_location.locations,
+src->sample_location.count);
+   dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+   }
+   }
+
cmd_buffer->state.dirty |= dest_mask;
 }
 
@@ -634,6 +651,135 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer 
*cmd_buffer,
}
 }
 
+/**
+ * Convert the user sample locations to hardware sample locations (the values
+ * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
+ */
+static void
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
+ uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+{
+   uint32_t x_offset = x % state->grid_size.width;
+   uint32_t y_offset = y % state->grid_size.height;
+   uint32_t num_samples = (uint32_t)state->per_pixel;
+   VkSampleLocationEXT *user_locs;
+   uint32_t pixel_offset;
+
+   pixel_offset = (x_offset + y_offset * state->grid_size.width) * 
num_samples;
+
+   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
+   user_locs = >locations[pixel_offset];
+
+   for (uint32_t i = 0; i < num_samples; i++) {
+   float shifted_pos_x = user_locs[i].x - 0.5;
+   float shifted_pos_y = user_locs[i].y - 0.5;
+
+   int32_t scaled_pos_x = floor(shifted_pos_x * 16);
+   int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+
+   sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+   sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+   }
+}
+
+/**
+ * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
+ * locations.
+ */
+static void
+radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
+  uint32_t *sample_locs_pixel)
+{
+   for (uint32_t i = 0; i < num_samples; ++i) {
+   uint32_t sample_reg_idx = i / 4;
+   uint32_t sample_loc_idx = i % 4;
+   int32_t pos_x = sample_locs[i].x;
+   int32_t pos_y = sample_locs[i].y;
+
+   uint32_t shift_x = 8 * sample_loc_idx;
+   uint32_t shift_y = shift_x + 4;
+
+   sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
+   sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
+   }
+}
+
+/**
+ * Emit the sample locations that are specified with VK_EXT_sample_locations.
+ */
+static void
+radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
+{
+   struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   struct radv_multisample_state *ms = >graphics.ms;
+   struct radv_sample_locations_state *sample_location =
+