Re: [Mesa-dev] [PATCH] anv: Do color resolve tracking one slice at a time for 3D images

2018-02-02 Thread Jason Ekstrand
On Fri, Feb 2, 2018 at 6:55 PM, Nanley Chery  wrote:

> On Thu, Feb 01, 2018 at 06:31:18PM -0800, Jason Ekstrand wrote:
> > ---
> >  src/intel/vulkan/anv_image.c   | 14 +-
>
> We should also update the comment in anv_image that describes 3D as
> having one slice per LOD.
>

Yup.  Fixed locally.  I found a couple more bugs with my branch to do aux
in GENERAL layout.  That one suddenly forced more resolves and pointed out
some 3D bugs the other didn't.


> >  src/intel/vulkan/anv_private.h |  9 -
> >  src/intel/vulkan/genX_cmd_buffer.c | 34 --
> 
> >  3 files changed, 33 insertions(+), 24 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > index 6008e3c..a3e857c 100644
> > --- a/src/intel/vulkan/anv_image.c
> > +++ b/src/intel/vulkan/anv_image.c
> > @@ -262,11 +262,15 @@ add_aux_state_tracking_buffer(struct anv_image
> *image,
> > /* Clear color and fast clear type */
> > unsigned state_size = device->isl_dev.ss.clear_value_size + 4;
> >
> > -   /* We only need to track compression on CCS_E surfaces.  We don't
> consider
> > -* 3D images as actually having multiple array layers.
> > -*/
> > -   if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E)
> > -  state_size += image->levels * image->array_size * 4;
> > +   /* We only need to track compression on CCS_E surfaces. */
> > +   if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) {
> > +  if (image->type == VK_IMAGE_TYPE_3D) {
> > + for (uint32_t l = 0; l < image->levels; l++)
> > +state_size += anv_minify(image->extent.depth, l) * 4;
> > +  } else {
> > + state_size += image->levels * image->array_size * 4;
> > +  }
> > +   }
> >
> > image->planes[plane].fast_clear_state_offset =
> >image->planes[plane].offset + image->planes[plane].size;
> > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> > index 0cd94bf..f208618 100644
> > --- a/src/intel/vulkan/anv_private.h
> > +++ b/src/intel/vulkan/anv_private.h
> > @@ -2573,8 +2573,15 @@ anv_image_get_compression_state_addr(const
> struct anv_device *device,
> > struct anv_address addr =
> >anv_image_get_fast_clear_type_addr(device, image, aspect);
> > addr.offset += 4; /* Go past the fast clear type */
> > -   addr.offset += level * image->array_size * 4;
> > +
> > +   if (image->type == VK_IMAGE_TYPE_3D) {
> > +  for (uint32_t l = 0; l < image->levels; l++)
> > + addr.offset += anv_minify(image->extent.depth, l) * 4;
> > +   } else {
> > +  addr.offset += level * image->array_size * 4;
> > +   }
> > addr.offset += array_layer * 4;
> > +
> > return addr;
> >  }
> >
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> > index e29228d..b4b6b7d 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -632,14 +632,8 @@ anv_cmd_predicated_ccs_resolve(struct
> anv_cmd_buffer *cmd_buffer,
> >mip.CompareOperation = COMPARE_SRCS_EQUAL;
> > }
> >
> > -   if (image->type == VK_IMAGE_TYPE_3D) {
> > -  anv_image_ccs_op(cmd_buffer, image, aspect, level,
> > -   0, anv_minify(image->extent.depth, level),
> > -   resolve_op, true);
> > -   } else {
> > -  anv_image_ccs_op(cmd_buffer, image, aspect, level,
> > -   array_layer, 1, resolve_op, true);
> > -   }
> > +   anv_image_ccs_op(cmd_buffer, image, aspect, level,
> > +array_layer, 1, resolve_op, true);
> >  }
> >
> >  void
> > @@ -836,9 +830,6 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> > base_layer, layer_count);
> > }
> >
> > -   if (image->type == VK_IMAGE_TYPE_3D)
> > -  base_layer = 0;
> > -
> > if (base_layer >= anv_image_aux_layers(image, aspect, base_level))
> >return;
> >
> > @@ -897,10 +888,6 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> >  uint32_t level_layer_count =
> > MIN2(layer_count, anv_image_aux_layers(image, aspect,
> level));
> >
> > -/* A transition of a 3D subresource works on all slices. */
> > -if (image->type == VK_IMAGE_TYPE_3D)
> > -   level_layer_count = anv_minify(image->extent.depth,
> level);
> > -
> >  anv_image_ccs_op(cmd_buffer, image, aspect, level,
> >   base_layer, level_layer_count,
> >   ISL_AUX_OP_AMBIGUATE, false);
> > @@ -994,7 +981,10 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> >
> > for (uint32_t l = 0; l < level_count; l++) {
> >uint32_t level = base_level + l;
> > -  for (uint32_t a = 0; a < layer_count; a++) {
> > +  uint32_t level_layer_count =
> > + MIN2(layer_count, 

Re: [Mesa-dev] [PATCH 2/2] i965: Create new program cache bo when clearing the program cache

2018-02-02 Thread Kenneth Graunke
On Friday, February 2, 2018 5:07:01 PM PST Jordan Justen wrote:
> When the disk shader cache CI testing was enabled, we started noticing
> occasional failures on deqp test runs. (Mainly SNB, rarely HSW)
> 
> Before this change, when we cleared the (in memory) program cache we
> reused the same bo. Since the disk shader cache quickly restores
> programs, it appears that this would lead to overwrites of the older
> program binaries in the in memory program cache that apparently were
> still executing in some cases. If these programs were still executing,
> this could cause a GPU hang.
> 
> This issue probably is not disk shader cache specific, but rather may
> have been hidden since the compiler would take some time to recompile
> programs after the cache was cleared.
> 
> Cc: Kenneth Graunke 
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_program_cache.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
> b/src/mesa/drivers/dri/i965/brw_program_cache.c
> index f084f94f929..a6638c3d302 100644
> --- a/src/mesa/drivers/dri/i965/brw_program_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
> @@ -448,6 +448,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache 
> *cache)
> brw->cs.base.prog_data = NULL;
>  
> intel_batchbuffer_flush(brw);
> +   if (cache->bo)
> +  brw_cache_new_bo(cache, cache->bo->size, false);
>  }
>  
>  void
> 

Also, please Cc: mesa-stable@lists.freedesktop.org for this patch.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Create new program cache bo when clearing the program cache

2018-02-02 Thread Kenneth Graunke
On Friday, February 2, 2018 5:07:01 PM PST Jordan Justen wrote:
> When the disk shader cache CI testing was enabled, we started noticing
> occasional failures on deqp test runs. (Mainly SNB, rarely HSW)
> 
> Before this change, when we cleared the (in memory) program cache we
> reused the same bo. Since the disk shader cache quickly restores
> programs, it appears that this would lead to overwrites of the older
> program binaries in the in memory program cache that apparently were
> still executing in some cases. If these programs were still executing,
> this could cause a GPU hang.
> 
> This issue probably is not disk shader cache specific, but rather may
> have been hidden since the compiler would take some time to recompile
> programs after the cache was cleared.
> 
> Cc: Kenneth Graunke 
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_program_cache.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
> b/src/mesa/drivers/dri/i965/brw_program_cache.c
> index f084f94f929..a6638c3d302 100644
> --- a/src/mesa/drivers/dri/i965/brw_program_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
> @@ -448,6 +448,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache 
> *cache)
> brw->cs.base.prog_data = NULL;
>  
> intel_batchbuffer_flush(brw);
> +   if (cache->bo)
> +  brw_cache_new_bo(cache, cache->bo->size, false);
>  }
>  
>  void
> 

I'd kind of expected this to be called from brw_program_cache_check_size
instead...was worried we'd be making a new BO on brw_destroy_cache...but
in that case, we've destroyed cache->bo and made it NULL, so this NULL
check will guarantee that we do the right thing.

Either way's fine with me.

With the extra boolean parameter dropped, this is
Reviewed-by: Kenneth Graunke 



signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Add copy param to brw_cache_new_bo

2018-02-02 Thread Kenneth Graunke
On Friday, February 2, 2018 5:07:00 PM PST Jordan Justen wrote:
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_program_cache.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
> b/src/mesa/drivers/dri/i965/brw_program_cache.c
> index 9266273b5da..f084f94f929 100644
> --- a/src/mesa/drivers/dri/i965/brw_program_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
> @@ -213,7 +213,7 @@ brw_search_cache(struct brw_cache *cache,
>  }
>  
>  static void
> -brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
> +brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size, bool copy)
>  {
> struct brw_context *brw = cache->brw;
> struct brw_bo *new_bo;
> @@ -229,7 +229,7 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t 
> new_size)
> MAP_ASYNC | MAP_PERSISTENT);
>  
> /* Copy any existing data that needs to be saved. */
> -   if (cache->next_offset != 0) {
> +   if (copy && cache->next_offset != 0) {
>  #ifdef USE_SSE41
>if (!cache->bo->cache_coherent && cpu_has_sse4_1)
>   _mesa_streaming_load_memcpy(map, cache->map, cache->next_offset);
> @@ -286,7 +286,7 @@ brw_alloc_item_data(struct brw_cache *cache, uint32_t 
> size)
>while (cache->next_offset + size > new_size)
>   new_size *= 2;
>  
> -  brw_cache_new_bo(cache, new_size);
> +  brw_cache_new_bo(cache, new_size, true);
> }
>  
> offset = cache->next_offset;
> 

I don't think you need this new parameter...we can just arrange for
cache->next_offset to be reset to 0 before calling brw_cache_new_bo,
at which point it'll see there's nothing to copy, and skip it.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/blorp: Use the hardware op for CCS ambiguate on gen10+

2018-02-02 Thread Jason Ekstrand
On Fri, Feb 2, 2018 at 6:47 PM, Nanley Chery  wrote:

> On Tue, Jan 30, 2018 at 05:20:07PM -0800, Jason Ekstrand wrote:
> > Completely untested.
>
> The message in your fdo branch looks good.
>
> > ---
> >  src/intel/blorp/blorp_clear.c | 12 +++-
> >  src/intel/blorp/blorp_genX_exec.h |  6 ++
> >  2 files changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/intel/blorp/blorp_clear.c
> b/src/intel/blorp/blorp_clear.c
> > index dd29d9e..32ec31b 100644
> > --- a/src/intel/blorp/blorp_clear.c
> > +++ b/src/intel/blorp/blorp_clear.c
> > @@ -758,7 +758,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> > params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
> > params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
> >
> > -   if (batch->blorp->isl_dev->info->gen >= 9) {
> > +   if (batch->blorp->isl_dev->info->gen >= 10) {
> > +  assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
> > + resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE ||
> > + resolve_op == ISL_AUX_OP_AMBIGUATE);
> > +   } else if (batch->blorp->isl_dev->info->gen >= 9) {
> >assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
> >   resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
> > } else {
> > @@ -893,6 +897,12 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
> >  struct blorp_surf *surf,
> >  uint32_t level, uint32_t layer)
> >  {
> > +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 10) {
> > +  /* On gen10 and above, we have a hardware resolve op for this */
> > +  return blorp_ccs_resolve(batch, surf, level, layer, 1,
> > +   surf->surf->format,
> ISL_AUX_OP_AMBIGUATE);
>
> The HW docs describe the fast-clear-to-0 as occurring during a clear pass.
> Why are we doing it in a resolve pass?
>

The only difference between the two for CCS is that we have a bit of extra
alignment for fast-clears.  I've combed through all the docs I can find in
the bspec and I can't find the alignment requirement anymore.  In fact, I
found a nice little SKL+ line that says "The Resolve Rectangle size is same
as Clear Rectangle size from SKL+".  The extra alignment isn't hurting
anything but it also means that there's no real difference between clears
and resolves anymore.

--Jason



> > +   }
> > +
> > struct blorp_params params;
> > blorp_params_init();
> >
> > diff --git a/src/intel/blorp/blorp_genX_exec.h
> b/src/intel/blorp/blorp_genX_exec.h
> > index 5e1312a..85abf6b 100644
> > --- a/src/intel/blorp/blorp_genX_exec.h
> > +++ b/src/intel/blorp/blorp_genX_exec.h
> > @@ -752,6 +752,12 @@ blorp_emit_ps_config(struct blorp_batch *batch,
> >switch (params->fast_clear_op) {
> >case ISL_AUX_OP_NONE:
> >   break;
> > +#if GEN_GEN >= 10
> > +  case ISL_AUX_OP_AMBIGUATE:
> > + ps.RenderTargetFastClearEnable = true;
> > + ps.RenderTargetResolveType = FAST_CLEAR_0;
> > + break;
> > +#endif
> >  #if GEN_GEN >= 9
> >case ISL_AUX_OP_PARTIAL_RESOLVE:
> >   ps.RenderTargetResolveType = RESOLVE_PARTIAL;
> > --
> > 2.5.0.400.gff86faf
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 21/24] anv: Allow fast-clearing the first slice of a multi-slice image

2018-02-02 Thread Nanley Chery
On Fri, Feb 02, 2018 at 09:02:25PM -0800, Jason Ekstrand wrote:
> On Fri, Feb 2, 2018 at 5:58 PM, Nanley Chery  wrote:
> 
> > On Fri, Feb 02, 2018 at 04:42:14PM -0800, Jason Ekstrand wrote:
> > > On Fri, Feb 2, 2018 at 2:39 PM, Nanley Chery 
> > wrote:
> > >
> > > > On Fri, Jan 19, 2018 at 03:47:38PM -0800, Jason Ekstrand wrote:
> > > > > Now that we're tracking aux properly per-slice, we can enable this
> > for
> > > > > applications which actually care.
> > > > > ---
> > > > >  src/intel/vulkan/anv_blorp.c   | 22 +++---
> > > > >  src/intel/vulkan/genX_cmd_buffer.c | 13 +
> > > > >  2 files changed, 24 insertions(+), 11 deletions(-)
> > > > >
> > > > > diff --git a/src/intel/vulkan/anv_blorp.c
> > b/src/intel/vulkan/anv_blorp.c
> > > > > index 594b0d8..73a44fd 100644
> > > > > --- a/src/intel/vulkan/anv_blorp.c
> > > > > +++ b/src/intel/vulkan/anv_blorp.c
> > > > > @@ -1205,9 +1205,16 @@ anv_cmd_buffer_clear_subpass(struct
> > > > anv_cmd_buffer *cmd_buffer)
> > > > > image, VK_IMAGE_ASPECT_COLOR_BIT,
> > > > > att_state->aux_usage, );
> > > > >
> > > > > +  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
> > > > > +  uint32_t layer_count = fb->layers;
> > > > > +
> > > > >if (att_state->fast_clear) {
> > > > >   surf.clear_color = vk_to_isl_color(att_state->
> > > > clear_value.color);
> > > > >
> > > > > + /* We only support fast-clears on the first layer */
> > > > > + assert(iview->planes[0].isl.base_level == 0);
> > > > > + assert(iview->planes[0].isl.base_array_layer == 0);
> > > > > +
> > > > >   /* From the Sky Lake PRM Vol. 7, "Render Target Fast
> > Clear":
> > > > >*
> > > > >*"After Render target fast clear, pipe-control with
> > color
> > > > cache
> > > > > @@ -1229,27 +1236,28 @@ anv_cmd_buffer_clear_subpass(struct
> > > > anv_cmd_buffer *cmd_buffer)
> > > > >
> > > > >   assert(image->n_planes == 1);
> > > > >   blorp_fast_clear(, ,
> > iview->planes[0].isl.format,
> > > > > -  iview->planes[0].isl.base_level,
> > > > > -  iview->planes[0].isl.base_array_layer,
> > > > fb->layers,
> > > > > +  iview->planes[0].isl.base_level,
> > base_layer,
> > > > 1,
> > > > >render_area.offset.x,
> > render_area.offset.y,
> > > > >render_area.offset.x +
> > > > render_area.extent.width,
> > > > >render_area.offset.y +
> > > > render_area.extent.height);
> > > > > + base_layer++;
> > > > > + layer_count--;
> > > > >
> > > > >   cmd_buffer->state.pending_pipe_bits |=
> > > > >  ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
> > > > ANV_PIPE_CS_STALL_BIT;
> > > > > -  } else {
> > > > > +  }
> > > > > +
> > > > > +  if (layer_count > 0) {
> > > > >   assert(image->n_planes == 1);
> > > > >   anv_cmd_buffer_mark_image_written(cmd_buffer, image,
> > > > >
> >  VK_IMAGE_ASPECT_COLOR_BIT,
> > > > > att_state->aux_usage,
> > > > >
> >  iview->planes[0].isl.base_
> > > > level,
> > > > > -
> >  iview->planes[0].isl.base_
> > > > array_layer,
> > > > > -   fb->layers);
> > > > > +   base_layer, layer_count);
> > > > >
> > > > >   blorp_clear(, , iview->planes[0].isl.format,
> > > > >   anv_swizzle_for_render(iview->
> > > > planes[0].isl.swizzle),
> > > > > - iview->planes[0].isl.base_level,
> > > > > - iview->planes[0].isl.base_array_layer,
> > fb->layers,
> > > > > + iview->planes[0].isl.base_level, base_layer,
> > > > layer_count,
> > > > >   render_area.offset.x, render_area.offset.y,
> > > > >   render_area.offset.x +
> > render_area.extent.width,
> > > > >   render_area.offset.y +
> > render_area.extent.height,
> > > > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > > > b/src/intel/vulkan/genX_cmd_buffer.c
> > > > > index 4c83a5c..484246d 100644
> > > > > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > > > > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > > > > @@ -329,12 +329,17 @@ color_attachment_compute_aux_usage(struct
> > > > anv_device * device,
> > > > > */
> > > > >if (att_state->fast_clear &&
> > > > >(iview->planes[0].isl.base_level > 0 ||
> > > > > -   iview->image->type == VK_IMAGE_TYPE_3D ||
> > > > > -   iview->image->array_size > 0)) {
> > > > > +   iview->planes[0].isl.base_array_layer > 0 ||
> > > > > +   cmd_state->framebuffer->layers > 1)) {
> > > > >   anv_perf_warn(device->instance, 

Re: [Mesa-dev] [PATCH v2 21/24] anv: Allow fast-clearing the first slice of a multi-slice image

2018-02-02 Thread Jason Ekstrand
On Fri, Feb 2, 2018 at 5:58 PM, Nanley Chery  wrote:

> On Fri, Feb 02, 2018 at 04:42:14PM -0800, Jason Ekstrand wrote:
> > On Fri, Feb 2, 2018 at 2:39 PM, Nanley Chery 
> wrote:
> >
> > > On Fri, Jan 19, 2018 at 03:47:38PM -0800, Jason Ekstrand wrote:
> > > > Now that we're tracking aux properly per-slice, we can enable this
> for
> > > > applications which actually care.
> > > > ---
> > > >  src/intel/vulkan/anv_blorp.c   | 22 +++---
> > > >  src/intel/vulkan/genX_cmd_buffer.c | 13 +
> > > >  2 files changed, 24 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/src/intel/vulkan/anv_blorp.c
> b/src/intel/vulkan/anv_blorp.c
> > > > index 594b0d8..73a44fd 100644
> > > > --- a/src/intel/vulkan/anv_blorp.c
> > > > +++ b/src/intel/vulkan/anv_blorp.c
> > > > @@ -1205,9 +1205,16 @@ anv_cmd_buffer_clear_subpass(struct
> > > anv_cmd_buffer *cmd_buffer)
> > > > image, VK_IMAGE_ASPECT_COLOR_BIT,
> > > > att_state->aux_usage, );
> > > >
> > > > +  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
> > > > +  uint32_t layer_count = fb->layers;
> > > > +
> > > >if (att_state->fast_clear) {
> > > >   surf.clear_color = vk_to_isl_color(att_state->
> > > clear_value.color);
> > > >
> > > > + /* We only support fast-clears on the first layer */
> > > > + assert(iview->planes[0].isl.base_level == 0);
> > > > + assert(iview->planes[0].isl.base_array_layer == 0);
> > > > +
> > > >   /* From the Sky Lake PRM Vol. 7, "Render Target Fast
> Clear":
> > > >*
> > > >*"After Render target fast clear, pipe-control with
> color
> > > cache
> > > > @@ -1229,27 +1236,28 @@ anv_cmd_buffer_clear_subpass(struct
> > > anv_cmd_buffer *cmd_buffer)
> > > >
> > > >   assert(image->n_planes == 1);
> > > >   blorp_fast_clear(, ,
> iview->planes[0].isl.format,
> > > > -  iview->planes[0].isl.base_level,
> > > > -  iview->planes[0].isl.base_array_layer,
> > > fb->layers,
> > > > +  iview->planes[0].isl.base_level,
> base_layer,
> > > 1,
> > > >render_area.offset.x,
> render_area.offset.y,
> > > >render_area.offset.x +
> > > render_area.extent.width,
> > > >render_area.offset.y +
> > > render_area.extent.height);
> > > > + base_layer++;
> > > > + layer_count--;
> > > >
> > > >   cmd_buffer->state.pending_pipe_bits |=
> > > >  ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
> > > ANV_PIPE_CS_STALL_BIT;
> > > > -  } else {
> > > > +  }
> > > > +
> > > > +  if (layer_count > 0) {
> > > >   assert(image->n_planes == 1);
> > > >   anv_cmd_buffer_mark_image_written(cmd_buffer, image,
> > > >
>  VK_IMAGE_ASPECT_COLOR_BIT,
> > > > att_state->aux_usage,
> > > >
>  iview->planes[0].isl.base_
> > > level,
> > > > -
>  iview->planes[0].isl.base_
> > > array_layer,
> > > > -   fb->layers);
> > > > +   base_layer, layer_count);
> > > >
> > > >   blorp_clear(, , iview->planes[0].isl.format,
> > > >   anv_swizzle_for_render(iview->
> > > planes[0].isl.swizzle),
> > > > - iview->planes[0].isl.base_level,
> > > > - iview->planes[0].isl.base_array_layer,
> fb->layers,
> > > > + iview->planes[0].isl.base_level, base_layer,
> > > layer_count,
> > > >   render_area.offset.x, render_area.offset.y,
> > > >   render_area.offset.x +
> render_area.extent.width,
> > > >   render_area.offset.y +
> render_area.extent.height,
> > > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > > b/src/intel/vulkan/genX_cmd_buffer.c
> > > > index 4c83a5c..484246d 100644
> > > > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > > > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > > > @@ -329,12 +329,17 @@ color_attachment_compute_aux_usage(struct
> > > anv_device * device,
> > > > */
> > > >if (att_state->fast_clear &&
> > > >(iview->planes[0].isl.base_level > 0 ||
> > > > -   iview->image->type == VK_IMAGE_TYPE_3D ||
> > > > -   iview->image->array_size > 0)) {
> > > > +   iview->planes[0].isl.base_array_layer > 0 ||
> > > > +   cmd_state->framebuffer->layers > 1)) {
> > > >   anv_perf_warn(device->instance, iview->image,
> > > > "Rendering to a multi-LOD or multi-layer
> > > framebuffer "
> > > > -   "with LOAD_OP_CLEAR.  Not fast-clearing");
> > > > - att_state->fast_clear = false;
> > > > +   "with LOAD_OP_CLEAR.  

Re: [Mesa-dev] [PATCH] anv: Do color resolve tracking one slice at a time for 3D images

2018-02-02 Thread Nanley Chery
On Thu, Feb 01, 2018 at 06:31:18PM -0800, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/anv_image.c   | 14 +-

We should also update the comment in anv_image that describes 3D as
having one slice per LOD.

>  src/intel/vulkan/anv_private.h |  9 -
>  src/intel/vulkan/genX_cmd_buffer.c | 34 --
>  3 files changed, 33 insertions(+), 24 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 6008e3c..a3e857c 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -262,11 +262,15 @@ add_aux_state_tracking_buffer(struct anv_image *image,
> /* Clear color and fast clear type */
> unsigned state_size = device->isl_dev.ss.clear_value_size + 4;
>  
> -   /* We only need to track compression on CCS_E surfaces.  We don't consider
> -* 3D images as actually having multiple array layers.
> -*/
> -   if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E)
> -  state_size += image->levels * image->array_size * 4;
> +   /* We only need to track compression on CCS_E surfaces. */
> +   if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E) {
> +  if (image->type == VK_IMAGE_TYPE_3D) {
> + for (uint32_t l = 0; l < image->levels; l++)
> +state_size += anv_minify(image->extent.depth, l) * 4;
> +  } else {
> + state_size += image->levels * image->array_size * 4;
> +  }
> +   }
>  
> image->planes[plane].fast_clear_state_offset =
>image->planes[plane].offset + image->planes[plane].size;
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 0cd94bf..f208618 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -2573,8 +2573,15 @@ anv_image_get_compression_state_addr(const struct 
> anv_device *device,
> struct anv_address addr =
>anv_image_get_fast_clear_type_addr(device, image, aspect);
> addr.offset += 4; /* Go past the fast clear type */
> -   addr.offset += level * image->array_size * 4;
> +
> +   if (image->type == VK_IMAGE_TYPE_3D) {
> +  for (uint32_t l = 0; l < image->levels; l++)
> + addr.offset += anv_minify(image->extent.depth, l) * 4;
> +   } else {
> +  addr.offset += level * image->array_size * 4;
> +   }
> addr.offset += array_layer * 4;
> +
> return addr;
>  }
>  
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index e29228d..b4b6b7d 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -632,14 +632,8 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer 
> *cmd_buffer,
>mip.CompareOperation = COMPARE_SRCS_EQUAL;
> }
>  
> -   if (image->type == VK_IMAGE_TYPE_3D) {
> -  anv_image_ccs_op(cmd_buffer, image, aspect, level,
> -   0, anv_minify(image->extent.depth, level),
> -   resolve_op, true);
> -   } else {
> -  anv_image_ccs_op(cmd_buffer, image, aspect, level,
> -   array_layer, 1, resolve_op, true);
> -   }
> +   anv_image_ccs_op(cmd_buffer, image, aspect, level,
> +array_layer, 1, resolve_op, true);
>  }
>  
>  void
> @@ -836,9 +830,6 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
> base_layer, layer_count);
> }
>  
> -   if (image->type == VK_IMAGE_TYPE_3D)
> -  base_layer = 0;
> -
> if (base_layer >= anv_image_aux_layers(image, aspect, base_level))
>return;
>  
> @@ -897,10 +888,6 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
>  uint32_t level_layer_count =
> MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
>  
> -/* A transition of a 3D subresource works on all slices. */
> -if (image->type == VK_IMAGE_TYPE_3D)
> -   level_layer_count = anv_minify(image->extent.depth, level);
> -
>  anv_image_ccs_op(cmd_buffer, image, aspect, level,
>   base_layer, level_layer_count,
>   ISL_AUX_OP_AMBIGUATE, false);
> @@ -994,7 +981,10 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
>  
> for (uint32_t l = 0; l < level_count; l++) {
>uint32_t level = base_level + l;
> -  for (uint32_t a = 0; a < layer_count; a++) {
> +  uint32_t level_layer_count =
> + MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
> +
> +  for (uint32_t a = 0; a < level_layer_count; a++) {
>   uint32_t array_layer = base_layer + a;
>   anv_cmd_predicated_ccs_resolve(cmd_buffer, image, aspect,
>  level, array_layer, resolve_op,
> @@ -1663,12 +1653,20 @@ void genX(CmdPipelineBarrier)(
>  anv_image_expand_aspects(image, range->aspectMask);
>   uint32_t aspect_bit;
>  
> + uint32_t 

Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Roland Scheidegger
Am 03.02.2018 um 03:12 schrieb Marek Olšák:
> On Sat, Feb 3, 2018 at 2:55 AM, Roland Scheidegger  wrote:
>> Am 03.02.2018 um 00:31 schrieb Marek Olšák:
>>> On Sat, Feb 3, 2018 at 12:01 AM, Roland Scheidegger  
>>> wrote:
>>>> Am 02.02.2018 um 23:39 schrieb Marek Olšák:
>>>>> On Fri, Feb 2, 2018 at 10:26 PM, Roland Scheidegger
>>>>> wrote:
>>>>>> Am 02.02.2018 um 21:48 schrieb Marek Olšák:
>>>>>>> Hi,
>>>>>>>
>>>>>>> This is the second and hopefully final version of 32-bit pointer
>>>>>>> support for radeonsi.
>>>>>>>
>>>>>>> Constant buffer 0 now has restrictions on which buffers can be set
>>>>>>> in that slot.
>>>>>>>
>>>>>>> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
>>>>>>> will be accepted there).
>>>>>>>
>>>>>>> There will also be a dependency on new libdrm (not included in this
>>>>>>> series).
>>>>>>>
>>>>>>> Please review.
>>>>>>>
>>>>>>
>>>>>> From a api cleanliness point of view, I don't like this much.
>>>>>> First, you're making the hack case the default and even require it. IMHO
>>>>>> a driver should be able to bind ordinary UBOs to all buffer slots. This
>>>>>> is really not a nice burden to put on state trackers to do something
>>>>>> special for just slot 0. The gallium API should stay reasonable imho,
>>>>>> that's a bit too much custom tailoring for GL for my liking.
>>>>>>
>>>>>> Maybe I'm missing something but I can't quite see why you can't handle
>>>>>> this transparently inside the driver. Can't you just create a different
>>>>>> shader depending on what kind of buffer is bound or what's the problem?
>>>>>> (You wouldn't expect it to change therefore you should not have to
>>>>>> recompile.)
>>>>>
>>>>> We don't recompile shaders in the vast majority of cases. When shader
>>>>> compilation stalls rendering, the gaming experience is destroyed.
>>>>>
>>>>> There is no alternative. Our shader ABI will be set up such that it
>>>>> only has 32-bit pointers in shader registers. There are
>>>>> performance-related reasons for that.
>>>>
>>>> That seems to be quite limited, why can't you have a shader ABI which
>>>> can do either 32 or 64 bit pointers?
>>>
>>> Good questions. GCN shaders have only 16 dwords of constant memory
>>> (GFX9 has 32). There are no shader resource slots and the pixel shader
>>> is the only one to have real inputs. All other stages don't have any
>>> shader inputs except for system values.
>>>
>>> The 16 dwords contain pointers and states to load inputs and load
>>> descriptions of resource slots from memory. One of the pointers
>>> sometimes points to constant buffer 0. If it's a VS, there are only 13
>>> dwords, because 3 are reserved for baseinstance, basevertex, and
>>> drawID. We can also put some other data into that constant memory to
>>> skip load instructions. There is a huge incentive to free those
>>> precious dwords and use them for something else, like skipping some
>>> load instructions. I've been also considering 16-bit pointers (e.g.
>>> 32-bit pointers aligned to 64KB).
>>>
>>
>> Ok, so for other buffers you can't really do anything special? You just
>> go through a pointer to array-of-pointer lookup?
> 
> By default, the shader gets a pointer that points to a merged list of
> constant buffer and shader buffer descriptions in memory. If a shader
> only uses constant buffer 0 and no shader buffers, that pointer points
> to constant buffer 0 directly.
Ahh so you can easily guarantee a 32bit pointer if you use the
descriptor list (as that's a driver-internal allocation), in which case
the actual buffer address size doesn't matter (?), but not if you use
the optimization when only constant buffer 0 is used?
Albeit that also means you can't do any such optimization for other
cases (say, a simple shader only using UBO 0 as that will end up as
constant buffer 1, without a guaranteed 32bit address).
So I guess indeed if that optimization is worth it, your options are
limited (if you don't want a shader dependency on the actual type of
buffer bound).

Roland

> 
>> I thought "proper" apps would just use UBOs for everything these days
>> (hence nothing really much need for tuning slot 0). But maybe that's not
>> actually true... I can see that you'd want to optimize usage of this
>> precious space. I suppose GL doesn't give you much help there with its
>> iffy buffer handling.
> 
> Yes, games use UBOs.
> 
> Marek
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/blorp: Use the hardware op for CCS ambiguate on gen10+

2018-02-02 Thread Nanley Chery
On Tue, Jan 30, 2018 at 05:20:07PM -0800, Jason Ekstrand wrote:
> Completely untested.

The message in your fdo branch looks good.

> ---
>  src/intel/blorp/blorp_clear.c | 12 +++-
>  src/intel/blorp/blorp_genX_exec.h |  6 ++
>  2 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index dd29d9e..32ec31b 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -758,7 +758,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
> params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
>  
> -   if (batch->blorp->isl_dev->info->gen >= 9) {
> +   if (batch->blorp->isl_dev->info->gen >= 10) {
> +  assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
> + resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE ||
> + resolve_op == ISL_AUX_OP_AMBIGUATE);
> +   } else if (batch->blorp->isl_dev->info->gen >= 9) {
>assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
>   resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
> } else {
> @@ -893,6 +897,12 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
>  struct blorp_surf *surf,
>  uint32_t level, uint32_t layer)
>  {
> +   if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 10) {
> +  /* On gen10 and above, we have a hardware resolve op for this */
> +  return blorp_ccs_resolve(batch, surf, level, layer, 1,
> +   surf->surf->format, ISL_AUX_OP_AMBIGUATE);

The HW docs describe the fast-clear-to-0 as occurring during a clear pass.
Why are we doing it in a resolve pass?

> +   }
> +
> struct blorp_params params;
> blorp_params_init();
>  
> diff --git a/src/intel/blorp/blorp_genX_exec.h 
> b/src/intel/blorp/blorp_genX_exec.h
> index 5e1312a..85abf6b 100644
> --- a/src/intel/blorp/blorp_genX_exec.h
> +++ b/src/intel/blorp/blorp_genX_exec.h
> @@ -752,6 +752,12 @@ blorp_emit_ps_config(struct blorp_batch *batch,
>switch (params->fast_clear_op) {
>case ISL_AUX_OP_NONE:
>   break;
> +#if GEN_GEN >= 10
> +  case ISL_AUX_OP_AMBIGUATE:
> + ps.RenderTargetFastClearEnable = true;
> + ps.RenderTargetResolveType = FAST_CLEAR_0;
> + break;
> +#endif
>  #if GEN_GEN >= 9
>case ISL_AUX_OP_PARTIAL_RESOLVE:
>   ps.RenderTargetResolveType = RESOLVE_PARTIAL;
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Marek Olšák
On Sat, Feb 3, 2018 at 2:55 AM, Roland Scheidegger  wrote:
> Am 03.02.2018 um 00:31 schrieb Marek Olšák:
>> On Sat, Feb 3, 2018 at 12:01 AM, Roland Scheidegger  
>> wrote:
>>> Am 02.02.2018 um 23:39 schrieb Marek Olšák:
>>>> On Fri, Feb 2, 2018 at 10:26 PM, Roland Scheidegger  
>>>> wrote:
>>>>> Am 02.02.2018 um 21:48 schrieb Marek Olšák:
>>>>>> Hi,
>>>>>>
>>>>>> This is the second and hopefully final version of 32-bit pointer
>>>>>> support for radeonsi.
>>>>>>
>>>>>> Constant buffer 0 now has restrictions on which buffers can be set
>>>>>> in that slot.
>>>>>>
>>>>>> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
>>>>>> will be accepted there).
>>>>>>
>>>>>> There will also be a dependency on new libdrm (not included in this
>>>>>> series).
>>>>>>
>>>>>> Please review.
>>>>>>
>>>>>
>>>>> From an API cleanliness point of view, I don't like this much.
>>>>> First, you're making the hack case the default and even require it. IMHO
>>>>> a driver should be able to bind ordinary UBOs to all buffer slots. This
>>>>> is really not a nice burden to put on state trackers to do something
>>>>> special for just slot 0. The gallium API should stay reasonable imho,
>>>>> that's a bit too much custom tailoring for GL for my liking.
>>>>>
>>>>> Maybe I'm missing something but I can't quite see why you can't handle
>>>>> this transparently inside the driver. Can't you just create a different
>>>>> shader depending on what kind of buffer is bound or what's the problem?
>>>>> (You wouldn't expect it to change therefore you should not have to
>>>>> recompile.)
>>>>
>>>> We don't recompile shaders in the vast majority of cases. When shader
>>>> compilation stalls rendering, the gaming experience is destroyed.
>>>>
>>>> There is no alternative. Our shader ABI will be set up such that it
>>>> only has 32-bit pointers in shader registers. There are
>>>> performance-related reasons for that.
>>>
>>> That seems to be quite limited, why can't you have a shader ABI which
>>> can do either 32 or 64 bit pointers?
>>
>> Good questions. GCN shaders have only 16 dwords of constant memory
>> (GFX9 has 32). There are no shader resource slots and the pixel shader
>> is the only one to have real inputs. All other stages don't have any
>> shader inputs except for system values.
>>
>> The 16 dwords contain pointers and states to load inputs and load
>> descriptions of resource slots from memory. One of the pointers
>> sometimes points to constant buffer 0. If it's a VS, there are only 13
>> dwords, because 3 are reserved for baseinstance, basevertex, and
>> drawID. We can also put some other data into that constant memory to
>> skip load instructions. There is a huge incentive to free those
>> precious dwords and use them for something else, like skipping some
>> load instructions. I've been also considering 16-bit pointers (e.g.
>> 32-bit pointers aligned to 64KB).
>>
>
> Ok, so for other buffers you can't really do anything special? You just
> go through a pointer to array-of-pointer lookup?

By default, the shader gets a pointer that points to a merged list of
constant buffer and shader buffer descriptions in memory. If a shader
only uses constant buffer 0 and no shader buffers, that pointer points
to constant buffer 0 directly.

> I thought "proper" apps would just use UBOs for everything these days
> (hence not really much need for tuning slot 0). But maybe that's not
> actually true... I can see that you'd want to optimize usage of this
> precious space. I suppose GL doesn't give you much help there with its
> iffy buffer handling.

Yes, games use UBOs.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 21/24] anv: Allow fast-clearing the first slice of a multi-slice image

2018-02-02 Thread Nanley Chery
On Fri, Feb 02, 2018 at 04:42:14PM -0800, Jason Ekstrand wrote:
> On Fri, Feb 2, 2018 at 2:39 PM, Nanley Chery  wrote:
> 
> > On Fri, Jan 19, 2018 at 03:47:38PM -0800, Jason Ekstrand wrote:
> > > Now that we're tracking aux properly per-slice, we can enable this for
> > > applications which actually care.
> > > ---
> > >  src/intel/vulkan/anv_blorp.c   | 22 +++---
> > >  src/intel/vulkan/genX_cmd_buffer.c | 13 +
> > >  2 files changed, 24 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > > index 594b0d8..73a44fd 100644
> > > --- a/src/intel/vulkan/anv_blorp.c
> > > +++ b/src/intel/vulkan/anv_blorp.c
> > > @@ -1205,9 +1205,16 @@ anv_cmd_buffer_clear_subpass(struct
> > anv_cmd_buffer *cmd_buffer)
> > > image, VK_IMAGE_ASPECT_COLOR_BIT,
> > > att_state->aux_usage, );
> > >
> > > +  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
> > > +  uint32_t layer_count = fb->layers;
> > > +
> > >if (att_state->fast_clear) {
> > >   surf.clear_color = vk_to_isl_color(att_state->
> > clear_value.color);
> > >
> > > + /* We only support fast-clears on the first layer */
> > > + assert(iview->planes[0].isl.base_level == 0);
> > > + assert(iview->planes[0].isl.base_array_layer == 0);
> > > +
> > >   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> > >*
> > >*"After Render target fast clear, pipe-control with color
> > cache
> > > @@ -1229,27 +1236,28 @@ anv_cmd_buffer_clear_subpass(struct
> > anv_cmd_buffer *cmd_buffer)
> > >
> > >   assert(image->n_planes == 1);
> > >   blorp_fast_clear(, , iview->planes[0].isl.format,
> > > -  iview->planes[0].isl.base_level,
> > > -  iview->planes[0].isl.base_array_layer,
> > fb->layers,
> > > +  iview->planes[0].isl.base_level, base_layer,
> > 1,
> > >render_area.offset.x, render_area.offset.y,
> > >render_area.offset.x +
> > render_area.extent.width,
> > >render_area.offset.y +
> > render_area.extent.height);
> > > + base_layer++;
> > > + layer_count--;
> > >
> > >   cmd_buffer->state.pending_pipe_bits |=
> > >  ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
> > ANV_PIPE_CS_STALL_BIT;
> > > -  } else {
> > > +  }
> > > +
> > > +  if (layer_count > 0) {
> > >   assert(image->n_planes == 1);
> > >   anv_cmd_buffer_mark_image_written(cmd_buffer, image,
> > > VK_IMAGE_ASPECT_COLOR_BIT,
> > > att_state->aux_usage,
> > > iview->planes[0].isl.base_
> > level,
> > > -   iview->planes[0].isl.base_
> > array_layer,
> > > -   fb->layers);
> > > +   base_layer, layer_count);
> > >
> > >   blorp_clear(, , iview->planes[0].isl.format,
> > >   anv_swizzle_for_render(iview->
> > planes[0].isl.swizzle),
> > > - iview->planes[0].isl.base_level,
> > > - iview->planes[0].isl.base_array_layer, fb->layers,
> > > + iview->planes[0].isl.base_level, base_layer,
> > layer_count,
> > >   render_area.offset.x, render_area.offset.y,
> > >   render_area.offset.x + render_area.extent.width,
> > >   render_area.offset.y + render_area.extent.height,
> > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > b/src/intel/vulkan/genX_cmd_buffer.c
> > > index 4c83a5c..484246d 100644
> > > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > > @@ -329,12 +329,17 @@ color_attachment_compute_aux_usage(struct
> > anv_device * device,
> > > */
> > >if (att_state->fast_clear &&
> > >(iview->planes[0].isl.base_level > 0 ||
> > > -   iview->image->type == VK_IMAGE_TYPE_3D ||
> > > -   iview->image->array_size > 0)) {
> > > +   iview->planes[0].isl.base_array_layer > 0 ||
> > > +   cmd_state->framebuffer->layers > 1)) {
> > >   anv_perf_warn(device->instance, iview->image,
> > > "Rendering to a multi-LOD or multi-layer
> > framebuffer "
> > > -   "with LOAD_OP_CLEAR.  Not fast-clearing");
> > > - att_state->fast_clear = false;
> > > +   "with LOAD_OP_CLEAR.  Only fast-clearing the
> > first "
> > > +   "slice");
> > > +
> > > + /* Leave fast_clear enabled if we are clearing the first
> > slice. */
> > > + 

Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Roland Scheidegger
Am 03.02.2018 um 00:31 schrieb Marek Olšák:
> On Sat, Feb 3, 2018 at 12:01 AM, Roland Scheidegger  
> wrote:
>> Am 02.02.2018 um 23:39 schrieb Marek Olšák:
>>> On Fri, Feb 2, 2018 at 10:26 PM, Roland Scheidegger  
>>> wrote:
>>>> Am 02.02.2018 um 21:48 schrieb Marek Olšák:
>>>>> Hi,
>>>>>
>>>>> This is the second and hopefully final version of 32-bit pointer
>>>>> support for radeonsi.
>>>>>
>>>>> Constant buffer 0 now has restrictions on which buffers can be set
>>>>> in that slot.
>>>>>
>>>>> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
>>>>> will be accepted there).
>>>>>
>>>>> There will also be a dependency on new libdrm (not included in this
>>>>> series).
>>>>>
>>>>> Please review.
>>>>>
>>>>
>>>> From an API cleanliness point of view, I don't like this much.
>>>> First, you're making the hack case the default and even require it. IMHO
>>>> a driver should be able to bind ordinary UBOs to all buffer slots. This
>>>> is really not a nice burden to put on state trackers to do something
>>>> special for just slot 0. The gallium API should stay reasonable imho,
>>>> that's a bit too much custom tailoring for GL for my liking.
>>>>
>>>> Maybe I'm missing something but I can't quite see why you can't handle
>>>> this transparently inside the driver. Can't you just create a different
>>>> shader depending on what kind of buffer is bound or what's the problem?
>>>> (You wouldn't expect it to change therefore you should not have to
>>>> recompile.)
>>>
>>> We don't recompile shaders in the vast majority of cases. When shader
>>> compilation stalls rendering, the gaming experience is destroyed.
>>>
>>> There is no alternative. Our shader ABI will be set up such that it
>>> only has 32-bit pointers in shader registers. There are
>>> performance-related reasons for that.
>>
>> That seems to be quite limited, why can't you have a shader ABI which
>> can do either 32 or 64 bit pointers?
> 
> Good questions. GCN shaders have only 16 dwords of constant memory
> (GFX9 has 32). There are no shader resource slots and the pixel shader
> is the only one to have real inputs. All other stages don't have any
> shader inputs except for system values.
> 
> The 16 dwords contain pointers and states to load inputs and load
> descriptions of resource slots from memory. One of the pointers
> sometimes points to constant buffer 0. If it's a VS, there are only 13
> dwords, because 3 are reserved for baseinstance, basevertex, and
> drawID. We can also put some other data into that constant memory to
> skip load instructions. There is a huge incentive to free those
> precious dwords and use them for something else, like skipping some
> load instructions. I've been also considering 16-bit pointers (e.g.
> 32-bit pointers aligned to 64KB).
> 

Ok, so for other buffers you can't really do anything special? You just
go through a pointer to array-of-pointer lookup?
I thought "proper" apps would just use UBOs for everything these days
(hence not really much need for tuning slot 0). But maybe that's not
actually true... I can see that you'd want to optimize usage of this
precious space. I suppose GL doesn't give you much help there with its
iffy buffer handling.

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking

2018-02-02 Thread Nanley Chery
On Fri, Feb 02, 2018 at 02:39:25PM -0800, Jason Ekstrand wrote:
> On Fri, Feb 2, 2018 at 1:47 PM, Nanley Chery  wrote:
> 
> > On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote:
> > > This commit completely reworks aux tracking.  This includes a number of
> > > somewhat distinct changes:
> > >
> > >  1) Since we are no longer fast-clearing multiple slices, we only need
> > > to track one fast clear color and one fast clear type.
> > >
> > >  2) We store two bits for fast clear instead of one to let us
> > > distinguish between zero and non-zero fast clear colors.  This is
> > > needed so that we can do full resolves when transitioning to
> > > PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
> > > values in all sorts of places wouldn't normally.
> >^
> > Missing word?  we ?
> >
> 
> Yup.  Fixed.
> 
> 
> > >
> > >  3) We now track compression state as a boolean separate from fast clear
> > > type and this is tracked on a per-slice granularity.
> > >
> > > The previous scheme had some issues when it came to individual slices of
> > > a multi-LOD images.  In particular, we only tracked "needs resolve"
> > > per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
> > > a portion of the image and would set "needs resolve" to false anyway.
> > > Also, any transition from an undefined layout would reset the clear
> > > color for the entire LOD regardless of whether or not there was some
> > > clear color on some other slice.
> > >
> > > As far as full/partial resolves go, the assumptions of the previous
> > > scheme held because the one case where we do need a full resolve when
> > > CCS_E is enabled is for window-system images.  Since we only ever
> > > allowed X-tiled window-system images, CCS was entirely disabled on gen9+
> > > and we never got CCS_E.  With the advent of Y-tiled window-system
> > > buffers, we now need to properly support doing a full resolve of images
> > > marked CCS_E.
> > > ---
> > >  src/intel/vulkan/anv_blorp.c   |   3 +-
> > >  src/intel/vulkan/anv_image.c   |  96 ++-
> > >  src/intel/vulkan/anv_private.h |  53 +++---
> > >  src/intel/vulkan/genX_cmd_buffer.c | 340 +++---
> > ---
> > >  4 files changed, 331 insertions(+), 161 deletions(-)
> > >
> > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > > index 3698543..594b0d8 100644
> > > --- a/src/intel/vulkan/anv_blorp.c
> > > +++ b/src/intel/vulkan/anv_blorp.c
> > > @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> > > * particular value and don't care about format or clear value.
> > > */
> > >const struct anv_address clear_color_addr =
> > > - anv_image_get_clear_color_addr(cmd_buffer->device, image,
> > > -aspect, level);
> > > + anv_image_get_clear_color_addr(cmd_buffer->device, image,
> > aspect);
> > >surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> > > }
> > >
> > > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > > index 94b9ecb..d5f8dcf 100644
> > > --- a/src/intel/vulkan/anv_image.c
> > > +++ b/src/intel/vulkan/anv_image.c
> > > @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct
> > gen_device_info *devinfo,
> > >   * fast-clear values in non-trivial cases (e.g., outside of a render
> > pass in
> > >   * which a fast clear has occurred).
> > >   *
> > > - * For the purpose of discoverability, the algorithm used to manage
> > this buffer
> > > - * is described here. A clear value in this buffer is updated when a
> > fast clear
> > > - * is performed on a subresource. One of two synchronization operations
> > is
> > > - * performed in order for a following memory access to use the
> > fast-clear
> > > - * value:
> > > - *a. Copy the value from the buffer to the surface state object
> > used for
> > > - *   reading. This is done implicitly when the value is the clear
> > value
> > > - *   predetermined to be the default in other surface state
> > objects. This
> > > - *   is currently only done explicitly for the operation below.
> > > - *b. Do (a) and use the surface state object to resolve the
> > subresource.
> > > - *   This is only done during layout transitions for decent
> > performance.
> > > + * In order to avoid having multiple clear colors for a single plane of
> > an
> > > + * image (hence a single RENDER_SURFACE_STATE), we only allow
> > fast-clears on
> > > + * the first slice (level 0, layer 0).  At the time of our testing (Jan
> > 17,
> > > + * 2018), there were known applications which would benefit from
> > fast-clearing
> > > + * more than just the first slice.
> > >   *
> > > - * With the above scheme, we can fast-clear whenever the hardware
> > allows except
> > > - * for two cases in which 

[Mesa-dev] [PATCH 2/2] i965: Create new program cache bo when clearing the program cache

2018-02-02 Thread Jordan Justen
When the disk shader cache CI testing was enabled, we started noticing
occasional failures on deqp test runs. (Mainly SNB, rarely HSW)

Before this change, when we cleared the (in memory) program cache we
reused the same bo. Since the disk shader cache quickly restores
programs, it appears that this would lead to overwrites of the older
program binaries in the in memory program cache that apparently were
still executing in some cases. If these programs were still executing,
this could cause a GPU hang.

This issue probably is not disk shader cache specific, but rather may
have been hidden since the compiler would take some time to recompile
programs after the cache was cleared.

Cc: Kenneth Graunke 
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_program_cache.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
b/src/mesa/drivers/dri/i965/brw_program_cache.c
index f084f94f929..a6638c3d302 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -448,6 +448,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache 
*cache)
brw->cs.base.prog_data = NULL;
 
intel_batchbuffer_flush(brw);
+   if (cache->bo)
+  brw_cache_new_bo(cache, cache->bo->size, false);
 }
 
 void
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Add copy param to brw_cache_new_bo

2018-02-02 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_program_cache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
b/src/mesa/drivers/dri/i965/brw_program_cache.c
index 9266273b5da..f084f94f929 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -213,7 +213,7 @@ brw_search_cache(struct brw_cache *cache,
 }
 
 static void
-brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
+brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size, bool copy)
 {
struct brw_context *brw = cache->brw;
struct brw_bo *new_bo;
@@ -229,7 +229,7 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
MAP_ASYNC | MAP_PERSISTENT);
 
/* Copy any existing data that needs to be saved. */
-   if (cache->next_offset != 0) {
+   if (copy && cache->next_offset != 0) {
 #ifdef USE_SSE41
   if (!cache->bo->cache_coherent && cpu_has_sse4_1)
  _mesa_streaming_load_memcpy(map, cache->map, cache->next_offset);
@@ -286,7 +286,7 @@ brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
   while (cache->next_offset + size > new_size)
  new_size *= 2;
 
-  brw_cache_new_bo(cache, new_size);
+  brw_cache_new_bo(cache, new_size, true);
}
 
offset = cache->next_offset;
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/eg: use texture target to pick array size not view target (v2)

2018-02-02 Thread Roland Scheidegger
Am 02.02.2018 um 06:33 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> This fixes a few CTS cases in :
> KHR-GL45.texture_view.view_sampling
> 
> some multisample cases are still broken, but not sure this is
> the same problem.
> 
> v2: fix more cases
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/evergreen_state.c | 17 ++---
>  1 file changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 63a39a23f8..90f05c06d3 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -811,18 +811,21 @@ static int evergreen_fill_tex_resource_words(struct 
> r600_context *rctx,
>   }
>   nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
>  
> - if (params->target == PIPE_TEXTURE_1D_ARRAY) {
> - height = 1;
> - depth = texture->array_size;
> - } else if (params->target == PIPE_TEXTURE_2D_ARRAY) {
> - depth = texture->array_size;
> - } else if (params->target == PIPE_TEXTURE_CUBE_ARRAY)
> - depth = texture->array_size / 6;
>  
>   va = tmp->resource.gpu_address;
>  
>   /* array type views and views into array types need to use layer offset 
> */
>   dim = r600_tex_dim(tmp, params->target, texture->nr_samples);
> +
> + if (dim == V_03_SQ_TEX_DIM_1D_ARRAY) {
> + height = 1;
> + depth = texture->array_size;
I know this is unchanged, but is this actually correct? The docs say:
TEX_HEIGHT 13:0 0x0
Height of the texture minus 1; number of stacks minus 1,
for 1D arrays.
TEX_DEPTH 26:14 0x0
Depth of the texture minus 1; number of stacks minus 1,
for 2D arrays.

So if some weirdness is going on here I think some comment would be nice.


> + } else if (dim == V_03_SQ_TEX_DIM_2D_ARRAY ||
> +dim == V_03_SQ_TEX_DIM_2D_ARRAY_MSAA) {
> + depth = texture->array_size;
> + } else if (dim == V_03_SQ_TEX_DIM_CUBEMAP)
> + depth = texture->array_size / 6;
> +
>   tex_resource_words[0] = (S_03_DIM(dim) |
>S_03_PITCH((pitch / 8) - 1) |
>S_03_TEX_WIDTH(width - 1));
> 

This looks otherwise alright to me.
Reviewed-by: Roland Scheidegger 

FWIW there's some bits just below which also contradict the docs:

if (params->target != texture->target && depth == 1) {
last_layer = params->first_layer;
}
So, if params->target was CUBEMAP_ARRAY (or just cubemap) and
texture->target was 2D_ARRAY with a array size of 6, then depth would be
1 and we'd smash last_layer to first_layer. The docs say "For cubemaps
and cubemap arrays, LAST_ARRAY must be programmed with BASE_ARRAY +
(N*6) - 1, where N is the number of cubemaps in the array, or N=1 for a
single cubemap." So I have no idea what this bit here is supposed to do,
but it doesn't look right.

Maybe we should fix these issues on r600 too? Albeit we don't support
ARB_texture_view there so I suppose it's not really needed. (Though
afaik the chip should be able to support it, since d3d10 requires it,
but it's always possible there's some gl specific bits which don't work,
in particular view format classes compatibility is quite different, but
maybe fixing those issues there might be all that's needed?)
Albeit r600 is quite ancient at this point :-).

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 21/24] anv: Allow fast-clearing the first slice of a multi-slice image

2018-02-02 Thread Jason Ekstrand
On Fri, Feb 2, 2018 at 2:39 PM, Nanley Chery  wrote:

> On Fri, Jan 19, 2018 at 03:47:38PM -0800, Jason Ekstrand wrote:
> > Now that we're tracking aux properly per-slice, we can enable this for
> > applications which actually care.
> > ---
> >  src/intel/vulkan/anv_blorp.c   | 22 +++---
> >  src/intel/vulkan/genX_cmd_buffer.c | 13 +
> >  2 files changed, 24 insertions(+), 11 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > index 594b0d8..73a44fd 100644
> > --- a/src/intel/vulkan/anv_blorp.c
> > +++ b/src/intel/vulkan/anv_blorp.c
> > @@ -1205,9 +1205,16 @@ anv_cmd_buffer_clear_subpass(struct
> anv_cmd_buffer *cmd_buffer)
> > image, VK_IMAGE_ASPECT_COLOR_BIT,
> > att_state->aux_usage, );
> >
> > +  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
> > +  uint32_t layer_count = fb->layers;
> > +
> >if (att_state->fast_clear) {
> >   surf.clear_color = vk_to_isl_color(att_state->
> clear_value.color);
> >
> > + /* We only support fast-clears on the first layer */
> > + assert(iview->planes[0].isl.base_level == 0);
> > + assert(iview->planes[0].isl.base_array_layer == 0);
> > +
> >   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> >*
> >*"After Render target fast clear, pipe-control with color
> cache
> > @@ -1229,27 +1236,28 @@ anv_cmd_buffer_clear_subpass(struct
> anv_cmd_buffer *cmd_buffer)
> >
> >   assert(image->n_planes == 1);
> >   blorp_fast_clear(, , iview->planes[0].isl.format,
> > -  iview->planes[0].isl.base_level,
> > -  iview->planes[0].isl.base_array_layer,
> fb->layers,
> > +  iview->planes[0].isl.base_level, base_layer,
> 1,
> >render_area.offset.x, render_area.offset.y,
> >render_area.offset.x +
> render_area.extent.width,
> >render_area.offset.y +
> render_area.extent.height);
> > + base_layer++;
> > + layer_count--;
> >
> >   cmd_buffer->state.pending_pipe_bits |=
> >  ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
> ANV_PIPE_CS_STALL_BIT;
> > -  } else {
> > +  }
> > +
> > +  if (layer_count > 0) {
> >   assert(image->n_planes == 1);
> >   anv_cmd_buffer_mark_image_written(cmd_buffer, image,
> > VK_IMAGE_ASPECT_COLOR_BIT,
> > att_state->aux_usage,
> > iview->planes[0].isl.base_
> level,
> > -   iview->planes[0].isl.base_
> array_layer,
> > -   fb->layers);
> > +   base_layer, layer_count);
> >
> >   blorp_clear(, , iview->planes[0].isl.format,
> >   anv_swizzle_for_render(iview->
> planes[0].isl.swizzle),
> > - iview->planes[0].isl.base_level,
> > - iview->planes[0].isl.base_array_layer, fb->layers,
> > + iview->planes[0].isl.base_level, base_layer,
> layer_count,
> >   render_area.offset.x, render_area.offset.y,
> >   render_area.offset.x + render_area.extent.width,
> >   render_area.offset.y + render_area.extent.height,
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> > index 4c83a5c..484246d 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -329,12 +329,17 @@ color_attachment_compute_aux_usage(struct
> anv_device * device,
> > */
> >if (att_state->fast_clear &&
> >(iview->planes[0].isl.base_level > 0 ||
> > -   iview->image->type == VK_IMAGE_TYPE_3D ||
> > -   iview->image->array_size > 0)) {
> > +   iview->planes[0].isl.base_array_layer > 0 ||
> > +   cmd_state->framebuffer->layers > 1)) {
> >   anv_perf_warn(device->instance, iview->image,
> > "Rendering to a multi-LOD or multi-layer
> framebuffer "
> > -   "with LOAD_OP_CLEAR.  Not fast-clearing");
> > - att_state->fast_clear = false;
> > +   "with LOAD_OP_CLEAR.  Only fast-clearing the
> first "
> > +   "slice");
> > +
> > + /* Leave fast_clear enabled if we are clearing the first
> slice. */
> > + if (iview->planes[0].isl.base_level > 0 ||
> > + iview->planes[0].isl.base_array_layer > 0)
> > +att_state->fast_clear = false;
>
> The new perf_warn is only true for the else portion of this if statement.
>

No, it fires whenever the framebuffer 

[Mesa-dev] [PATCH] radv: implement VK_EXT_external_memory_host

2018-02-02 Thread Fredrik Höglund
Ported from the radeonsi GL_AMD_pinned_memory implementation.

Signed-off-by: Fredrik Höglund 
---

Tested using a version of the cube demo modified to use host memory
allocations for buffers and staging images.

 src/amd/vulkan/radv_device.c  | 60 ++-
 src/amd/vulkan/radv_extensions.py |  1 +
 src/amd/vulkan/radv_formats.c | 30 +++---
 src/amd/vulkan/radv_private.h |  1 +
 src/amd/vulkan/radv_radeon_winsys.h   |  4 ++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 49 ++
 6 files changed, 137 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 9fda419d584..09bb382eeb8 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -810,6 +810,12 @@ void radv_GetPhysicalDeviceProperties2KHR(
properties->maxDiscardRectangles = 
MAX_DISCARD_RECTANGLES;
break;
}
+   case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
+   VkPhysicalDeviceExternalMemoryHostPropertiesEXT 
*properties =
+   (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) 
ext;
+   properties->minImportedHostPointerAlignment = 4096;
+   break;
+   }
default:
break;
}
@@ -923,6 +929,33 @@ void radv_GetPhysicalDeviceMemoryProperties2KHR(
  
>memoryProperties);
 }
 
+VkResult radv_GetMemoryHostPointerPropertiesEXT(
+   VkDevice_device,
+   VkExternalMemoryHandleTypeFlagBitsKHR   handleType,
+   const void *pHostPointer,
+   VkMemoryHostPointerPropertiesEXT   
*pMemoryHostPointerProperties)
+{
+   RADV_FROM_HANDLE(radv_device, device, _device);
+
+   switch (handleType)
+   {
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
+   const struct radv_physical_device *physical_device = 
device->physical_device;
+   uint32_t memoryTypeBits = 0;
+   for (int i = 0; i < 
physical_device->memory_properties.memoryTypeCount; i++) {
+   if (physical_device->mem_type_indices[i] == 
RADV_MEM_TYPE_GTT_CACHED) {
+   memoryTypeBits = (1 << i);
+   break;
+   }
+   }
+   pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
+   return VK_SUCCESS;
+   }
+   default:
+   return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
+   }
+}
+
 static enum radeon_ctx_priority
 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT 
*pObj)
 {
@@ -2246,6 +2279,8 @@ static VkResult radv_alloc_memory(struct radv_device 
*device,
vk_find_struct_const(pAllocateInfo->pNext, 
MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
const VkExportMemoryAllocateInfoKHR *export_info =
vk_find_struct_const(pAllocateInfo->pNext, 
EXPORT_MEMORY_ALLOCATE_INFO_KHR);
+   const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
+   vk_find_struct_const(pAllocateInfo->pNext, 
IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
 
const struct wsi_memory_allocate_info *wsi_info =
vk_find_struct_const(pAllocateInfo->pNext, 
WSI_MEMORY_ALLOCATE_INFO_MESA);
@@ -2266,6 +2301,8 @@ static VkResult radv_alloc_memory(struct radv_device 
*device,
mem->buffer = NULL;
}
 
+   mem->user_ptr = NULL;
+
if (import_info) {
assert(import_info->handleType ==
   VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
@@ -2282,6 +2319,20 @@ static VkResult radv_alloc_memory(struct radv_device 
*device,
}
}
 
+   if (host_ptr_info) {
+   assert(host_ptr_info->handleType == 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
+   assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
+   mem->bo = device->ws->buffer_from_ptr(device->ws, 
host_ptr_info->pHostPointer,
+ 
pAllocateInfo->allocationSize);
+   if (!mem->bo) {
+   result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
+   goto fail;
+   } else {
+   mem->user_ptr = host_ptr_info->pHostPointer;
+   goto out_success;
+   }
+   }
+
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
@@ -2362,7 +2413,11 @@ VkResult radv_MapMemory(

Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Marek Olšák
On Sat, Feb 3, 2018 at 12:01 AM, Roland Scheidegger  wrote:
> Am 02.02.2018 um 23:39 schrieb Marek Olšák:
>> On Fri, Feb 2, 2018 at 10:26 PM, Roland Scheidegger  
>> wrote:
>>> Am 02.02.2018 um 21:48 schrieb Marek Olšák:
>>>> Hi,
>>>>
>>>> This is the second and hopefully final version of 32-bit pointer
>>>> support for radeonsi.
>>>>
>>>> Constant buffer 0 now has restrictions on which buffers can be set
>>>> in that slot.
>>>>
>>>> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
>>>> will be accepted there).
>>>>
>>>> There will also be a dependency on new libdrm (not included in this
>>>> series).
>>>>
>>>> Please review.
>>>>
>>>
>>> From an API cleanliness point of view, I don't like this much.
>>> First, you're making the hack case the default and even require it. IMHO
>>> a driver should be able to bind ordinary UBOs to all buffer slots. This
>>> is really not a nice burden to put on state trackers to do something
>>> special for just slot 0. The gallium API should stay reasonable imho,
>>> that's a bit too much custom tailoring for GL for my liking.
>>>
>>> Maybe I'm missing something but I can't quite see why you can't handle
>>> this transparently inside the driver. Can't you just create a different
>>> shader depending on what kind of buffer is bound or what's the problem?
>>> (You wouldn't expect it to change therefore you should not have to
>>> recompile.)
>>
>> We don't recompile shaders in the vast majority of cases. When shader
>> compilation stalls rendering, the gaming experience is destroyed.
>>
>> There is no alternative. Our shader ABI will be set up such that it
>> only has 32-bit pointers in shader registers. There are
>> performance-related reasons for that.
>
> That seems to be quite limited, why can't you have a shader ABI which
> can do either 32 or 64 bit pointers?

Good questions. GCN shaders have only 16 dwords of constant memory
(GFX9 has 32). There are no shader resource slots and the pixel shader
is the only one to have real inputs. All other stages don't have any
shader inputs except for system values.

The 16 dwords contain pointers and states to load inputs and load
descriptions of resource slots from memory. One of the pointers
sometimes points to constant buffer 0. If it's a VS, there are only 13
dwords, because 3 are reserved for baseinstance, basevertex, and
drawID. We can also put some other data into that constant memory to
skip load instructions. There is a huge incentive to free those
precious dwords and use them for something else, like skipping some
load instructions. I've been also considering 16-bit pointers (e.g.
32-bit pointers aligned to 64KB).

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Marek Olšák
On Fri, Feb 2, 2018 at 11:39 PM, Marek Olšák  wrote:
> On Fri, Feb 2, 2018 at 10:26 PM, Roland Scheidegger  
> wrote:
>> Am 02.02.2018 um 21:48 schrieb Marek Olšák:
>>> Hi,
>>>
>>> This is the second and hopefully final version of 32-bit pointer
>>> support for radeonsi.
>>>
>>> Constant buffer 0 now has restrictions on which buffers can be set
>>> in that slot.
>>>
>>> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
>>> will be accepted there).
>>>
>>> There will also be a dependency on new libdrm (not included in this
>>> series).
>>>
>>> Please review.
>>>
>>
>> From an API cleanliness point of view, I don't like this much.
>> First, you're making the hack case the default and even require it. IMHO
>> a driver should be able to bind ordinary UBOs to all buffer slots. This
>> is really not a nice burden to put on state trackers to do something
>> special for just slot 0. The gallium API should stay reasonable imho,
>> that's a bit too much custom tailoring for GL for my liking.
>>
>> Maybe I'm missing something but I can't quite see why you can't handle
>> this transparently inside the driver. Can't you just create a different
>> shader depending on what kind of buffer is bound or what's the problem?
>> (You wouldn't expect it to change therefore you should not have to
>> recompile.)
>
> We don't recompile shaders in the vast majority of cases. When shader
> compilation stalls rendering, the gaming experience is destroyed.
>
> There is no alternative. Our shader ABI will be set up such that it
> only has 32-bit pointers in shader registers. There are
> performance-related reasons for that.
>
> We'll handle maintenance and testing of this feature. You won't have
> to do anything.

The CAP is only a formality. In reality, we only have to fix
vl_compositor.c to use pipe_context::const_uploader::flags (or the
CAP) and that's it. All other code we care about is unaffected (GL,
VDPAU, VAAPI, OpenMax, Nine).

There are no users binding a real buffer as constant buffer 0 other
than VL and XA.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Roland Scheidegger
Am 02.02.2018 um 23:39 schrieb Marek Olšák:
> On Fri, Feb 2, 2018 at 10:26 PM, Roland Scheidegger  
> wrote:
>> Am 02.02.2018 um 21:48 schrieb Marek Olšák:
>>> Hi,
>>>
>>> This is the second and hopefully final version of 32-bit pointer
>>> support for radeonsi.
>>>
>>> Constant buffer 0 now has restrictions on which buffers can be set
>>> in that slot.
>>>
>>> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
>>> will be accepted there).
>>>
>>> There will also be a dependency on new libdrm (not included in this
>>> series).
>>>
>>> Please review.
>>>
>>
>> From an API cleanliness point of view, I don't like this much.
>> First, you're making the hack case the default and even require it. IMHO
>> a driver should be able to bind ordinary UBOs to all buffer slots. This
>> is really not a nice burden to put on state trackers to do something
>> special for just slot 0. The gallium API should stay reasonable imho,
>> that's a bit too much custom tailoring for GL for my liking.
>>
>> Maybe I'm missing something but I can't quite see why you can't handle
>> this transparently inside the driver. Can't you just create a different
>> shader depending on what kind of buffer is bound or what's the problem?
>> (You wouldn't expect it to change therefore you should not have to
>> recompile.)
> 
> We don't recompile shaders in the vast majority of cases. When shader
> compilation stalls rendering, the gaming experience is destroyed.
> 
> There is no alternative. Our shader ABI will be set up such that it
> only has 32-bit pointers in shader registers. There are
> performance-related reasons for that.

That seems to be quite limited, why can't you have a shader ABI which
can do either 32 or 64 bit pointers?

> 
> We'll handle maintenance and testing of this feature. You won't have
> to do anything.
> 

Note that on some apis, there's no way state trackers can do what you
want. For instance, with a d3d10 state tracker, you simply can't tell
that you're going to bind some buffer to constant slot 0 - there is
absolutely nothing special about constant slot 0 (just like with gallium
until now). (You could, of course, avoid potential problems with such a
hypothetical state tracker by just avoiding slot 0 altogether, albeit
you probably then don't expose enough ordinary slots). (I would actually
expect for d3d10-style constant buffers you'd wanted to use 32bit
pointers for all of them in any case, not just those at slot 0.)

But well, it won't really affect anything but radeonsi, so while I think
this kind of interface is a mistake and ugly as hell, feel free to stick
to it...

Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] gallium: use PIPE_CAP_CONSTBUF0_FLAGS

2018-02-02 Thread Marek Olšák
On Fri, Feb 2, 2018 at 10:44 PM, Axel Davy  wrote:
> Hi Marek,
>
> Since the previous patch makes it mandatory to use the flags when required,
> I guess this patch should also add the necessary changes to gallium nine.

Nine uses user buffers and const_uploader, so it's unaffected.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Marek Olšák
On Fri, Feb 2, 2018 at 10:26 PM, Roland Scheidegger  wrote:
> Am 02.02.2018 um 21:48 schrieb Marek Olšák:
>> Hi,
>>
>> This is the second and hopefully final version of 32-bit pointer
>> support for radeonsi.
>>
>> Constant buffer 0 now has restrictions on which buffers can be set
>> in that slot.
>>
>> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
>> will be accepted there).
>>
>> There will also be a dependency on new libdrm (not included in this
>> series).
>>
>> Please review.
>>
>
> From an API cleanliness point of view, I don't like this much.
> First, you're making the hack case the default and even require it. IMHO
> a driver should be able to bind ordinary UBOs to all buffer slots. This
> is really not a nice burden to put on state trackers to do something
> special for just slot 0. The gallium API should stay reasonable imho,
> that's a bit too much custom tailoring for GL for my liking.
>
> Maybe I'm missing something but I can't quite see why you can't handle
> this transparently inside the driver. Can't you just create a different
> shader depending on what kind of buffer is bound or what's the problem?
> (You wouldn't expect it to change therefore you should not have to
> recompile.)

We don't recompile shaders in the vast majority of cases. When shader
compilation stalls rendering, the gaming experience is destroyed.

There is no alternative. Our shader ABI will be set up such that it
only has 32-bit pointers in shader registers. There are
performance-related reasons for that.

We'll handle maintenance and testing of this feature. You won't have
to do anything.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking

2018-02-02 Thread Jason Ekstrand
On Fri, Feb 2, 2018 at 1:47 PM, Nanley Chery  wrote:

> On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote:
> > This commit completely reworks aux tracking.  This includes a number of
> > somewhat distinct changes:
> >
> >  1) Since we are no longer fast-clearing multiple slices, we only need
> > to track one fast clear color and one fast clear type.
> >
> >  2) We store two bits for fast clear instead of one to let us
> > distinguish between zero and non-zero fast clear colors.  This is
> > needed so that we can do full resolves when transitioning to
> > PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
> > values in all sorts of places wouldn't normally.
>^
> Missing word?  we ?
>

Yup.  Fixed.


> >
> >  3) We now track compression state as a boolean separate from fast clear
> > type and this is tracked on a per-slice granularity.
> >
> > The previous scheme had some issues when it came to individual slices of
> > a multi-LOD image.  In particular, we only tracked "needs resolve"
> > per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
> > a portion of the image and would set "needs resolve" to false anyway.
> > Also, any transition from an undefined layout would reset the clear
> > color for the entire LOD regardless of whether or not there was some
> > clear color on some other slice.
> >
> > As far as full/partial resolves go, the assumptions of the previous
> > scheme held because the one case where we do need a full resolve when
> > CCS_E is enabled is for window-system images.  Since we only ever
> > allowed X-tiled window-system images, CCS was entirely disabled on gen9+
> > and we never got CCS_E.  With the advent of Y-tiled window-system
> > buffers, we now need to properly support doing a full resolve of images
> > marked CCS_E.
> > ---
> >  src/intel/vulkan/anv_blorp.c   |   3 +-
> >  src/intel/vulkan/anv_image.c   |  96 ++-
> >  src/intel/vulkan/anv_private.h |  53 +++---
> >  src/intel/vulkan/genX_cmd_buffer.c | 340 +++---
> ---
> >  4 files changed, 331 insertions(+), 161 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > index 3698543..594b0d8 100644
> > --- a/src/intel/vulkan/anv_blorp.c
> > +++ b/src/intel/vulkan/anv_blorp.c
> > @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> > * particular value and don't care about format or clear value.
> > */
> >const struct anv_address clear_color_addr =
> > - anv_image_get_clear_color_addr(cmd_buffer->device, image,
> > -aspect, level);
> > + anv_image_get_clear_color_addr(cmd_buffer->device, image,
> aspect);
> >surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> > }
> >
> > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > index 94b9ecb..d5f8dcf 100644
> > --- a/src/intel/vulkan/anv_image.c
> > +++ b/src/intel/vulkan/anv_image.c
> > @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct
> gen_device_info *devinfo,
> >   * fast-clear values in non-trivial cases (e.g., outside of a render
> pass in
> >   * which a fast clear has occurred).
> >   *
> > - * For the purpose of discoverability, the algorithm used to manage
> this buffer
> > - * is described here. A clear value in this buffer is updated when a
> fast clear
> > - * is performed on a subresource. One of two synchronization operations
> is
> > - * performed in order for a following memory access to use the
> fast-clear
> > - * value:
> > - *a. Copy the value from the buffer to the surface state object
> used for
> > - *   reading. This is done implicitly when the value is the clear
> value
> > - *   predetermined to be the default in other surface state
> objects. This
> > - *   is currently only done explicitly for the operation below.
> > - *b. Do (a) and use the surface state object to resolve the
> subresource.
> > - *   This is only done during layout transitions for decent
> performance.
> > + * In order to avoid having multiple clear colors for a single plane of
> an
> > + * image (hence a single RENDER_SURFACE_STATE), we only allow
> fast-clears on
> > + * the first slice (level 0, layer 0).  At the time of our testing (Jan
> 17,
> > + * 2018), there were known applications which would benefit from
> fast-clearing
> > + * more than just the first slice.
> >   *
> > - * With the above scheme, we can fast-clear whenever the hardware
> allows except
> > - * for two cases in which synchronization becomes impossible or
> undesirable:
> > - ** The subresource is in the GENERAL layout and is cleared to a
> value
> > - *  other than the special default value.
> > + * The fast clear portion of the image is laid out in the following
> order:
> >   *
> > 

Re: [Mesa-dev] [PATCH v2 21/24] anv: Allow fast-clearing the first slice of a multi-slice image

2018-02-02 Thread Nanley Chery
On Fri, Jan 19, 2018 at 03:47:38PM -0800, Jason Ekstrand wrote:
> Now that we're tracking aux properly per-slice, we can enable this for
> applications which actually care.
> ---
>  src/intel/vulkan/anv_blorp.c   | 22 +++---
>  src/intel/vulkan/genX_cmd_buffer.c | 13 +
>  2 files changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 594b0d8..73a44fd 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1205,9 +1205,16 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
> *cmd_buffer)
> image, VK_IMAGE_ASPECT_COLOR_BIT,
> att_state->aux_usage, &surf);
>  
> +  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
> +  uint32_t layer_count = fb->layers;
> +
>if (att_state->fast_clear) {
>   surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
>  
> + /* We only support fast-clears on the first layer */
> + assert(iview->planes[0].isl.base_level == 0);
> + assert(iview->planes[0].isl.base_array_layer == 0);
> +
>   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
>*
>*"After Render target fast clear, pipe-control with color cache
> @@ -1229,27 +1236,28 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
> *cmd_buffer)
>  
>   assert(image->n_planes == 1);
>   blorp_fast_clear(&batch, &surf, iview->planes[0].isl.format,
> -  iview->planes[0].isl.base_level,
> -  iview->planes[0].isl.base_array_layer, fb->layers,
> +  iview->planes[0].isl.base_level, base_layer, 1,
>render_area.offset.x, render_area.offset.y,
>render_area.offset.x + render_area.extent.width,
>render_area.offset.y + render_area.extent.height);
> + base_layer++;
> + layer_count--;
>  
>   cmd_buffer->state.pending_pipe_bits |=
>  ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> -  } else {
> +  }
> +
> +  if (layer_count > 0) {
>   assert(image->n_planes == 1);
>   anv_cmd_buffer_mark_image_written(cmd_buffer, image,
> VK_IMAGE_ASPECT_COLOR_BIT,
> att_state->aux_usage,
> iview->planes[0].isl.base_level,
> -   
> iview->planes[0].isl.base_array_layer,
> -   fb->layers);
> +   base_layer, layer_count);
>  
>   blorp_clear(&batch, &surf, iview->planes[0].isl.format,
>   anv_swizzle_for_render(iview->planes[0].isl.swizzle),
> - iview->planes[0].isl.base_level,
> - iview->planes[0].isl.base_array_layer, fb->layers,
> + iview->planes[0].isl.base_level, base_layer, 
> layer_count,
>   render_area.offset.x, render_area.offset.y,
>   render_area.offset.x + render_area.extent.width,
>   render_area.offset.y + render_area.extent.height,
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 4c83a5c..484246d 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -329,12 +329,17 @@ color_attachment_compute_aux_usage(struct anv_device * 
> device,
> */
>if (att_state->fast_clear &&
>(iview->planes[0].isl.base_level > 0 ||
> -   iview->image->type == VK_IMAGE_TYPE_3D ||
> -   iview->image->array_size > 0)) {
> +   iview->planes[0].isl.base_array_layer > 0 ||
> +   cmd_state->framebuffer->layers > 1)) {
>   anv_perf_warn(device->instance, iview->image,
> "Rendering to a multi-LOD or multi-layer framebuffer "
> -   "with LOAD_OP_CLEAR.  Not fast-clearing");
> - att_state->fast_clear = false;
> +   "with LOAD_OP_CLEAR.  Only fast-clearing the first "
> +   "slice");
> +
> + /* Leave fast_clear enabled if we are clearing the first slice. */
> + if (iview->planes[0].isl.base_level > 0 ||
> + iview->planes[0].isl.base_array_layer > 0)
> +att_state->fast_clear = false;

The new perf_warn is only true for the else portion of this if statement.

>}
>  
>if (att_state->fast_clear) {
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev 

Re: [Mesa-dev] [PATCH 05/12] radeon/vcn: add header implementations for HEVC

2018-02-02 Thread Zhang, Boyuan
Update patch 05/12 with a fix.

From: Boyuan Zhang 

Implement encoding of sps, pps, vps, aud, and slice headers for HEVC
based on HEVC specs.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 348 +++-
 1 file changed, 347 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index a651f7e..c86c2f3 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -551,6 +551,86 @@ static void radeon_enc_nalu_sps(struct radeon_encoder *enc)
RADEON_ENC_END();
 }
 
+static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc)
+{
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
+   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+   int i;
+
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+   radeon_enc_code_fixed_bits(enc, 0x0001, 32);
+   radeon_enc_code_fixed_bits(enc, 0x4201, 16);
+   radeon_enc_byte_align(enc);
+   radeon_enc_set_emulation_prevention(enc, true);
+   radeon_enc_code_fixed_bits(enc, 0x0, 4);
+   radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 2);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+   radeon_enc_code_fixed_bits(enc, 0x6000, 32);
+   radeon_enc_code_fixed_bits(enc, 0xb000, 32);
+   radeon_enc_code_fixed_bits(enc, 0x0, 16);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+   for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) ; 
i++)
+   radeon_enc_code_fixed_bits(enc, 0x0, 2);
+
+   if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+   for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); 
i < 8; i++)
+   radeon_enc_code_fixed_bits(enc, 0x0, 2);
+   }
+
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.session_init.aligned_picture_width);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.session_init.aligned_picture_height);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+   radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+   radeon_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_ue(enc, 1);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3);
+   //Only support CTBSize 64
+   radeon_enc_code_ue(enc, 6 - 
(enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 + 3));
+   radeon_enc_code_ue(enc, 
enc->enc_pic.log2_min_transform_block_size_minus2);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.log2_diff_max_min_transform_block_size);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.max_transform_hierarchy_depth_inter);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 
!enc->enc_pic.hevc_spec_misc.amp_disabled, 1);
+   radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.sample_adaptive_offset_enabled_flag, 1);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
+
+   radeon_enc_code_ue(enc, 1);
+   radeon_enc_code_ue(enc, 1);
+   radeon_enc_code_ue(enc, 0);
+   radeon_enc_code_ue(enc, 0);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0, 1);
+   radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+
+   radeon_enc_byte_align(enc);
+   radeon_enc_flush_headers(enc);
+   *size_in_bytes = (enc->bits_output + 7) / 8;
+   RADEON_ENC_END();
+}
+
 static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
 {
RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
@@ -586,6 +666,150 @@ static void radeon_enc_nalu_pps(struct radeon_encoder 
*enc)
RADEON_ENC_END();
 }
 
+static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc)
+{
+   

Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking

2018-02-02 Thread Nanley Chery
On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote:
> This commit completely reworks aux tracking.  This includes a number of
> somewhat distinct changes:
> 
>  1) Since we are no longer fast-clearing multiple slices, we only need
> to track one fast clear color and one fast clear type.
> 
>  2) We store two bits for fast clear instead of one to let us
> distinguish between zero and non-zero fast clear colors.  This is
> needed so that we can do full resolves when transitioning to
> PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
> values in all sorts of places wouldn't normally.
   ^
Missing word?  we ?

> 
>  3) We now track compression state as a boolean separate from fast clear
> type and this is tracked on a per-slice granularity.
> 
> The previous scheme had some issues when it came to individual slices of
> a multi-LOD image.  In particular, we only tracked "needs resolve"
> per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
> a portion of the image and would set "needs resolve" to false anyway.
> Also, any transition from an undefined layout would reset the clear
> color for the entire LOD regardless of whether or not there was some
> clear color on some other slice.
> 
> As far as full/partial resolves go, the assumptions of the previous
> scheme held because the one case where we do need a full resolve when
> CCS_E is enabled is for window-system images.  Since we only ever
> allowed X-tiled window-system images, CCS was entirely disabled on gen9+
> and we never got CCS_E.  With the advent of Y-tiled window-system
> buffers, we now need to properly support doing a full resolve of images
> marked CCS_E.
> ---
>  src/intel/vulkan/anv_blorp.c   |   3 +-
>  src/intel/vulkan/anv_image.c   |  96 ++-
>  src/intel/vulkan/anv_private.h |  53 +++---
>  src/intel/vulkan/genX_cmd_buffer.c | 340 
> +++--
>  4 files changed, 331 insertions(+), 161 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 3698543..594b0d8 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> * particular value and don't care about format or clear value.
> */
>const struct anv_address clear_color_addr =
> - anv_image_get_clear_color_addr(cmd_buffer->device, image,
> -aspect, level);
> + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
>surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> }
>  
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 94b9ecb..d5f8dcf 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct 
> gen_device_info *devinfo,
>   * fast-clear values in non-trivial cases (e.g., outside of a render pass in
>   * which a fast clear has occurred).
>   *
> - * For the purpose of discoverability, the algorithm used to manage this 
> buffer
> - * is described here. A clear value in this buffer is updated when a fast 
> clear
> - * is performed on a subresource. One of two synchronization operations is
> - * performed in order for a following memory access to use the fast-clear
> - * value:
> - *a. Copy the value from the buffer to the surface state object used for
> - *   reading. This is done implicitly when the value is the clear value
> - *   predetermined to be the default in other surface state objects. This
> - *   is currently only done explicitly for the operation below.
> - *b. Do (a) and use the surface state object to resolve the subresource.
> - *   This is only done during layout transitions for decent performance.
> + * In order to avoid having multiple clear colors for a single plane of an
> + * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on
> + * the first slice (level 0, layer 0).  At the time of our testing (Jan 17,
> + * 2018), there were known applications which would benefit from 
> fast-clearing
> + * more than just the first slice.
>   *
> - * With the above scheme, we can fast-clear whenever the hardware allows 
> except
> - * for two cases in which synchronization becomes impossible or undesirable:
> - ** The subresource is in the GENERAL layout and is cleared to a value
> - *  other than the special default value.
> + * The fast clear portion of the image is laid out in the following order:
>   *
> - *  Performing a synchronization operation in order to read from the
> - *  subresource is undesirable in this case. Firstly, b) is not an option
> - *  because a layout transition isn't required between a write and read 
> of
> - *  an image in the GENERAL layout. 

Re: [Mesa-dev] [PATCH 2/7] gallium: use PIPE_CAP_CONSTBUF0_FLAGS

2018-02-02 Thread Axel Davy

Hi Marek,

Since the previous patch makes it mandatory to use the flags when required,
I guess this patch should also add the necessary changes to gallium nine.

Yours,

Axel Davy

On 02/02/2018 21:48, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/auxiliary/util/u_inlines.h  | 21 +
  src/gallium/auxiliary/vl/vl_compositor.c|  2 +-
  src/gallium/drivers/radeonsi/si_pipe.c  |  2 +-
  src/gallium/state_trackers/xa/xa_renderer.c |  7 ---
  4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_inlines.h 
b/src/gallium/auxiliary/util/u_inlines.h
index 4ba6ad7..4bd9b7e 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -277,20 +277,41 @@ pipe_buffer_create( struct pipe_screen *screen,
 buffer.usage = usage;
 buffer.flags = 0;
 buffer.width0 = size;
 buffer.height0 = 1;
 buffer.depth0 = 1;
 buffer.array_size = 1;
 return screen->resource_create(screen, &buffer);
  }
  
  
+static inline struct pipe_resource *

+pipe_buffer_create_const0(struct pipe_screen *screen,
+  unsigned bind,
+  enum pipe_resource_usage usage,
+  unsigned size)
+{
+   struct pipe_resource buffer;
+   memset(&buffer, 0, sizeof buffer);
+   buffer.target = PIPE_BUFFER;
+   buffer.format = PIPE_FORMAT_R8_UNORM;
+   buffer.bind = bind;
+   buffer.usage = usage;
+   buffer.flags = screen->get_param(screen, PIPE_CAP_CONSTBUF0_FLAGS);
+   buffer.width0 = size;
+   buffer.height0 = 1;
+   buffer.depth0 = 1;
+   buffer.array_size = 1;
+   return screen->resource_create(screen, &buffer);
+}
+
+
  /**
   * Map a range of a resource.
   * \param offset  start of region, in bytes
   * \param length  size of region, in bytes
   * \param access  bitmask of PIPE_TRANSFER_x flags
   * \param transfer  returns a transfer object
   */
  static inline void *
  pipe_buffer_map_range(struct pipe_context *pipe,
  struct pipe_resource *buffer,
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 67ad7f5..725bfd9 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1430,21 +1430,21 @@ vl_compositor_init_state(struct vl_compositor_state *s, 
struct pipe_context *pip
 s->pipe = pipe;
  
 s->clear_color.f[0] = s->clear_color.f[1] = 0.0f;

 s->clear_color.f[2] = s->clear_color.f[3] = 0.0f;
  
 /*

  * Create our fragment shader's constant buffer
  * Const buffer contains the color conversion matrix and bias vectors
  */
 /* XXX: Create with IMMUTABLE/STATIC... although it does change every once 
in a long while... */
-   s->csc_matrix = pipe_buffer_create
+   s->csc_matrix = pipe_buffer_create_const0
 (
pipe->screen,
PIPE_BIND_CONSTANT_BUFFER,
PIPE_USAGE_DEFAULT,
sizeof(csc_matrix) + 2*sizeof(float)
 );
  
 if (!s->csc_matrix)

return false;
  
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c

index 26835d6..1a5d598 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -580,21 +580,21 @@ static void si_handle_env_var_force_family(struct 
si_screen *sscreen)
  
  	fprintf(stderr, "radeonsi: Unknown family: %s\n", family);

exit(1);
  }
  
  static void si_test_vmfault(struct si_screen *sscreen)

  {
struct pipe_context *ctx = sscreen->aux_context;
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_resource *buf =
-   pipe_buffer_create(>b, 0, PIPE_USAGE_DEFAULT, 64);
+   pipe_buffer_create_const0(>b, 0, PIPE_USAGE_DEFAULT, 
64);
  
  	if (!buf) {

puts("Buffer allocation failed.");
exit(1);
}
  
  	r600_resource(buf)->gpu_address = 0; /* cause a VM fault */
  
  	if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) {

si_copy_buffer(sctx, buf, buf, 0, 4, 4, 0);
diff --git a/src/gallium/state_trackers/xa/xa_renderer.c 
b/src/gallium/state_trackers/xa/xa_renderer.c
index bc55f87..27497d3 100644
--- a/src/gallium/state_trackers/xa/xa_renderer.c
+++ b/src/gallium/state_trackers/xa/xa_renderer.c
@@ -386,23 +386,24 @@ renderer_bind_destination(struct xa_context *r,
  
  void

  renderer_set_constants(struct xa_context *r,
   int shader_type, const float *params, int param_bytes)
  {
  struct pipe_resource **cbuf =
(shader_type == PIPE_SHADER_VERTEX) ? &r->vs_const_buffer :
&r->fs_const_buffer;
  
  pipe_resource_reference(cbuf, NULL);

-*cbuf = pipe_buffer_create(r->pipe->screen,
-  PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT,
-  param_bytes);
+*cbuf = pipe_buffer_create_const0(r->pipe->screen,
+

Re: [Mesa-dev] [PATCH] radv: Don't expose VK_KHX_multiview on android.

2018-02-02 Thread Bas Nieuwenhuizen
On Fri, Feb 2, 2018 at 10:31 PM, Dylan Baker  wrote:
> Quoting Bas Nieuwenhuizen (2018-02-02 10:14:04)
>> On Fri, Feb 2, 2018 at 6:59 PM, Emil Velikov  
>> wrote:
>> > Hi Bas,
>> >
>> > On 31 January 2018 at 11:31, Bas Nieuwenhuizen  wrote:
>> >> deqp does not allow any KHX extensions, and since deqp is included
>> >> in android-cts, android does not allow any khx extensions.
>> >>
>> >> So disable VK_KHX_multiview on android.
>> >> ---
>> >>  src/amd/vulkan/radv_extensions.py | 2 +-
>> >>  1 file changed, 1 insertion(+), 1 deletion(-)
>> >>
>> >> diff --git a/src/amd/vulkan/radv_extensions.py 
>> >> b/src/amd/vulkan/radv_extensions.py
>> >> index ab34c01cb6..e6c6e63627 100644
>> >> --- a/src/amd/vulkan/radv_extensions.py
>> >> +++ b/src/amd/vulkan/radv_extensions.py
>> >> @@ -81,7 +81,7 @@ EXTENSIONS = [
>> >>  Extension('VK_KHR_wayland_surface',   6, 
>> >> 'VK_USE_PLATFORM_WAYLAND_KHR'),
>> >>  Extension('VK_KHR_xcb_surface',   6, 
>> >> 'VK_USE_PLATFORM_XCB_KHR'),
>> >>  Extension('VK_KHR_xlib_surface',  6, 
>> >> 'VK_USE_PLATFORM_XLIB_KHR'),
>> >> -Extension('VK_KHX_multiview', 1, True),
>> >> +Extension('VK_KHX_multiview', 1, '!ANDROID'),
>> >
>> > While picking the patch for stable the following questions came to
>> > mind. Hope you can shed some light.
>> >
>> > Is this restriction effectively a Vulkan loader limitation or ?
>> > Should we use the same for the Intel Vulkan driver as well?
>>
>> The test suite for Android conformance testing (deqp as part of Android
>> CTS) is slightly stricter than the vulkan-CTS. So enabling it results
>> in a perfectly working extension, but you just don't have a conformant
>> Android device.
>>
>> I think Chad expected the KHX extensions to be disabled in all
>> releases, but as far as I can tell they have not been for the past few
>> releases, so I'm not entirely sure what is supposed to happen here.
>> I'd expect this would be needed by Intel too, but given that the Intel
>> driver has been tested for Android for a while and this is not in the
>> Intel driver yet, I'm probably overlooking their solution.
>>
>> - Bas
>>
>>
>> >
>> > Thanks
>> > Emil
>> > ___
>
> Our plan is (and has been) to disable VK_KHX extensions in release branches, 
> but
> leave them on in development snapshots, regardless of whether we're running on
> Android or !Android. If KHX extensions are on in the release branch that's a 
> bug
> and we need to fix it before shipping a release.

Looks like it is still enabled in 17.3 and 18.0 (though that is not
really released yet), but disabled in 17.2.

>
> Dylan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: Don't expose VK_KHX_multiview on android.

2018-02-02 Thread Dylan Baker
Quoting Bas Nieuwenhuizen (2018-02-02 10:14:04)
> On Fri, Feb 2, 2018 at 6:59 PM, Emil Velikov  wrote:
> > Hi Bas,
> >
> > On 31 January 2018 at 11:31, Bas Nieuwenhuizen  wrote:
> >> deqp does not allow any KHX extensions, and since deqp is included
> >> in android-cts, android does not allow any khx extensions.
> >>
> >> So disable VK_KHX_multiview on android.
> >> ---
> >>  src/amd/vulkan/radv_extensions.py | 2 +-
> >>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/src/amd/vulkan/radv_extensions.py 
> >> b/src/amd/vulkan/radv_extensions.py
> >> index ab34c01cb6..e6c6e63627 100644
> >> --- a/src/amd/vulkan/radv_extensions.py
> >> +++ b/src/amd/vulkan/radv_extensions.py
> >> @@ -81,7 +81,7 @@ EXTENSIONS = [
> >>  Extension('VK_KHR_wayland_surface',   6, 
> >> 'VK_USE_PLATFORM_WAYLAND_KHR'),
> >>  Extension('VK_KHR_xcb_surface',   6, 
> >> 'VK_USE_PLATFORM_XCB_KHR'),
> >>  Extension('VK_KHR_xlib_surface',  6, 
> >> 'VK_USE_PLATFORM_XLIB_KHR'),
> >> -Extension('VK_KHX_multiview', 1, True),
> >> +Extension('VK_KHX_multiview', 1, '!ANDROID'),
> >
> > While picking the patch for stable the following questions came to
> > mind. Hope you can shed some light.
> >
> > Is this restriction effectively a Vulkan loader limitation or ?
> > Should we use the same for the Intel Vulkan driver as well?
> 
> The test suite for Android conformance testing (deqp as part of Android
> CTS) is slightly stricter than the vulkan-CTS. So enabling it results
> in a perfectly working extension, but you just don't have a conformant
> Android device.
> 
> I think Chad expected the KHX extensions to be disabled in all
> releases, but as far as I can tell they have not been for the past few
> releases, so I'm not entirely sure what is supposed to happen here.
> I'd expect this would be needed by Intel too, but given that the Intel
> driver has been tested for Android for a while and this is not in the
> Intel driver yet, I'm probably overlooking their solution.
> 
> - Bas
> 
> 
> >
> > Thanks
> > Emil
> > ___

Our plan is (and has been) to disable VK_KHX extensions in release branches, but
leave them on in development snapshots, regardless of whether we're running on
Android or !Android. If KHX extensions are on in the release branch that's a bug
and we need to fix it before shipping a release.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: Check for actual LLVM required versions

2018-02-02 Thread Dylan Baker
Quoting Bas Nieuwenhuizen (2018-02-02 11:54:58)
> Reviewed-by: Bas Nieuwenhuizen 
> 
> On Fri, Feb 2, 2018 at 7:55 PM, Dylan Baker  wrote:
> > Currently we always check for 3.9.0, which is pretty safe since
> > everything except radv works with >= 3.9 and 3.9 is pretty old at this
> > point. However, radv actually requires 4.0, and there is a patch for
> > radeonsi to do the same.
> >
> > Fixes: 673dda833076 ("meson: build "radv" vulkan driver for radeon 
> > hardware")
> > Signed-off-by: Dylan Baker 
> > ---
> >  meson.build | 12 ++--
> >  1 file changed, 10 insertions(+), 2 deletions(-)
> >
> > diff --git a/meson.build b/meson.build
> > index 80ea60ffa7d..0a030b92d0a 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -988,15 +988,23 @@ if with_gallium_opencl
> ># TODO: optional modules
> >  endif
> >
> > +if with_amd_vk
> > +  _llvm_version = '>= 4.0.0'
> > +elif with_gallium_opencl or with_gallium_swr or with_gallium_r600 or 
> > with_gallium_radeonsi
> > +  _llvm_version = '>= 3.9.0'
> > +else
> > +  _llvm_version = '>= 3.3.0'
> > +endif
> > +
> >  _llvm = get_option('llvm')
> >  if _llvm == 'auto'
> >dep_llvm = dependency(
> > -'llvm', version : '>= 3.9.0', modules : llvm_modules,
> > +'llvm', version : _llvm_version, modules : llvm_modules,
> >  required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or 
> > with_gallium_opencl,
> >)
> >with_llvm = dep_llvm.found()
> >  elif _llvm == 'true'
> > -  dep_llvm = dependency('llvm', version : '>= 3.9.0', modules : 
> > llvm_modules)
> > +  dep_llvm = dependency('llvm', version : _llvm_version, modules : 
> > llvm_modules)
> >with_llvm = true
> >  else
> >dep_llvm = []
> > --
> > 2.16.0
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Thanks! I went ahead and pushed this, that way Marek can merge his radeonsi patch
(which should now be much more trivial for meson).

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] amd: remove support for LLVM 3.9

2018-02-02 Thread Dylan Baker
I went ahead and pushed my patch; the only things you should need to do for the
meson side now are to move with_gallium_radeonsi and remove the stray `s` in the
error message.

Dylan

Quoting Bas Nieuwenhuizen (2018-02-02 11:56:26)
> I'd prefer if Dylan's patch goes first, as we are bumping swr too otherwise.
> 
> Though either way:
> 
> Reviewed-by: Bas Nieuwenhuizen 
> 
> On Fri, Feb 2, 2018 at 8:15 PM, Dylan Baker  wrote:
> > FYI: I sent out a patch a few minutes ago to set the minimum version more 
> > like
> > autotools does, but I'm fine with this landing first.
> >
> > Reviewed-by: Dylan Baker 
> >
> > Quoting Marek Olšák (2018-02-02 11:07:57)
> >> Adding this:
> >>
> >> diff --git a/meson.build b/meson.build
> >> index 80ea60f..22eb702 100644
> >> --- a/meson.build
> >> +++ b/meson.build
> >> @@ -991,12 +991,12 @@ endif
> >>  _llvm = get_option('llvm')
> >>  if _llvm == 'auto'
> >>dep_llvm = dependency(
> >> -'llvm', version : '>= 3.9.0', modules : llvm_modules,
> >> +'llvm', version : '>= 4.0.0', modules : llvm_modules,
> >>  required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or
> >> with_gallium_opencl,
> >>)
> >>with_llvm = dep_llvm.found()
> >>  elif _llvm == 'true'
> >> -  dep_llvm = dependency('llvm', version : '>= 3.9.0', modules : 
> >> llvm_modules)
> >> +  dep_llvm = dependency('llvm', version : '>= 4.0.0', modules : 
> >> llvm_modules)
> >>with_llvm = true
> >>  else
> >>dep_llvm = []
> >> @@ -1019,7 +1019,7 @@ if with_llvm
> >>  '-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
> >>]
> >>  elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
> >> -  error('The following drivers requires LLVM: Radv, RadeonSI, SWR. One of
> >> these is enabled, but LLVM is disabled.')
> >> +  error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of 
> >> these
> >> is enabled, but LLVM is disabled.')
> >>  endif
> >>
> >>  dep_glvnd = []
> >>
> >> Marek
> >>
> >> On Fri, Feb 2, 2018 at 8:02 PM, Bas Nieuwenhuizen 
> >> 
> >> wrote:
> >>
> >> Also change meson.build?
> >>
> >> On Fri, Feb 2, 2018 at 7:34 PM, Marek Olšák  wrote:
> >> > From: Marek Olšák 
> >> >
> >> > Only these are supported:
> >> > - LLVM 4.0
> >> > - LLVM 5.0
> >> > - LLVM 6.0
> >> > - master (7.0)
> >> > ---
> >> >  configure.ac   |   4 +-
> >> >  src/amd/common/ac_llvm_build.c | 187
> >> ++---
> >> >  src/amd/common/ac_llvm_helper.cpp  |  10 --
> >> >  src/amd/common/ac_llvm_util.c  |  39 +
> >> >  src/amd/common/ac_llvm_util.h  |  14 +-
> >> >  src/amd/common/ac_nir_to_llvm.c|  32 +---
> >> >  src/amd/vulkan/radv_device.c   |   4 -
> >> >  src/gallium/drivers/radeonsi/si_compute.c  |   3 +-
> >> >  src/gallium/drivers/radeonsi/si_get.c  |  13 +-
> >> >  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  |   3 +-
> >> >  .../drivers/radeonsi/si_shader_tgsi_setup.c|   2 -
> >> >  11 files changed, 72 insertions(+), 239 deletions(-)
> >> >
> >> > diff --git a/configure.ac b/configure.ac
> >> > index a54b7cb..8ed606c 100644
> >> > --- a/configure.ac
> >> > +++ b/configure.ac
> >> > @@ -96,22 +96,22 @@ XDAMAGE_REQUIRED=1.1
> >> >  XSHMFENCE_REQUIRED=1.1
> >> >  XVMC_REQUIRED=1.0.6
> >> >  PYTHON_MAKO_REQUIRED=0.8.0
> >> >  LIBSENSORS_REQUIRED=4.0.0
> >> >  ZLIB_REQUIRED=1.2.3
> >> >
> >> >  dnl LLVM versions
> >> >  LLVM_REQUIRED_GALLIUM=3.3.0
> >> >  LLVM_REQUIRED_OPENCL=3.9.0
> >> >  LLVM_REQUIRED_R600=3.9.0
> >> > -LLVM_REQUIRED_RADEONSI=3.9.0
> >> > -LLVM_REQUIRED_RADV=3.9.0
> >> > +LLVM_REQUIRED_RADEONSI=4.0.0
> >> > +LLVM_REQUIRED_RADV=4.0.0
> >> >  LLVM_REQUIRED_SWR=3.9.0
> >> >
> >> >  dnl Check for progs
> >> >  AC_PROG_CPP
> >> >  AC_PROG_CC
> >> >  AC_PROG_CXX
> >> >  dnl add this here, so the help for this environmnet variable is 
> >> close to
> >> >  dnl other CC/CXX flags related help
> >> >  AC_ARG_VAR([CXX11_CXXFLAGS], [Compiler flag to enable C++11 support
> >> (only needed if not
> >> >enabled by default and different  from
> >> -std=c++11)])
> >> > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_
> >> build.c
> >> > index 6ede60a..3efcaa1 100644
> >> > --- a/src/amd/common/ac_llvm_build.c
> >> > +++ b/src/amd/common/ac_llvm_build.c
> >> > @@ -213,22 +213,21 @@ ac_to_float(struct ac_llvm_context *ctx,
> >> LLVMValueRef v)
> >> > return LLVMBuildBitCast(ctx->builder, v, 
> >> ac_to_float_type(ctx,
> >> 

Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Roland Scheidegger
Am 02.02.2018 um 21:48 schrieb Marek Olšák:
> Hi,
> 
> This is the second and hopefully final version of 32-bit pointer
> support for radeonsi.
> 
> Constant buffer 0 now has restrictions on which buffers can be set
> in that slot.
> 
> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
> will be accepted there).
> 
> There will also be a dependency on new libdrm (not included in this
> series).
> 
> Please review.
> 

From an API cleanliness point of view, I don't like this much.
First, you're making the hack case the default and even require it. IMHO
a driver should be able to bind ordinary UBOs to all buffer slots. This
is really not a nice burden to put on state trackers to do something
special for just slot 0. The gallium API should stay reasonable imho,
that's a bit too much custom tailoring for GL for my liking.

Maybe I'm missing something but I can't quite see why you can't handle
this transparently inside the driver. Can't you just create a different
shader depending on what kind of buffer is bound or what's the problem?
(You wouldn't expect it to change therefore you should not have to
recompile.)

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Marek Olšák
This series actually has 10 patches. See 8/7, 9/7, 10/7.

Marek

On Fri, Feb 2, 2018 at 9:48 PM, Marek Olšák  wrote:

> Hi,
>
> This is the second and hopefully final version of 32-bit pointer
> support for radeonsi.
>
> Constant buffer 0 now has restrictions on which buffers can be set
> in that slot.
>
> I plan to push this when my LLVM patch lands in 6.0 (hopefully it
> will be accepted there).
>
> There will also be a dependency on new libdrm (not included in this
> series).
>
> Please review.
>
> Marek
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 9/7] radeonsi: disallow constant buffers with a 64-bit address in slot 0

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

State trackers must use a user buffer or const_uploader,
or set pipe_resource::flags the same as const_uploader->flags.

Reviewed-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 6 ++
 src/gallium/drivers/radeonsi/si_get.c | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 98086a7..393053c 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1268,20 +1268,26 @@ void si_set_rw_buffer(struct si_context *sctx,
 
 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
enum pipe_shader_type shader, uint slot,
const struct pipe_constant_buffer 
*input)
 {
struct si_context *sctx = (struct si_context *)ctx;
 
if (shader >= SI_NUM_SHADERS)
return;
 
+   if (slot == 0 && input && input->buffer &&
+   !(r600_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) {
+   assert(!"constant buffer 0 must have a 32-bit VM address, use 
const_uploader");
+   return;
+   }
+
slot = si_get_constbuf_slot(slot);
si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
   
si_const_and_shader_buffer_descriptors_idx(shader),
   slot, input);
 }
 
 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
 uint slot, struct pipe_constant_buffer *cbuf)
 {
cbuf->user_buffer = NULL;
diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index 7465262..541bb24 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -261,26 +261,28 @@ static int si_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
case PIPE_CAP_MAX_WINDOW_RECTANGLES:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_UMA:
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
-   case PIPE_CAP_CONSTBUF0_FLAGS:
return 0;
 
case PIPE_CAP_FENCE_SIGNAL:
return sscreen->info.has_syncobj;
 
+   case PIPE_CAP_CONSTBUF0_FLAGS:
+   return R600_RESOURCE_FLAG_32BIT;
+
case PIPE_CAP_NATIVE_FENCE_FD:
return sscreen->info.has_fence_to_handle;
 
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return si_have_tgsi_compute(sscreen);
 
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
return sscreen->has_draw_indirect_multi;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/7] radeonsi: move const_uploader allocations to 32-bit address space

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

Reviewed-by: Samuel Pitoiset 
---

Continuation of the 32-bit pointer series.

 src/gallium/drivers/radeon/r600_buffer_common.c | 3 +++
 src/gallium/drivers/radeon/r600_pipe_common.c   | 5 +++--
 src/gallium/drivers/radeon/r600_pipe_common.h   | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index aca536d..2d64eed 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -170,20 +170,23 @@ void si_init_resource_fields(struct si_screen *sscreen,
res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */
else
res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
if (sscreen->debug_flags & DBG(NO_WC))
res->flags &= ~RADEON_FLAG_GTT_WC;
 
if (res->b.b.flags & R600_RESOURCE_FLAG_READ_ONLY)
res->flags |= RADEON_FLAG_READ_ONLY;
 
+   if (res->b.b.flags & R600_RESOURCE_FLAG_32BIT)
+   res->flags |= RADEON_FLAG_32BIT;
+
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage = 0;
res->gart_usage = 0;
res->max_forced_staging_uploads = 0;
res->b.max_forced_staging_uploads = 0;
 
if (res->domains & RADEON_DOMAIN_VRAM) {
res->vram_usage = size;
 
res->max_forced_staging_uploads =
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 9e45a9f..d46cb64 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -445,22 +445,23 @@ bool si_common_context_init(struct r600_common_context 
*rctx,
return false;
 
rctx->b.stream_uploader = u_upload_create(>b, 1024 * 1024,
  0, PIPE_USAGE_STREAM,
  R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.stream_uploader)
return false;
 
rctx->b.const_uploader = u_upload_create(>b, 128 * 1024,
 0, PIPE_USAGE_DEFAULT,
-
sscreen->cpdma_prefetch_writes_memory ?
-   0 : 
R600_RESOURCE_FLAG_READ_ONLY);
+R600_RESOURCE_FLAG_32BIT |
+
(sscreen->cpdma_prefetch_writes_memory ?
+   0 : 
R600_RESOURCE_FLAG_READ_ONLY));
if (!rctx->b.const_uploader)
return false;
 
rctx->cached_gtt_allocator = u_upload_create(>b, 16 * 1024,
 0, PIPE_USAGE_STAGING, 0);
if (!rctx->cached_gtt_allocator)
return false;
 
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index e2cd6c60..7941903 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -47,20 +47,21 @@
 struct u_log_context;
 struct si_screen;
 struct si_context;
 
 #define R600_RESOURCE_FLAG_TRANSFER(PIPE_RESOURCE_FLAG_DRV_PRIV << 
0)
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
1)
 #define R600_RESOURCE_FLAG_FORCE_TILING
(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 
3)
 #define R600_RESOURCE_FLAG_UNMAPPABLE  (PIPE_RESOURCE_FLAG_DRV_PRIV << 
4)
 #define R600_RESOURCE_FLAG_READ_ONLY   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
5)
+#define R600_RESOURCE_FLAG_32BIT   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
6)
 
 /* Debug flags. */
 enum {
/* Shader logging options: */
DBG_VS = PIPE_SHADER_VERTEX,
DBG_PS = PIPE_SHADER_FRAGMENT,
DBG_GS = PIPE_SHADER_GEOMETRY,
DBG_TCS = PIPE_SHADER_TESS_CTRL,
DBG_TES = PIPE_SHADER_TESS_EVAL,
DBG_CS = PIPE_SHADER_COMPUTE,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/7] radeonsi: implement 32-bit pointers in user data SGPRs (v2)

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

SGPRS: 2170102 -> 2158430 (-0.54 %)
VGPRS: 1645656 -> 1641516 (-0.25 %)
Spilled SGPRs: 9078 -> 8810 (-2.95 %)
Spilled VGPRs: 130 -> 114 (-12.31 %)
Scratch size: 1508 -> 1492 (-1.06 %) dwords per thread
Code Size: 52094872 -> 52692540 (1.15 %) bytes
Max Waves: 371848 -> 372723 (0.24 %)

v2: - the shader cache needs to take address32_hi into account
- set amdgpu-32bit-address-high-bits

Reviewed-by: Samuel Pitoiset  (v1)
---
 src/amd/common/ac_llvm_build.c|  13 +++
 src/amd/common/ac_llvm_build.h|   5 +
 src/gallium/drivers/radeonsi/si_descriptors.c |  10 +-
 src/gallium/drivers/radeonsi/si_pipe.c|  16 ++-
 src/gallium/drivers/radeonsi/si_shader.c  | 118 ++
 src/gallium/drivers/radeonsi/si_shader.h  |  23 -
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |   8 +-
 7 files changed, 137 insertions(+), 56 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 3efcaa1..e1ec81f 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -57,20 +57,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context,
ctx->context = context;
ctx->module = NULL;
ctx->builder = NULL;
 
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
ctx->i1 = LLVMInt1TypeInContext(ctx->context);
ctx->i8 = LLVMInt8TypeInContext(ctx->context);
ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
+   ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
@@ -132,21 +133,24 @@ unsigned
 ac_get_type_size(LLVMTypeRef type)
 {
LLVMTypeKind kind = LLVMGetTypeKind(type);
 
switch (kind) {
case LLVMIntegerTypeKind:
return LLVMGetIntTypeWidth(type) / 8;
case LLVMFloatTypeKind:
return 4;
case LLVMDoubleTypeKind:
+   return 8;
case LLVMPointerTypeKind:
+   if (LLVMGetPointerAddressSpace(type) == 
AC_CONST_32BIT_ADDR_SPACE)
+   return 4;
return 8;
case LLVMVectorTypeKind:
return LLVMGetVectorSize(type) *
   ac_get_type_size(LLVMGetElementType(type));
case LLVMArrayTypeKind:
return LLVMGetArrayLength(type) *
   ac_get_type_size(LLVMGetElementType(type));
default:
assert(0);
return 0;
@@ -1982,10 +1986,19 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
   LLVMIntEQ, src0,
   ctx->i32_0, ""),
   LLVMConstInt(ctx->i32, -1, 0), lsb, "");
 }
 
 LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
 {
return LLVMPointerType(LLVMArrayType(elem_type, 0),
   AC_CONST_ADDR_SPACE);
 }
+
+LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
+{
+   if (!HAVE_32BIT_POINTERS)
+   return ac_array_in_const_addr_space(elem_type);
+
+   return LLVMPointerType(LLVMArrayType(elem_type, 0),
+  AC_CONST_32BIT_ADDR_SPACE);
+}
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 35f849a..116037a 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -27,36 +27,40 @@
 
 #include 
 #include 
 
 #include "amd_family.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0600)
+
 enum {
AC_CONST_ADDR_SPACE = 2, /* CONST is the only address space that 
selects SMEM loads */
AC_LOCAL_ADDR_SPACE = 3,
+   AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type 
has 32 bits */
 };
 
 struct ac_llvm_context {
LLVMContextRef context;
LLVMModuleRef module;
LLVMBuilderRef builder;
 
LLVMTypeRef voidt;
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i16;
LLVMTypeRef i32;
LLVMTypeRef i64;
+   LLVMTypeRef intptr;
LLVMTypeRef f16;
LLVMTypeRef f32;
LLVMTypeRef f64;
LLVMTypeRef v2i16;
LLVMTypeRef v2i32;

[Mesa-dev] [PATCH 0/7] RadeonSI 32-bit pointers v2 & Gallium changes

2018-02-02 Thread Marek Olšák
Hi,

This is the second and hopefully final version of 32-bit pointer
support for radeonsi.

Constant buffer 0 now has restrictions on which buffers can be set
in that slot.

I plan to push this when my LLVM patch lands in 6.0 (hopefully it
will be accepted there).

There will also be a dependency on new libdrm (not included in this
series).

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] gallium: allow drivers to impose BO flags restrictions on constant buffer 0

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

Required by radeonsi for optimal behavior.
---
 src/gallium/docs/source/screen.rst   | 4 
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_get.c| 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/vc5/vc5_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 18 files changed, 21 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index cb3418f..cc45ffe 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -407,20 +407,24 @@ The integer capabilities:
   tracker works it out.
 * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
   Whether pipe_vertex_buffer::buffer_offset is treated as signed. The u_vbuf
   module needs this for optimal performance in workstation applications.
 * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support per-context
   priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x for the
   supported priority levels.  A driver that does not support prioritized
   contexts can return 0.
 * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports signaling semaphores
   using fence_server_signal().
+* ``PIPE_CAP_CONSTBUF0_FLAGS``: The bits of pipe_resource::flags that must be
+  set when binding that buffer as constant buffer 0. If the buffer doesn't have
+  those bits set, pipe_context::set_constant_buffer(.., 0, ..) is ignored
+  by the driver, and the driver can throw assertion failures.
 
 
 .. _pipe_capf:
 
 PIPE_CAPF_*
 
 
 The floating-point capabilities are:
 
 * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line.
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index d5d1f4f..aa6ab2a 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -260,20 +260,21 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_MEMOBJ:
case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
case PIPE_CAP_FENCE_SIGNAL:
+   case PIPE_CAP_CONSTBUF0_FLAGS:
   return 0;
 
/* Stream output. */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
   return 0;
 
/* Geometry shader output, unsupported. */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 438817d..073cc6c 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -330,20 +330,21 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_MEMOBJ:
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_FENCE_SIGNAL:
+   case PIPE_CAP_CONSTBUF0_FLAGS:
return 0;
 
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
return screen->priority_mask;
 
case PIPE_CAP_DRAW_INDIRECT:
if (is_a4xx(screen) || is_a5xx(screen))
return 1;
return 0;
 
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index ac3621f..345e82b 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -318,20 +318,21 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
case PIPE_CAP_QUERY_SO_OVERFLOW:
case 

[Mesa-dev] [PATCH 6/7] winsys/radeon: add struct radeon_vm_heap

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

Reviewed-by: Samuel Pitoiset 
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 63 ---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  9 ++--
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 11 ++--
 3 files changed, 47 insertions(+), 36 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 7aef238..bbfe5cc 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -191,146 +191,148 @@ static enum radeon_bo_domain 
radeon_bo_get_initial_domain(
 fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
 bo, bo->handle);
 /* Default domain as returned by get_valid_domain. */
 return RADEON_DOMAIN_VRAM_GTT;
 }
 
 /* GEM domains and winsys domains are defined the same. */
 return get_valid_domain(args.value);
 }
 
-static uint64_t radeon_bomgr_find_va(struct radeon_drm_winsys *rws,
+static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
+ struct radeon_vm_heap *heap,
  uint64_t size, uint64_t alignment)
 {
 struct radeon_bo_va_hole *hole, *n;
 uint64_t offset = 0, waste = 0;
 
 /* All VM address space holes will implicitly start aligned to the
  * size alignment, so we don't need to sanitize the alignment here
  */
-size = align(size, rws->info.gart_page_size);
+size = align(size, info->gart_page_size);
 
-mtx_lock(>bo_va_mutex);
+mtx_lock(>mutex);
 /* first look for a hole */
-LIST_FOR_EACH_ENTRY_SAFE(hole, n, >va_holes, list) {
+LIST_FOR_EACH_ENTRY_SAFE(hole, n, >holes, list) {
 offset = hole->offset;
 waste = offset % alignment;
 waste = waste ? alignment - waste : 0;
 offset += waste;
 if (offset >= (hole->offset + hole->size)) {
 continue;
 }
 if (!waste && hole->size == size) {
 offset = hole->offset;
 list_del(>list);
 FREE(hole);
-mtx_unlock(>bo_va_mutex);
+mtx_unlock(>mutex);
 return offset;
 }
 if ((hole->size - waste) > size) {
 if (waste) {
 n = CALLOC_STRUCT(radeon_bo_va_hole);
 n->size = waste;
 n->offset = hole->offset;
 list_add(>list, >list);
 }
 hole->size -= (size + waste);
 hole->offset += size + waste;
-mtx_unlock(>bo_va_mutex);
+mtx_unlock(>mutex);
 return offset;
 }
 if ((hole->size - waste) == size) {
 hole->size = waste;
-mtx_unlock(>bo_va_mutex);
+mtx_unlock(>mutex);
 return offset;
 }
 }
 
-offset = rws->va_offset;
+offset = heap->start;
 waste = offset % alignment;
 waste = waste ? alignment - waste : 0;
 if (waste) {
 n = CALLOC_STRUCT(radeon_bo_va_hole);
 n->size = waste;
 n->offset = offset;
-list_add(>list, >va_holes);
+list_add(>list, >holes);
 }
 offset += waste;
-rws->va_offset += size + waste;
-mtx_unlock(>bo_va_mutex);
+heap->start += size + waste;
+mtx_unlock(>mutex);
 return offset;
 }
 
-static void radeon_bomgr_free_va(struct radeon_drm_winsys *rws,
+static void radeon_bomgr_free_va(const struct radeon_info *info,
+ struct radeon_vm_heap *heap,
  uint64_t va, uint64_t size)
 {
 struct radeon_bo_va_hole *hole = NULL;
 
-size = align(size, rws->info.gart_page_size);
+size = align(size, info->gart_page_size);
 
-mtx_lock(>bo_va_mutex);
-if ((va + size) == rws->va_offset) {
-rws->va_offset = va;
+mtx_lock(>mutex);
+if ((va + size) == heap->start) {
+heap->start = va;
 /* Delete uppermost hole if it reaches the new top */
-if (!LIST_IS_EMPTY(>va_holes)) {
-hole = container_of(rws->va_holes.next, hole, list);
+if (!LIST_IS_EMPTY(>holes)) {
+hole = container_of(heap->holes.next, hole, list);
 if ((hole->offset + hole->size) == va) {
-rws->va_offset = hole->offset;
+heap->start = hole->offset;
 list_del(>list);
 FREE(hole);
 }
 }
 } else {
 struct radeon_bo_va_hole *next;
 
-hole = container_of(>va_holes, hole, list);
-LIST_FOR_EACH_ENTRY(next, >va_holes, list) {
+hole = container_of(>holes, hole, list);
+LIST_FOR_EACH_ENTRY(next, >holes, list) {
if (next->offset < va)
break;
 hole = next;
 }
 
-if (>list != >va_holes) {
+if (>list != 

[Mesa-dev] [PATCH 5/7] winsys/amdgpu: enable 32-bit VM allocations

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

Reviewed-by: Samuel Pitoiset 
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 5d565ff..8ce131c 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -430,21 +430,22 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
amdgpu_winsys *ws,
   fprintf(stderr, "amdgpu:size  : %"PRIu64" bytes\n", size);
   fprintf(stderr, "amdgpu:alignment : %u bytes\n", alignment);
   fprintf(stderr, "amdgpu:domains   : %u\n", initial_domain);
   goto error_bo_alloc;
}
 
va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
if (size > ws->info.pte_fragment_size)
   alignment = MAX2(alignment, ws->info.pte_fragment_size);
r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- size + va_gap_size, alignment, 0, , 
_handle, 0);
+ size + va_gap_size, alignment, 0, , _handle,
+ flags & RADEON_FLAG_32BIT ? 
AMDGPU_VA_RANGE_32_BIT : 0);
if (r)
   goto error_va_alloc;
 
unsigned vm_flags = AMDGPU_VM_PAGE_READABLE |
AMDGPU_VM_PAGE_EXECUTABLE;
 
if (!(flags & RADEON_FLAG_READ_ONLY))
vm_flags |= AMDGPU_VM_PAGE_WRITEABLE;
 
r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] ac: query high bits of 32-bit address space

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_gpu_info.c | 7 +++
 src/amd/common/ac_gpu_info.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 6d9dcb5..b5b059e 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -219,20 +219,26 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
return false;
}
 
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0,
_version, _feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) 
failed.\n");
return false;
}
 
+   r = amdgpu_query_sw_info(dev, amdgpu_sw_info_address32_hi, 
>address32_hi);
+   if (r) {
+   fprintf(stderr, "amdgpu: amdgpu_query_sw_info(address32_hi) 
failed.\n");
+   return false;
+   }
+
/* Set chip identification. */
info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
info->vce_harvest_config = amdinfo->vce_harvest_config;
 
switch (info->pci_id) {
 #define CHIPSET(pci_id, cfamily) case pci_id: info->family = CHIP_##cfamily; 
break;
 #include "pci_ids/radeonsi_pci_ids.h"
 #undef CHIPSET
 
default:
@@ -364,20 +370,21 @@ void ac_print_gpu_info(struct radeon_info *info)
printf("family = %i\n", info->family);
printf("chip_class = %i\n", info->chip_class);
printf("pte_fragment_size = %u\n", info->pte_fragment_size);
printf("gart_page_size = %u\n", info->gart_page_size);
printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 
1024*1024));
printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 
1024*1024));
printf("vram_vis_size = %i MB\n", 
(int)DIV_ROUND_UP(info->vram_vis_size, 1024*1024));
printf("max_alloc_size = %i MB\n",
   (int)DIV_ROUND_UP(info->max_alloc_size, 1024*1024));
printf("min_alloc_size = %u\n", info->min_alloc_size);
+   printf("address32_hi = %u\n", info->address32_hi);
printf("has_dedicated_vram = %u\n", info->has_dedicated_vram);
printf("has_virtual_memory = %i\n", info->has_virtual_memory);
printf("gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2);
printf("has_hw_decode = %u\n", info->has_hw_decode);
printf("num_sdma_rings = %i\n", info->num_sdma_rings);
printf("num_compute_rings = %u\n", info->num_compute_rings);
printf("uvd_fw_version = %u\n", info->uvd_fw_version);
printf("vce_fw_version = %u\n", info->vce_fw_version);
printf("me_fw_version = %i\n", info->me_fw_version);
printf("me_fw_feature = %i\n", info->me_fw_feature);
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index cca3e98..ae42aff 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -50,20 +50,21 @@ struct radeon_info {
uint32_tpci_id;
enum radeon_family  family;
enum chip_class chip_class;
uint32_tpte_fragment_size;
uint32_tgart_page_size;
uint64_tgart_size;
uint64_tvram_size;
uint64_tvram_vis_size;
uint64_tmax_alloc_size;
uint32_tmin_alloc_size;
+   uint32_taddress32_hi;
boolhas_dedicated_vram;
boolhas_virtual_memory;
boolgfx_ib_pad_with_type2;
boolhas_hw_decode;
uint32_tnum_sdma_rings;
uint32_tnum_compute_rings;
uint32_tuvd_fw_version;
uint32_tvce_fw_version;
uint32_tme_fw_version;
uint32_tme_fw_feature;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/7] winsys/radeon: implement and enable 32-bit VM allocations

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

Reviewed-by: Samuel Pitoiset 
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 42 +++
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 28 ++-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.h |  2 ++
 3 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index bbfe5cc..06842a4 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -242,32 +242,54 @@ static uint64_t radeon_bomgr_find_va(const struct 
radeon_info *info,
 if ((hole->size - waste) == size) {
 hole->size = waste;
 mtx_unlock(>mutex);
 return offset;
 }
 }
 
 offset = heap->start;
 waste = offset % alignment;
 waste = waste ? alignment - waste : 0;
+
+if (offset + waste + size > heap->end) {
+mtx_unlock(>mutex);
+return 0;
+}
+
 if (waste) {
 n = CALLOC_STRUCT(radeon_bo_va_hole);
 n->size = waste;
 n->offset = offset;
 list_add(>list, >holes);
 }
 offset += waste;
 heap->start += size + waste;
 mtx_unlock(>mutex);
 return offset;
 }
 
+static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
+   uint64_t size, uint64_t alignment)
+{
+uint64_t va = 0;
+
+/* Try to allocate from the 64-bit address space first.
+ * If it doesn't exist (start = 0) or if it doesn't have enough space,
+ * fall back to the 32-bit address space.
+ */
+if (ws->vm64.start)
+va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
+if (!va)
+va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
+return va;
+}
+
 static void radeon_bomgr_free_va(const struct radeon_info *info,
  struct radeon_vm_heap *heap,
  uint64_t va, uint64_t size)
 {
 struct radeon_bo_va_hole *hole = NULL;
 
 size = align(size, info->gart_page_size);
 
 mtx_lock(>mutex);
 if ((va + size) == heap->start) {
@@ -363,21 +385,23 @@ void radeon_bo_destroy(struct pb_buffer *_buf)
 
 if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, ,
sizeof(va)) != 0 &&
va.operation == RADEON_VA_RESULT_ERROR) {
 fprintf(stderr, "radeon: Failed to deallocate virtual address 
for buffer:\n");
 fprintf(stderr, "radeon:size  : %"PRIu64" bytes\n", 
bo->base.size);
 fprintf(stderr, "radeon:va: 0x%"PRIx64"\n", 
bo->va);
 }
}
 
-   radeon_bomgr_free_va(>info, >vm64, bo->va, bo->base.size);
+   radeon_bomgr_free_va(>info,
+ bo->va < rws->vm32.end ? >vm32 : >vm64,
+ bo->va, bo->base.size);
 }
 
 /* Close object. */
 args.handle = bo->handle;
 drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, );
 
 mtx_destroy(>u.real.map_mutex);
 
 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
 rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
@@ -653,22 +677,28 @@ static struct radeon_bo *radeon_create_bo(struct 
radeon_drm_winsys *rws,
 if (heap >= 0) {
 pb_cache_init_entry(>bo_cache, >u.real.cache_entry, >base,
 heap);
 }
 
 if (rws->info.has_virtual_memory) {
 struct drm_radeon_gem_va va;
 unsigned va_gap_size;
 
 va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
-bo->va = radeon_bomgr_find_va(>info, >vm64,
-  size + va_gap_size, alignment);
+
+if (flags & RADEON_FLAG_32BIT) {
+bo->va = radeon_bomgr_find_va(>info, >vm32,
+  size + va_gap_size, alignment);
+assert(bo->va + size < rws->vm32.end);
+} else {
+bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, 
alignment);
+}
 
 va.handle = bo->handle;
 va.vm_id = 0;
 va.operation = RADEON_VA_MAP;
 va.flags = RADEON_VM_PAGE_READABLE |
RADEON_VM_PAGE_WRITEABLE |
RADEON_VM_PAGE_SNOOPED;
 va.offset = bo->va;
 r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, , sizeof(va));
 if (r && va.operation == RADEON_VA_RESULT_ERROR) {
@@ -1055,22 +1085,21 @@ static struct pb_buffer 
*radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
 bo->hash = __sync_fetch_and_add(>next_bo_hash, 1);
 (void) mtx_init(>u.real.map_mutex, mtx_plain);
 
 util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
 
 mtx_unlock(>bo_handles_mutex);
 
 if (ws->info.has_virtual_memory) {
 struct drm_radeon_gem_va va;
 
- 

[Mesa-dev] [PATCH 2/7] gallium: use PIPE_CAP_CONSTBUF0_FLAGS

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/util/u_inlines.h  | 21 +
 src/gallium/auxiliary/vl/vl_compositor.c|  2 +-
 src/gallium/drivers/radeonsi/si_pipe.c  |  2 +-
 src/gallium/state_trackers/xa/xa_renderer.c |  7 ---
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_inlines.h 
b/src/gallium/auxiliary/util/u_inlines.h
index 4ba6ad7..4bd9b7e 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -277,20 +277,41 @@ pipe_buffer_create( struct pipe_screen *screen,
buffer.usage = usage;
buffer.flags = 0;
buffer.width0 = size;
buffer.height0 = 1;
buffer.depth0 = 1;
buffer.array_size = 1;
return screen->resource_create(screen, );
 }
 
 
+static inline struct pipe_resource *
+pipe_buffer_create_const0(struct pipe_screen *screen,
+  unsigned bind,
+  enum pipe_resource_usage usage,
+  unsigned size)
+{
+   struct pipe_resource buffer;
+   memset(&buffer, 0, sizeof buffer);
+   buffer.target = PIPE_BUFFER;
+   buffer.format = PIPE_FORMAT_R8_UNORM;
+   buffer.bind = bind;
+   buffer.usage = usage;
+   buffer.flags = screen->get_param(screen, PIPE_CAP_CONSTBUF0_FLAGS);
+   buffer.width0 = size;
+   buffer.height0 = 1;
+   buffer.depth0 = 1;
+   buffer.array_size = 1;
+   return screen->resource_create(screen, &buffer);
+}
+
+
 /**
  * Map a range of a resource.
  * \param offset  start of region, in bytes 
  * \param length  size of region, in bytes 
  * \param access  bitmask of PIPE_TRANSFER_x flags
  * \param transfer  returns a transfer object
  */
 static inline void *
 pipe_buffer_map_range(struct pipe_context *pipe,
  struct pipe_resource *buffer,
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 67ad7f5..725bfd9 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1430,21 +1430,21 @@ vl_compositor_init_state(struct vl_compositor_state *s, 
struct pipe_context *pip
s->pipe = pipe;
 
s->clear_color.f[0] = s->clear_color.f[1] = 0.0f;
s->clear_color.f[2] = s->clear_color.f[3] = 0.0f;
 
/*
 * Create our fragment shader's constant buffer
 * Const buffer contains the color conversion matrix and bias vectors
 */
/* XXX: Create with IMMUTABLE/STATIC... although it does change every once 
in a long while... */
-   s->csc_matrix = pipe_buffer_create
+   s->csc_matrix = pipe_buffer_create_const0
(
   pipe->screen,
   PIPE_BIND_CONSTANT_BUFFER,
   PIPE_USAGE_DEFAULT,
   sizeof(csc_matrix) + 2*sizeof(float)
);
 
if (!s->csc_matrix)
   return false;
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 26835d6..1a5d598 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -580,21 +580,21 @@ static void si_handle_env_var_force_family(struct 
si_screen *sscreen)
 
fprintf(stderr, "radeonsi: Unknown family: %s\n", family);
exit(1);
 }
 
 static void si_test_vmfault(struct si_screen *sscreen)
 {
struct pipe_context *ctx = sscreen->aux_context;
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_resource *buf =
-   pipe_buffer_create(>b, 0, PIPE_USAGE_DEFAULT, 64);
+   pipe_buffer_create_const0(>b, 0, PIPE_USAGE_DEFAULT, 
64);
 
if (!buf) {
puts("Buffer allocation failed.");
exit(1);
}
 
r600_resource(buf)->gpu_address = 0; /* cause a VM fault */
 
if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) {
si_copy_buffer(sctx, buf, buf, 0, 4, 4, 0);
diff --git a/src/gallium/state_trackers/xa/xa_renderer.c 
b/src/gallium/state_trackers/xa/xa_renderer.c
index bc55f87..27497d3 100644
--- a/src/gallium/state_trackers/xa/xa_renderer.c
+++ b/src/gallium/state_trackers/xa/xa_renderer.c
@@ -386,23 +386,24 @@ renderer_bind_destination(struct xa_context *r,
 
 void
 renderer_set_constants(struct xa_context *r,
   int shader_type, const float *params, int param_bytes)
 {
 struct pipe_resource **cbuf =
(shader_type == PIPE_SHADER_VERTEX) ? >vs_const_buffer :
>fs_const_buffer;
 
 pipe_resource_reference(cbuf, NULL);
-*cbuf = pipe_buffer_create(r->pipe->screen,
-  PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT,
-  param_bytes);
+*cbuf = pipe_buffer_create_const0(r->pipe->screen,
+  PIPE_BIND_CONSTANT_BUFFER,
+  PIPE_USAGE_DEFAULT,
+  param_bytes);
 
 if (*cbuf) {
pipe_buffer_write(r->pipe, *cbuf, 0, param_bytes, params);
 }
 

[Mesa-dev] [PATCH 4/7] gallium/radeon: add 32-bit address space heaps

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

Reviewed-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeon/radeon_winsys.h | 47 --
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 238c921..0a56539 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -46,20 +46,21 @@ enum radeon_bo_domain { /* bitfield */
 RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
 };
 
 enum radeon_bo_flag { /* bitfield */
 RADEON_FLAG_GTT_WC =(1 << 0),
 RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
 RADEON_FLAG_NO_SUBALLOC =   (1 << 2),
 RADEON_FLAG_SPARSE =(1 << 3),
 RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
 RADEON_FLAG_READ_ONLY = (1 << 5),
+RADEON_FLAG_32BIT =(1 << 6),
 };
 
 enum radeon_bo_usage { /* bitfield */
 RADEON_USAGE_READ = 2,
 RADEON_USAGE_WRITE = 4,
 RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
 
 /* The winsys ensures that the CS submission will be scheduled after
  * previously flushed CSs referencing this BO in a conflicting way.
  */
@@ -665,37 +666,45 @@ static inline void radeon_emit(struct radeon_winsys_cs 
*cs, uint32_t value)
 static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
 const uint32_t *values, unsigned count)
 {
 memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
 cs->current.cdw += count;
 }
 
 enum radeon_heap {
 RADEON_HEAP_VRAM_NO_CPU_ACCESS,
 RADEON_HEAP_VRAM_READ_ONLY,
+RADEON_HEAP_VRAM_READ_ONLY_32BIT,
+RADEON_HEAP_VRAM_32BIT,
 RADEON_HEAP_VRAM,
 RADEON_HEAP_GTT_WC,
 RADEON_HEAP_GTT_WC_READ_ONLY,
+RADEON_HEAP_GTT_WC_READ_ONLY_32BIT,
+RADEON_HEAP_GTT_WC_32BIT,
 RADEON_HEAP_GTT,
 RADEON_MAX_SLAB_HEAPS,
 RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS,
 };
 
 static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap 
heap)
 {
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
 case RADEON_HEAP_VRAM_READ_ONLY:
+case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
+case RADEON_HEAP_VRAM_32BIT:
 case RADEON_HEAP_VRAM:
 return RADEON_DOMAIN_VRAM;
 case RADEON_HEAP_GTT_WC:
 case RADEON_HEAP_GTT_WC_READ_ONLY:
+case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
+case RADEON_HEAP_GTT_WC_32BIT:
 case RADEON_HEAP_GTT:
 return RADEON_DOMAIN_GTT;
 default:
 assert(0);
 return (enum radeon_bo_domain)0;
 }
 }
 
 static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
 {
@@ -705,20 +714,31 @@ static inline unsigned radeon_flags_from_heap(enum 
radeon_heap heap)
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
 return flags |
RADEON_FLAG_NO_CPU_ACCESS;
 
 case RADEON_HEAP_VRAM_READ_ONLY:
 case RADEON_HEAP_GTT_WC_READ_ONLY:
 return flags |
RADEON_FLAG_READ_ONLY;
 
+case RADEON_HEAP_VRAM_READ_ONLY_32BIT:
+case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT:
+return flags |
+   RADEON_FLAG_READ_ONLY |
+   RADEON_FLAG_32BIT;
+
+case RADEON_HEAP_VRAM_32BIT:
+case RADEON_HEAP_GTT_WC_32BIT:
+return flags |
+   RADEON_FLAG_32BIT;
+
 case RADEON_HEAP_VRAM:
 case RADEON_HEAP_GTT_WC:
 case RADEON_HEAP_GTT:
 default:
 return flags;
 }
 }
 
 /* Return the heap index for winsys allocators, or -1 on failure. */
 static inline int radeon_get_heap_index(enum radeon_bo_domain domain,
@@ -730,46 +750,67 @@ static inline int radeon_get_heap_index(enum 
radeon_bo_domain domain,
 assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == 
RADEON_DOMAIN_VRAM);
 
 /* Resources with interprocess sharing don't use any winsys allocators. */
 if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
 return -1;
 
 /* Unsupported flags: NO_SUBALLOC, SPARSE. */
 if (flags & ~(RADEON_FLAG_GTT_WC |
   RADEON_FLAG_NO_CPU_ACCESS |
   RADEON_FLAG_NO_INTERPROCESS_SHARING |
-  RADEON_FLAG_READ_ONLY))
+  RADEON_FLAG_READ_ONLY |
+  RADEON_FLAG_32BIT))
 return -1;
 
 switch (domain) {
 case RADEON_DOMAIN_VRAM:
-switch (flags & (RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY)) {
+switch (flags & (RADEON_FLAG_NO_CPU_ACCESS |
+ RADEON_FLAG_READ_ONLY |
+ RADEON_FLAG_32BIT)) {
+case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | 
RADEON_FLAG_32BIT:
 case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY:
 assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense");
 return -1;
+case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT:
+

Re: [Mesa-dev] [PATCH] autotools: Only build libmesa-st-tests-common.a for tests.

2018-02-02 Thread Gert Wollny
Thanks, my automake knowledge is a bit rusty, so I didn't know
about this option to limit the build to "make check".

Reviewed-By: Gert Wollny 

Am Freitag, den 02.02.2018, 16:59 +0100 schrieb Bas Nieuwenhuizen:
> We don't need the library if we don't build tests, and building
> it adds a dependency on gtest which adds a dependency on cxxabi.h.
> 
> Fixes: 6569b33b6e "mesa/st/tests: unify MockCodeLine* classes"
> ---
>  src/mesa/state_tracker/tests/Makefile.am | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/state_tracker/tests/Makefile.am
> b/src/mesa/state_tracker/tests/Makefile.am
> index 3c7993dc8d..9ac2815c91 100644
> --- a/src/mesa/state_tracker/tests/Makefile.am
> +++ b/src/mesa/state_tracker/tests/Makefile.am
> @@ -19,7 +19,7 @@ if HAVE_STD_CXX11
>  TESTS = st-renumerate-test
>  check_PROGRAMS = st-renumerate-test
>  
> -noinst_LIBRARIES = libmesa-st-tests-common.a
> +check_LIBRARIES = libmesa-st-tests-common.a
>  endif
>  
>  libmesa_st_tests_common_a_SOURCES = \
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] amd: remove support for LLVM 3.9

2018-02-02 Thread Bas Nieuwenhuizen
I'd prefer if Dylan's patch goes first, as we are bumping swr too otherwise.

Though either way:

Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 2, 2018 at 8:15 PM, Dylan Baker  wrote:
> FYI: I sent out a patch a few minutes ago to set the minimum version more like
> autotools does, but I'm fine with this landing first.
>
> Reviewed-by: Dylan Baker 
>
> Quoting Marek Olšák (2018-02-02 11:07:57)
>> Adding this:
>>
>> diff --git a/meson.build b/meson.build
>> index 80ea60f..22eb702 100644
>> --- a/meson.build
>> +++ b/meson.build
>> @@ -991,12 +991,12 @@ endif
>>  _llvm = get_option('llvm')
>>  if _llvm == 'auto'
>>dep_llvm = dependency(
>> -'llvm', version : '>= 3.9.0', modules : llvm_modules,
>> +'llvm', version : '>= 4.0.0', modules : llvm_modules,
>>  required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or
>> with_gallium_opencl,
>>)
>>with_llvm = dep_llvm.found()
>>  elif _llvm == 'true'
>> -  dep_llvm = dependency('llvm', version : '>= 3.9.0', modules : 
>> llvm_modules)
>> +  dep_llvm = dependency('llvm', version : '>= 4.0.0', modules : 
>> llvm_modules)
>>with_llvm = true
>>  else
>>dep_llvm = []
>> @@ -1019,7 +1019,7 @@ if with_llvm
>>  '-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
>>]
>>  elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
>> -  error('The following drivers requires LLVM: Radv, RadeonSI, SWR. One of
>> these is enabled, but LLVM is disabled.')
>> +  error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of 
>> these
>> is enabled, but LLVM is disabled.')
>>  endif
>>
>>  dep_glvnd = []
>>
>> Marek
>>
>> On Fri, Feb 2, 2018 at 8:02 PM, Bas Nieuwenhuizen 
>> wrote:
>>
>> Also change meson.build?
>>
>> On Fri, Feb 2, 2018 at 7:34 PM, Marek Olšák  wrote:
>> > From: Marek Olšák 
>> >
>> > Only these are supported:
>> > - LLVM 4.0
>> > - LLVM 5.0
>> > - LLVM 6.0
>> > - master (7.0)
>> > ---
>> >  configure.ac   |   4 +-
>> >  src/amd/common/ac_llvm_build.c | 187
>> ++---
>> >  src/amd/common/ac_llvm_helper.cpp  |  10 --
>> >  src/amd/common/ac_llvm_util.c  |  39 +
>> >  src/amd/common/ac_llvm_util.h  |  14 +-
>> >  src/amd/common/ac_nir_to_llvm.c|  32 +---
>> >  src/amd/vulkan/radv_device.c   |   4 -
>> >  src/gallium/drivers/radeonsi/si_compute.c  |   3 +-
>> >  src/gallium/drivers/radeonsi/si_get.c  |  13 +-
>> >  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  |   3 +-
>> >  .../drivers/radeonsi/si_shader_tgsi_setup.c|   2 -
>> >  11 files changed, 72 insertions(+), 239 deletions(-)
>> >
>> > diff --git a/configure.ac b/configure.ac
>> > index a54b7cb..8ed606c 100644
>> > --- a/configure.ac
>> > +++ b/configure.ac
>> > @@ -96,22 +96,22 @@ XDAMAGE_REQUIRED=1.1
>> >  XSHMFENCE_REQUIRED=1.1
>> >  XVMC_REQUIRED=1.0.6
>> >  PYTHON_MAKO_REQUIRED=0.8.0
>> >  LIBSENSORS_REQUIRED=4.0.0
>> >  ZLIB_REQUIRED=1.2.3
>> >
>> >  dnl LLVM versions
>> >  LLVM_REQUIRED_GALLIUM=3.3.0
>> >  LLVM_REQUIRED_OPENCL=3.9.0
>> >  LLVM_REQUIRED_R600=3.9.0
>> > -LLVM_REQUIRED_RADEONSI=3.9.0
>> > -LLVM_REQUIRED_RADV=3.9.0
>> > +LLVM_REQUIRED_RADEONSI=4.0.0
>> > +LLVM_REQUIRED_RADV=4.0.0
>> >  LLVM_REQUIRED_SWR=3.9.0
>> >
>> >  dnl Check for progs
>> >  AC_PROG_CPP
>> >  AC_PROG_CC
>> >  AC_PROG_CXX
>> >  dnl add this here, so the help for this environmnet variable is close 
>> to
>> >  dnl other CC/CXX flags related help
>> >  AC_ARG_VAR([CXX11_CXXFLAGS], [Compiler flag to enable C++11 support
>> (only needed if not
>> >enabled by default and different  from
>> -std=c++11)])
>> > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_
>> build.c
>> > index 6ede60a..3efcaa1 100644
>> > --- a/src/amd/common/ac_llvm_build.c
>> > +++ b/src/amd/common/ac_llvm_build.c
>> > @@ -213,22 +213,21 @@ ac_to_float(struct ac_llvm_context *ctx,
>> LLVMValueRef v)
>> > return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx,
>> type), "");
>> >  }
>> >
>> >
>> >  LLVMValueRef
>> >  ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
>> >LLVMTypeRef return_type, LLVMValueRef *params,
>> >unsigned param_count, unsigned attrib_mask)
>> >  {
>> > LLVMValueRef function, call;
>> > -   bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
>> > - !(attrib_mask & 

Re: [Mesa-dev] [PATCH] meson: Check for actual LLVM required versions

2018-02-02 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 2, 2018 at 7:55 PM, Dylan Baker  wrote:
> Currently we always check for 3.9.0, which is pretty safe since
> everything except radv works with >= 3.9 and 3.9 is pretty old at this
> point. However, radv actually requires 4.0, and there is a patch for
> radeonsi to do the same.
>
> Fixes: 673dda833076 ("meson: build "radv" vulkan driver for radeon hardware")
> Signed-off-by: Dylan Baker 
> ---
>  meson.build | 12 ++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/meson.build b/meson.build
> index 80ea60ffa7d..0a030b92d0a 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -988,15 +988,23 @@ if with_gallium_opencl
># TODO: optional modules
>  endif
>
> +if with_amd_vk
> +  _llvm_version = '>= 4.0.0'
> +elif with_gallium_opencl or with_gallium_swr or with_gallium_r600 or 
> with_gallium_radeonsi
> +  _llvm_version = '>= 3.9.0'
> +else
> +  _llvm_version = '>= 3.3.0'
> +endif
> +
>  _llvm = get_option('llvm')
>  if _llvm == 'auto'
>dep_llvm = dependency(
> -'llvm', version : '>= 3.9.0', modules : llvm_modules,
> +'llvm', version : _llvm_version, modules : llvm_modules,
>  required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or 
> with_gallium_opencl,
>)
>with_llvm = dep_llvm.found()
>  elif _llvm == 'true'
> -  dep_llvm = dependency('llvm', version : '>= 3.9.0', modules : llvm_modules)
> +  dep_llvm = dependency('llvm', version : _llvm_version, modules : 
> llvm_modules)
>with_llvm = true
>  else
>dep_llvm = []
> --
> 2.16.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 9/9] meson: fix xvmc target linkage

2018-02-02 Thread Dylan Baker
This needs to link the state tracker with --whole-archive to expose the
right symbols.

Fixes: 22a817af8a89eb3c7 ("meson: build gallium xvmc state tracker")
Signed-off-by: Dylan Baker 
---
 src/gallium/targets/xvmc/meson.build | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/targets/xvmc/meson.build 
b/src/gallium/targets/xvmc/meson.build
index 07d6c72..4889942 100644
--- a/src/gallium/targets/xvmc/meson.build
+++ b/src/gallium/targets/xvmc/meson.build
@@ -24,6 +24,7 @@
 
 xvmc_link_args = []
 xvmc_link_depends = []
+xvmc_link_with = []
 xvmc_drivers = []
 
 if with_ld_version_script
@@ -31,6 +32,13 @@ if with_ld_version_script
   xvmc_link_depends += files('xvmc.sym')
 endif
 
+if with_dri
+  xvmc_link_with += libswdri
+endif
+if with_gallium_drisw_kms
+  xvmc_link_with += libswkmsdri
+endif
+
 libxvmc_gallium = shared_library(
   'XvMCgallium',
   'target.c',
@@ -40,9 +48,10 @@ libxvmc_gallium = shared_library(
   include_directories : [
 inc_common, inc_util, inc_gallium_winsys, inc_gallium_drivers,
   ],
+  link_whole : [libxvmc_st],
   link_with : [
-libxvmc_st, libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
-libpipe_loader_static, libws_null, libwsw,
+libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
+libpipe_loader_static, libws_null, libwsw, xvmc_link_with,
   ],
   dependencies : [dep_thread, driver_r600, driver_nouveau],
   link_depends : xvmc_link_depends,
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 8/9] meson: Fix xa target linkage

2018-02-02 Thread Dylan Baker
This needs to use --whole-archive (link_whole in meson) to properly
expose symbols.

Fixes: 0ba909f0f111824 ("meson: build gallium xa state tracker")
Signed-off-by: Dylan Baker 
---
 src/gallium/targets/xa/meson.build | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/targets/xa/meson.build 
b/src/gallium/targets/xa/meson.build
index 75808cd..7417a27 100644
--- a/src/gallium/targets/xa/meson.build
+++ b/src/gallium/targets/xa/meson.build
@@ -24,12 +24,20 @@
 
 xa_link_args = []
 xa_link_depends = []
+xa_link_with = []
 
 if with_ld_version_script
   xa_link_args += ['-Wl,--version-script', 
join_paths(meson.current_source_dir(), 'xa.sym')]
   xa_link_depends += files('xa.sym')
 endif
 
+if with_dri
+  xa_link_with += libswdri
+endif
+if with_gallium_drisw_kms
+  xa_link_with += libswkmsdri
+endif
+
 libxatracker = shared_library(
   'xatracker',
   'target.c',
@@ -39,9 +47,10 @@ libxatracker = shared_library(
   include_directories : [
 inc_common, inc_util, inc_gallium_winsys, inc_gallium_drivers,
   ],
+  link_whole : [libxa_st],
   link_with : [
-libxa_st, libgalliumvl_stub, libgallium, libmesa_util,
-libpipe_loader_static, libws_null, libwsw,
+libgalliumvl_stub, libgallium, libmesa_util, libpipe_loader_static,
+libws_null, libwsw, xa_link_with,
   ],
   link_depends : xa_link_depends,
   dependencies : [
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 7/9] meson: Fix omx-bellagio target linkage

2018-02-02 Thread Dylan Baker
This needs to use --whole-archive (link_whole in meson) to properly
expose symbols.

Fixes: 1d36dc674d528b93b ("meson: build gallium omx state tracker")
Signed-off-by: Dylan Baker 
---
 src/gallium/targets/omx-bellagio/meson.build | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/targets/omx-bellagio/meson.build 
b/src/gallium/targets/omx-bellagio/meson.build
index c9e8eb8..75f0f02 100644
--- a/src/gallium/targets/omx-bellagio/meson.build
+++ b/src/gallium/targets/omx-bellagio/meson.build
@@ -24,6 +24,14 @@
 
 omx_link_args = []
 omx_link_depends = []
+omx_link_with = []
+
+if with_dri
+  omx_link_with += libswdri
+endif
+if with_gallium_drisw_kms
+  omx_link_with += libswkmsdri
+endif
 
 if with_ld_version_script
   omx_link_args += ['-Wl,--version-script', 
join_paths(meson.current_source_dir(), 'omx.sym')]
@@ -39,9 +47,10 @@ libomx_gallium = shared_library(
   include_directories : [
 inc_common, inc_util, inc_gallium_winsys, inc_gallium_drivers,
   ],
+  link_whole : [libomx_st],
   link_with : [
-libomx_st, libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
-libpipe_loader_static, libws_null, libwsw,
+libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
+libpipe_loader_static, libws_null, libwsw, omx_link_with,
   ],
   link_depends : omx_link_depends,
   dependencies : [dep_thread, driver_r600, driver_radeonsi, driver_nouveau],
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 5/9] meson: fix vdpau target linkage

2018-02-02 Thread Dylan Baker
The VDPAU state tracker needs to be linked with whole-archive (autotools
does this). Because we are linking the whole archive we also need to
link with libswdri and libswkmsdri if those have been enabled.

Fixes: 68076b87474e7959 ("meson: build gallium vdpau state tracker")
Signed-off-by: Dylan Baker 
---
 src/gallium/targets/vdpau/meson.build | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/targets/vdpau/meson.build 
b/src/gallium/targets/vdpau/meson.build
index a03f0ed..53af146 100644
--- a/src/gallium/targets/vdpau/meson.build
+++ b/src/gallium/targets/vdpau/meson.build
@@ -23,6 +23,7 @@
 # configure.ac)
 
 vdpau_link_args = []
+vdpau_link_with = []
 vdpau_link_depends = []
 vdpau_drivers = []
 
@@ -35,6 +36,13 @@ if with_ld_dynamic_list
   vdpau_link_depends += files('../dri-vdpau.dyn')
 endif
 
+if with_dri
+  vdpau_link_with += libswdri
+endif
+if with_gallium_drisw_kms
+  vdpau_link_with += libswkmsdri
+endif
+
 libvdpau_gallium = shared_library(
   'vdpau_gallium',
   'target.c',
@@ -44,9 +52,10 @@ libvdpau_gallium = shared_library(
   include_directories : [
 inc_common, inc_util, inc_gallium_winsys, inc_gallium_drivers,
   ],
+  link_whole : [libvdpau_st],
   link_with : [
-libvdpau_st, libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
-libpipe_loader_static, libws_null, libwsw,
+libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
+libpipe_loader_static, libws_null, libwsw, vdpau_link_with,
   ],
   dependencies : [
 dep_thread, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 6/9] meson: fix va target linkage

2018-02-02 Thread Dylan Baker
The state tracker needs to be linked with whole-archive (like
autotools). As a result there are symbols from libswdri and libswkmsdri
that are needed, so link those as well.

Fixes: 5a785d51a6d6 ("meson: build gallium va state tracker")
Signed-off-by: Dylan Baker 
---
 src/gallium/targets/va/meson.build | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/targets/va/meson.build 
b/src/gallium/targets/va/meson.build
index 1e453c9..d3999d7 100644
--- a/src/gallium/targets/va/meson.build
+++ b/src/gallium/targets/va/meson.build
@@ -24,6 +24,7 @@
 
 va_link_args = []
 va_link_depends = []
+va_link_with = []
 va_drivers = []
 
 if with_ld_version_script
@@ -31,6 +32,13 @@ if with_ld_version_script
   va_link_depends += files('va.sym')
 endif
 
+if with_dri
+  va_link_with += libswdri
+endif
+if with_gallium_drisw_kms
+  va_link_with += libswkmsdri
+endif
+
 libva_gallium = shared_library(
   'gallium_drv_video',
   'target.c',
@@ -40,9 +48,10 @@ libva_gallium = shared_library(
   include_directories : [
 inc_common, inc_util, inc_gallium_winsys, inc_gallium_drivers,
   ],
+  link_whole : [libva_st],
   link_with : [
-libva_st, libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
-libpipe_loader_static, libws_null, libwsw,
+libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
+libpipe_loader_static, libws_null, libwsw, va_link_with,
   ],
   dependencies : [
 dep_libdrm, dep_thread, driver_r600, driver_radeonsi, driver_nouveau,
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 0/9] meson: fix gallium media target linkage

2018-02-02 Thread Dylan Baker
The linkage of all of the gallium media targets is broken in various
ways in the meson build. This series should correct that by more closely
following what the autotools build does.

You may note in the last 5 patches the addition of libswdri and
libswkmsdri; these are needed because we're now linking the whole state
tracker and the symbols in those libraries would otherwise be unresolved. In
this case as-needed and gc-sections were hiding this.

Changes since v2:
- Add patch to fix va-api version checking
- link xcb libs into vlwinsys instead of into each media state
  tracker/target
- Split the remaining bits into patches that fix one problem.

Dylan Baker (9):
  meson: use va-api version reported by pkg-config
  meson: link dri3 xcb libs into vlwinsys instead of into each target
  meson: actually link with libomxil-bellagio
  meson: Actually link xvmc target with libxvmc
  meson: fix vdpau target linkage
  meson: fix va target linkage
  meson: Fix omx-bellagio target linkage
  meson: Fix xa target linkage
  meson: fix xvmc target linkage

 meson.build  | 10 +-
 src/gallium/auxiliary/meson.build|  7 ++-
 src/gallium/state_trackers/va/meson.build|  6 +++---
 src/gallium/targets/omx-bellagio/meson.build | 18 --
 src/gallium/targets/va/meson.build   | 17 +++--
 src/gallium/targets/vdpau/meson.build| 16 
 src/gallium/targets/xa/meson.build   | 16 
 src/gallium/targets/xvmc/meson.build | 18 --
 8 files changed, 69 insertions(+), 39 deletions(-)

base-commit: d7235ef83b92175537e3b538634ffcff29bf0dce
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 1/9] meson: use va-api version reported by pkg-config

2018-02-02 Thread Dylan Baker
Fixes: 5a785d51a6d6 ("meson: build gallium va state tracker")
Signed-off-by: Dylan Baker 
---
 meson.build   | 2 +-
 src/gallium/state_trackers/va/meson.build | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/meson.build b/meson.build
index 323f706..4c6283c 100644
--- a/meson.build
+++ b/meson.build
@@ -533,7 +533,7 @@ with_gallium_va = _va == 'true'
 dep_va = []
 if with_gallium_va
   dep_va = dependency('libva', version : '>= 0.38.0')
-  dep_va = declare_dependency(
+  dep_va_headers = declare_dependency(
 compile_args : run_command(prog_pkgconfig, ['libva', 
'--cflags']).stdout().split()
   )
 endif
diff --git a/src/gallium/state_trackers/va/meson.build 
b/src/gallium/state_trackers/va/meson.build
index 56e68e9..35da5ab 100644
--- a/src/gallium/state_trackers/va/meson.build
+++ b/src/gallium/state_trackers/va/meson.build
@@ -1,4 +1,4 @@
-# Copyright © 2017 Intel Corporation
+# Copyright © 2017-2018 Intel Corporation
 
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -18,7 +18,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-libva_version = ['2', '3', '0']
+libva_version = dep_va.version().split('.')
 
 libva_st = static_library(
   'va_st',
@@ -35,5 +35,5 @@ libva_st = static_library(
 ),
   ],
   include_directories : [inc_common],
-  dependencies : [dep_va, dep_x11_xcb, dep_xcb, dep_xcb_dri2, dep_xcb_dri3],
+  dependencies : [dep_va_headers, dep_x11_xcb, dep_xcb, dep_xcb_dri2, 
dep_xcb_dri3],
 )
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 3/9] meson: actually link with libomxil-bellagio

2018-02-02 Thread Dylan Baker
This state tracker actually needs to link against libomxil-bellagio, unlike
vdpau.

Fixes: 1d36dc674d528b93b ("meson: build gallium omx state tracker")
Signed-off-by: Dylan Baker 
---
 meson.build | 5 -
 1 file changed, 5 deletions(-)

diff --git a/meson.build b/meson.build
index 4c6283c..2df2497 100644
--- a/meson.build
+++ b/meson.build
@@ -501,11 +501,6 @@ if with_gallium_omx
 endif
   endif
 endif
-if with_gallium_omx
-  dep_omx = declare_dependency(
-compile_args : run_command(prog_pkgconfig, ['libomxil-bellagio', 
'--cflags']).stdout().split()
-  )
-endif
 
 _va = get_option('gallium-va')
 if not system_has_kms_drm
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 4/9] meson: Actually link xvmc target with libxvmc

2018-02-02 Thread Dylan Baker
Unlike vdpau this is required.

Fixes: 22a817af8a89eb3c7 ("meson: build gallium xvmc state tracker")
Signed-off-by: Dylan Baker 
---
 meson.build | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/meson.build b/meson.build
index 2df2497..4f280d9 100644
--- a/meson.build
+++ b/meson.build
@@ -439,9 +439,6 @@ with_gallium_xvmc = _xvmc == 'true'
 dep_xvmc = []
 if with_gallium_xvmc
   dep_xvmc = dependency('xvmc', version : '>= 1.0.6')
-  dep_xvmc = declare_dependency(
-compile_args : run_command(prog_pkgconfig, ['xvmc', 
'--cflags']).stdout().split()
-  )
 endif
 
 xvmc_drivers_path = get_option('xvmc-libs-path')
-- 
git-series 0.9.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 2/9] meson: link dri3 xcb libs into vlwinsys instead of into each target

2018-02-02 Thread Dylan Baker
This makes the dependencies easier to manage, since each media target
doesn't need to worry about linking to half a dozen libraries.

Fixes: b1b65397d0c4978e3 ("meson: Build gallium auxiliary")
Signed-off-by: Dylan Baker 
---
 src/gallium/auxiliary/meson.build| 7 ++-
 src/gallium/targets/omx-bellagio/meson.build | 5 +
 src/gallium/targets/va/meson.build   | 6 +-
 src/gallium/targets/vdpau/meson.build| 3 +--
 src/gallium/targets/xa/meson.build   | 3 +--
 src/gallium/targets/xvmc/meson.build | 5 +
 6 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/meson.build 
b/src/gallium/auxiliary/meson.build
index 6f1542d..5908f9c 100644
--- a/src/gallium/auxiliary/meson.build
+++ b/src/gallium/auxiliary/meson.build
@@ -452,10 +452,15 @@ files_libgalliumvl = files(
   'vl/vl_zscan.h',
 )
 
+vlwinsys_deps = []
 files_libgalliumvlwinsys = files('vl/vl_winsys.h')
 if with_dri2
   files_libgalliumvlwinsys += files('vl/vl_winsys_dri.c')
   if with_dri3
+vlwinsys_deps += [
+  dep_xcb_sync, dep_xcb_present, dep_xshmfence, dep_xcb_xfixes,
+  dep_xcb_dri3, 
+]
 files_libgalliumvlwinsys += files('vl/vl_winsys_dri3.c')
   endif
 endif
@@ -526,6 +531,6 @@ libgalliumvlwinsys = static_library(
   'galliumvlwinsys',
   files_libgalliumvlwinsys,
   include_directories : [inc_gallium, inc_include, inc_loader, inc_src],
-  dependencies : [dep_libdrm],
+  dependencies : [dep_libdrm, vlwinsys_deps],
   build_by_default : false,
 )
diff --git a/src/gallium/targets/omx-bellagio/meson.build 
b/src/gallium/targets/omx-bellagio/meson.build
index a3fba3f..c9e8eb8 100644
--- a/src/gallium/targets/omx-bellagio/meson.build
+++ b/src/gallium/targets/omx-bellagio/meson.build
@@ -44,10 +44,7 @@ libomx_gallium = shared_library(
 libpipe_loader_static, libws_null, libwsw,
   ],
   link_depends : omx_link_depends,
-  dependencies : [
-dep_xcb, dep_x11_xcb, dep_xcb_dri2, dep_xcb_dri3, dep_libdrm, dep_thread,
-driver_r600, driver_radeonsi, driver_nouveau,
-  ],
+  dependencies : [dep_thread, driver_r600, driver_radeonsi, driver_nouveau],
   install : true,
   install_dir : omx_drivers_path,
 )
diff --git a/src/gallium/targets/va/meson.build 
b/src/gallium/targets/va/meson.build
index 0ea0cd1..1e453c9 100644
--- a/src/gallium/targets/va/meson.build
+++ b/src/gallium/targets/va/meson.build
@@ -22,7 +22,6 @@
 # Static targets are always enabled in autotools (unless you modify
 # configure.ac)
 
-va_deps = []
 va_link_args = []
 va_link_depends = []
 va_drivers = []
@@ -31,9 +30,6 @@ if with_ld_version_script
   va_link_args += ['-Wl,--version-script', 
join_paths(meson.current_source_dir(), 'va.sym')]
   va_link_depends += files('va.sym')
 endif
-if with_platform_x11
-  va_deps += [dep_xcb, dep_x11_xcb, dep_xcb_dri2, dep_xcb_dri3]
-endif
 
 libva_gallium = shared_library(
   'gallium_drv_video',
@@ -49,7 +45,7 @@ libva_gallium = shared_library(
 libpipe_loader_static, libws_null, libwsw,
   ],
   dependencies : [
-dep_libdrm, dep_thread, va_deps, driver_r600, driver_radeonsi, 
driver_nouveau,
+dep_libdrm, dep_thread, driver_r600, driver_radeonsi, driver_nouveau,
   ],
   link_depends : va_link_depends,
   install : true,
diff --git a/src/gallium/targets/vdpau/meson.build 
b/src/gallium/targets/vdpau/meson.build
index 67f1469..a03f0ed 100644
--- a/src/gallium/targets/vdpau/meson.build
+++ b/src/gallium/targets/vdpau/meson.build
@@ -49,8 +49,7 @@ libvdpau_gallium = shared_library(
 libpipe_loader_static, libws_null, libwsw,
   ],
   dependencies : [
-dep_thread, dep_xcb, dep_x11_xcb, dep_xcb_dri2, dep_libdrm,
-driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
+dep_thread, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
   ],
   link_depends : vdpau_link_depends,
 )
diff --git a/src/gallium/targets/xa/meson.build 
b/src/gallium/targets/xa/meson.build
index 8ff6486..75808cd 100644
--- a/src/gallium/targets/xa/meson.build
+++ b/src/gallium/targets/xa/meson.build
@@ -45,8 +45,7 @@ libxatracker = shared_library(
   ],
   link_depends : xa_link_depends,
   dependencies : [
-dep_xcb, dep_x11_xcb, dep_xcb_dri2, dep_xcb_dri3, dep_libdrm, dep_thread,
-driver_nouveau, driver_i915, driver_svga, driver_freedreno,
+dep_thread, driver_nouveau, driver_i915, driver_svga, driver_freedreno,
   ],
   install : true,
 )
diff --git a/src/gallium/targets/xvmc/meson.build 
b/src/gallium/targets/xvmc/meson.build
index 48759de..07d6c72 100644
--- a/src/gallium/targets/xvmc/meson.build
+++ b/src/gallium/targets/xvmc/meson.build
@@ -44,10 +44,7 @@ libxvmc_gallium = shared_library(
 libxvmc_st, libgalliumvlwinsys, libgalliumvl, libgallium, libmesa_util,
 libpipe_loader_static, libws_null, libwsw,
   ],
-  dependencies : [
-dep_xcb, dep_x11_xcb, dep_xcb_dri2, dep_xcb_dri3, dep_libdrm, dep_thread,
-driver_r600, driver_nouveau,
-  ],
+  dependencies : [dep_thread, 

Re: [Mesa-dev] [PATCH] amd: remove support for LLVM 3.9

2018-02-02 Thread Dylan Baker
FYI: I sent out a patch a few minutes ago to set the minimum version more like
autotools does, but I'm fine with this landing first.

Reviewed-by: Dylan Baker 

Quoting Marek Olšák (2018-02-02 11:07:57)
> Adding this:
> 
> diff --git a/meson.build b/meson.build
> index 80ea60f..22eb702 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -991,12 +991,12 @@ endif
>  _llvm = get_option('llvm')
>  if _llvm == 'auto'
>    dep_llvm = dependency(
> -    'llvm', version : '>= 3.9.0', modules : llvm_modules,
> +    'llvm', version : '>= 4.0.0', modules : llvm_modules,
>  required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or
> with_gallium_opencl,
>    )
>    with_llvm = dep_llvm.found()
>  elif _llvm == 'true'
> -  dep_llvm = dependency('llvm', version : '>= 3.9.0', modules : llvm_modules)
> +  dep_llvm = dependency('llvm', version : '>= 4.0.0', modules : llvm_modules)
>    with_llvm = true
>  else
>    dep_llvm = []
> @@ -1019,7 +1019,7 @@ if with_llvm
>  '-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
>    ]
>  elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
> -  error('The following drivers requires LLVM: Radv, RadeonSI, SWR. One of
> these is enabled, but LLVM is disabled.')
> +  error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of 
> these
> is enabled, but LLVM is disabled.')
>  endif
>  
>  dep_glvnd = []
> 
> Marek
> 
> On Fri, Feb 2, 2018 at 8:02 PM, Bas Nieuwenhuizen 
> wrote:
> 
> Also change meson.build?
> 
> On Fri, Feb 2, 2018 at 7:34 PM, Marek Olšák  wrote:
> > From: Marek Olšák 
> >
> > Only these are supported:
> > - LLVM 4.0
> > - LLVM 5.0
> > - LLVM 6.0
> > - master (7.0)
> > ---
> >  configure.ac                                       |   4 +-
> >  src/amd/common/ac_llvm_build.c                     | 187
> ++---
> >  src/amd/common/ac_llvm_helper.cpp                  |  10 --
> >  src/amd/common/ac_llvm_util.c                      |  39 +
> >  src/amd/common/ac_llvm_util.h                      |  14 +-
> >  src/amd/common/ac_nir_to_llvm.c                    |  32 +---
> >  src/amd/vulkan/radv_device.c                       |   4 -
> >  src/gallium/drivers/radeonsi/si_compute.c          |   3 +-
> >  src/gallium/drivers/radeonsi/si_get.c              |  13 +-
> >  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  |   3 +-
> >  .../drivers/radeonsi/si_shader_tgsi_setup.c        |   2 -
> >  11 files changed, 72 insertions(+), 239 deletions(-)
> >
> > diff --git a/configure.ac b/configure.ac
> > index a54b7cb..8ed606c 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -96,22 +96,22 @@ XDAMAGE_REQUIRED=1.1
> >  XSHMFENCE_REQUIRED=1.1
> >  XVMC_REQUIRED=1.0.6
> >  PYTHON_MAKO_REQUIRED=0.8.0
> >  LIBSENSORS_REQUIRED=4.0.0
> >  ZLIB_REQUIRED=1.2.3
> >
> >  dnl LLVM versions
> >  LLVM_REQUIRED_GALLIUM=3.3.0
> >  LLVM_REQUIRED_OPENCL=3.9.0
> >  LLVM_REQUIRED_R600=3.9.0
> > -LLVM_REQUIRED_RADEONSI=3.9.0
> > -LLVM_REQUIRED_RADV=3.9.0
> > +LLVM_REQUIRED_RADEONSI=4.0.0
> > +LLVM_REQUIRED_RADV=4.0.0
> >  LLVM_REQUIRED_SWR=3.9.0
> >
> >  dnl Check for progs
> >  AC_PROG_CPP
> >  AC_PROG_CC
> >  AC_PROG_CXX
> >  dnl add this here, so the help for this environmnet variable is close 
> to
> >  dnl other CC/CXX flags related help
> >  AC_ARG_VAR([CXX11_CXXFLAGS], [Compiler flag to enable C++11 support
> (only needed if not
> >                                enabled by default and different  from
> -std=c++11)])
> > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_
> build.c
> > index 6ede60a..3efcaa1 100644
> > --- a/src/amd/common/ac_llvm_build.c
> > +++ b/src/amd/common/ac_llvm_build.c
> > @@ -213,22 +213,21 @@ ac_to_float(struct ac_llvm_context *ctx,
> LLVMValueRef v)
> >         return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx,
> type), "");
> >  }
> >
> >
> >  LLVMValueRef
> >  ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
> >                    LLVMTypeRef return_type, LLVMValueRef *params,
> >                    unsigned param_count, unsigned attrib_mask)
> >  {
> >         LLVMValueRef function, call;
> > -       bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
> > -                                 !(attrib_mask & AC_FUNC_ATTR_LEGACY);
> > +       bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
> >
> >         function = LLVMGetNamedFunction(ctx->module, name);
> >         if (!function) {
> >                 LLVMTypeRef param_types[32], function_type;
> >                 unsigned i;
> >
> >                 assert(param_count <= 32);
>

[Mesa-dev] [PATCH] meson: Check for actual LLVM required versions

2018-02-02 Thread Dylan Baker
Currently we always check for 3.9.0, which is pretty safe since
everything except radv works with >= 3.9 and 3.9 is pretty old at this
point. However, radv actually requires 4.0, and there is a patch for
radeonsi to do the same.

Fixes: 673dda833076 ("meson: build "radv" vulkan driver for radeon hardware")
Signed-off-by: Dylan Baker 
---
 meson.build | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/meson.build b/meson.build
index 80ea60ffa7d..0a030b92d0a 100644
--- a/meson.build
+++ b/meson.build
@@ -988,15 +988,23 @@ if with_gallium_opencl
   # TODO: optional modules
 endif
 
+if with_amd_vk
+  _llvm_version = '>= 4.0.0'
+elif with_gallium_opencl or with_gallium_swr or with_gallium_r600 or 
with_gallium_radeonsi
+  _llvm_version = '>= 3.9.0'
+else
+  _llvm_version = '>= 3.3.0'
+endif
+
 _llvm = get_option('llvm')
 if _llvm == 'auto'
   dep_llvm = dependency(
-'llvm', version : '>= 3.9.0', modules : llvm_modules,
+'llvm', version : _llvm_version, modules : llvm_modules,
 required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or 
with_gallium_opencl,
   )
   with_llvm = dep_llvm.found()
 elif _llvm == 'true'
-  dep_llvm = dependency('llvm', version : '>= 3.9.0', modules : llvm_modules)
+  dep_llvm = dependency('llvm', version : _llvm_version, modules : 
llvm_modules)
   with_llvm = true
 else
   dep_llvm = []
-- 
2.16.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] amd: remove support for LLVM 3.9

2018-02-02 Thread Marek Olšák
Adding this:

diff --git a/meson.build b/meson.build
index 80ea60f..22eb702 100644
--- a/meson.build
+++ b/meson.build
@@ -991,12 +991,12 @@ endif
 _llvm = get_option('llvm')
 if _llvm == 'auto'
   dep_llvm = dependency(
-'llvm', version : '>= 3.9.0', modules : llvm_modules,
+'llvm', version : '>= 4.0.0', modules : llvm_modules,
 required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or
with_gallium_opencl,
   )
   with_llvm = dep_llvm.found()
 elif _llvm == 'true'
-  dep_llvm = dependency('llvm', version : '>= 3.9.0', modules :
llvm_modules)
+  dep_llvm = dependency('llvm', version : '>= 4.0.0', modules :
llvm_modules)
   with_llvm = true
 else
   dep_llvm = []
@@ -1019,7 +1019,7 @@ if with_llvm
 '-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
   ]
 elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
-  error('The following drivers requires LLVM: Radv, RadeonSI, SWR. One of
these is enabled, but LLVM is disabled.')
+  error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of
these is enabled, but LLVM is disabled.')
 endif

 dep_glvnd = []

Marek

On Fri, Feb 2, 2018 at 8:02 PM, Bas Nieuwenhuizen 
wrote:

> Also change meson.build?
>
> On Fri, Feb 2, 2018 at 7:34 PM, Marek Olšák  wrote:
> > From: Marek Olšák 
> >
> > Only these are supported:
> > - LLVM 4.0
> > - LLVM 5.0
> > - LLVM 6.0
> > - master (7.0)
> > ---
> >  configure.ac   |   4 +-
> >  src/amd/common/ac_llvm_build.c | 187
> ++---
> >  src/amd/common/ac_llvm_helper.cpp  |  10 --
> >  src/amd/common/ac_llvm_util.c  |  39 +
> >  src/amd/common/ac_llvm_util.h  |  14 +-
> >  src/amd/common/ac_nir_to_llvm.c|  32 +---
> >  src/amd/vulkan/radv_device.c   |   4 -
> >  src/gallium/drivers/radeonsi/si_compute.c  |   3 +-
> >  src/gallium/drivers/radeonsi/si_get.c  |  13 +-
> >  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  |   3 +-
> >  .../drivers/radeonsi/si_shader_tgsi_setup.c|   2 -
> >  11 files changed, 72 insertions(+), 239 deletions(-)
> >
> > diff --git a/configure.ac b/configure.ac
> > index a54b7cb..8ed606c 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -96,22 +96,22 @@ XDAMAGE_REQUIRED=1.1
> >  XSHMFENCE_REQUIRED=1.1
> >  XVMC_REQUIRED=1.0.6
> >  PYTHON_MAKO_REQUIRED=0.8.0
> >  LIBSENSORS_REQUIRED=4.0.0
> >  ZLIB_REQUIRED=1.2.3
> >
> >  dnl LLVM versions
> >  LLVM_REQUIRED_GALLIUM=3.3.0
> >  LLVM_REQUIRED_OPENCL=3.9.0
> >  LLVM_REQUIRED_R600=3.9.0
> > -LLVM_REQUIRED_RADEONSI=3.9.0
> > -LLVM_REQUIRED_RADV=3.9.0
> > +LLVM_REQUIRED_RADEONSI=4.0.0
> > +LLVM_REQUIRED_RADV=4.0.0
> >  LLVM_REQUIRED_SWR=3.9.0
> >
> >  dnl Check for progs
> >  AC_PROG_CPP
> >  AC_PROG_CC
> >  AC_PROG_CXX
> >  dnl add this here, so the help for this environmnet variable is close to
> >  dnl other CC/CXX flags related help
> >  AC_ARG_VAR([CXX11_CXXFLAGS], [Compiler flag to enable C++11 support
> (only needed if not
> >enabled by default and different  from
> -std=c++11)])
> > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_
> build.c
> > index 6ede60a..3efcaa1 100644
> > --- a/src/amd/common/ac_llvm_build.c
> > +++ b/src/amd/common/ac_llvm_build.c
> > @@ -213,22 +213,21 @@ ac_to_float(struct ac_llvm_context *ctx,
> LLVMValueRef v)
> > return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx,
> type), "");
> >  }
> >
> >
> >  LLVMValueRef
> >  ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
> >LLVMTypeRef return_type, LLVMValueRef *params,
> >unsigned param_count, unsigned attrib_mask)
> >  {
> > LLVMValueRef function, call;
> > -   bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
> > - !(attrib_mask & AC_FUNC_ATTR_LEGACY);
> > +   bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
> >
> > function = LLVMGetNamedFunction(ctx->module, name);
> > if (!function) {
> > LLVMTypeRef param_types[32], function_type;
> > unsigned i;
> >
> > assert(param_count <= 32);
> >
> > for (i = 0; i < param_count; ++i) {
> > assert(params[i]);
> > @@ -714,34 +713,20 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
> >  LLVMValueRef
> >  ac_build_fs_interp(struct ac_llvm_context *ctx,
> >LLVMValueRef llvm_chan,
> >LLVMValueRef attr_number,
> >LLVMValueRef params,
> >LLVMValueRef i,
> >LLVMValueRef j)
> >  {
> > LLVMValueRef args[5];
> > LLVMValueRef p1;
> > -
> > -   if (HAVE_LLVM < 0x0400) {
> > -   LLVMValueRef ij[2];
> > -  

Re: [Mesa-dev] [PATCH] amd: remove support for LLVM 3.9

2018-02-02 Thread Bas Nieuwenhuizen
Also change meson.build?

On Fri, Feb 2, 2018 at 7:34 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> Only these are supported:
> - LLVM 4.0
> - LLVM 5.0
> - LLVM 6.0
> - master (7.0)
> ---
>  configure.ac   |   4 +-
>  src/amd/common/ac_llvm_build.c | 187 
> ++---
>  src/amd/common/ac_llvm_helper.cpp  |  10 --
>  src/amd/common/ac_llvm_util.c  |  39 +
>  src/amd/common/ac_llvm_util.h  |  14 +-
>  src/amd/common/ac_nir_to_llvm.c|  32 +---
>  src/amd/vulkan/radv_device.c   |   4 -
>  src/gallium/drivers/radeonsi/si_compute.c  |   3 +-
>  src/gallium/drivers/radeonsi/si_get.c  |  13 +-
>  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  |   3 +-
>  .../drivers/radeonsi/si_shader_tgsi_setup.c|   2 -
>  11 files changed, 72 insertions(+), 239 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index a54b7cb..8ed606c 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -96,22 +96,22 @@ XDAMAGE_REQUIRED=1.1
>  XSHMFENCE_REQUIRED=1.1
>  XVMC_REQUIRED=1.0.6
>  PYTHON_MAKO_REQUIRED=0.8.0
>  LIBSENSORS_REQUIRED=4.0.0
>  ZLIB_REQUIRED=1.2.3
>
>  dnl LLVM versions
>  LLVM_REQUIRED_GALLIUM=3.3.0
>  LLVM_REQUIRED_OPENCL=3.9.0
>  LLVM_REQUIRED_R600=3.9.0
> -LLVM_REQUIRED_RADEONSI=3.9.0
> -LLVM_REQUIRED_RADV=3.9.0
> +LLVM_REQUIRED_RADEONSI=4.0.0
> +LLVM_REQUIRED_RADV=4.0.0
>  LLVM_REQUIRED_SWR=3.9.0
>
>  dnl Check for progs
>  AC_PROG_CPP
>  AC_PROG_CC
>  AC_PROG_CXX
>  dnl add this here, so the help for this environmnet variable is close to
>  dnl other CC/CXX flags related help
>  AC_ARG_VAR([CXX11_CXXFLAGS], [Compiler flag to enable C++11 support (only 
> needed if not
>enabled by default and different  from 
> -std=c++11)])
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 6ede60a..3efcaa1 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -213,22 +213,21 @@ ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
> return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), 
> "");
>  }
>
>
>  LLVMValueRef
>  ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
>LLVMTypeRef return_type, LLVMValueRef *params,
>unsigned param_count, unsigned attrib_mask)
>  {
> LLVMValueRef function, call;
> -   bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
> - !(attrib_mask & AC_FUNC_ATTR_LEGACY);
> +   bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
>
> function = LLVMGetNamedFunction(ctx->module, name);
> if (!function) {
> LLVMTypeRef param_types[32], function_type;
> unsigned i;
>
> assert(param_count <= 32);
>
> for (i = 0; i < param_count; ++i) {
> assert(params[i]);
> @@ -714,34 +713,20 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
>  LLVMValueRef
>  ac_build_fs_interp(struct ac_llvm_context *ctx,
>LLVMValueRef llvm_chan,
>LLVMValueRef attr_number,
>LLVMValueRef params,
>LLVMValueRef i,
>LLVMValueRef j)
>  {
> LLVMValueRef args[5];
> LLVMValueRef p1;
> -
> -   if (HAVE_LLVM < 0x0400) {
> -   LLVMValueRef ij[2];
> -   ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
> -   ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");
> -
> -   args[0] = llvm_chan;
> -   args[1] = attr_number;
> -   args[2] = params;
> -   args[3] = ac_build_gather_values(ctx, ij, 2);
> -   return ac_build_intrinsic(ctx, "llvm.SI.fs.interp",
> - ctx->f32, args, 4,
> - AC_FUNC_ATTR_READNONE);
> -   }
>
> args[0] = i;
> args[1] = llvm_chan;
> args[2] = attr_number;
> args[3] = params;
>
> p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
> ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
>
> args[0] = p1;
> @@ -755,30 +740,20 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
>  }
>
>  LLVMValueRef
>  ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
>LLVMValueRef parameter,
>LLVMValueRef llvm_chan,
>LLVMValueRef attr_number,
>LLVMValueRef params)
>  {
> LLVMValueRef args[4];
> -   if (HAVE_LLVM < 0x0400) {
> -   args[0] = llvm_chan;
> -   args[1] = attr_number;
> -   args[2] = params;
> -
> -   return 

[Mesa-dev] [PATCH] amd: remove support for LLVM 3.9

2018-02-02 Thread Marek Olšák
From: Marek Olšák 

Only these are supported:
- LLVM 4.0
- LLVM 5.0
- LLVM 6.0
- master (7.0)
---
 configure.ac   |   4 +-
 src/amd/common/ac_llvm_build.c | 187 ++---
 src/amd/common/ac_llvm_helper.cpp  |  10 --
 src/amd/common/ac_llvm_util.c  |  39 +
 src/amd/common/ac_llvm_util.h  |  14 +-
 src/amd/common/ac_nir_to_llvm.c|  32 +---
 src/amd/vulkan/radv_device.c   |   4 -
 src/gallium/drivers/radeonsi/si_compute.c  |   3 +-
 src/gallium/drivers/radeonsi/si_get.c  |  13 +-
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  |   3 +-
 .../drivers/radeonsi/si_shader_tgsi_setup.c|   2 -
 11 files changed, 72 insertions(+), 239 deletions(-)

diff --git a/configure.ac b/configure.ac
index a54b7cb..8ed606c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -96,22 +96,22 @@ XDAMAGE_REQUIRED=1.1
 XSHMFENCE_REQUIRED=1.1
 XVMC_REQUIRED=1.0.6
 PYTHON_MAKO_REQUIRED=0.8.0
 LIBSENSORS_REQUIRED=4.0.0
 ZLIB_REQUIRED=1.2.3
 
 dnl LLVM versions
 LLVM_REQUIRED_GALLIUM=3.3.0
 LLVM_REQUIRED_OPENCL=3.9.0
 LLVM_REQUIRED_R600=3.9.0
-LLVM_REQUIRED_RADEONSI=3.9.0
-LLVM_REQUIRED_RADV=3.9.0
+LLVM_REQUIRED_RADEONSI=4.0.0
+LLVM_REQUIRED_RADV=4.0.0
 LLVM_REQUIRED_SWR=3.9.0
 
 dnl Check for progs
 AC_PROG_CPP
 AC_PROG_CC
 AC_PROG_CXX
 dnl add this here, so the help for this environmnet variable is close to
 dnl other CC/CXX flags related help
 AC_ARG_VAR([CXX11_CXXFLAGS], [Compiler flag to enable C++11 support (only 
needed if not
   enabled by default and different  from 
-std=c++11)])
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 6ede60a..3efcaa1 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -213,22 +213,21 @@ ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), 
"");
 }
 
 
 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
   LLVMTypeRef return_type, LLVMValueRef *params,
   unsigned param_count, unsigned attrib_mask)
 {
LLVMValueRef function, call;
-   bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
- !(attrib_mask & AC_FUNC_ATTR_LEGACY);
+   bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
 
function = LLVMGetNamedFunction(ctx->module, name);
if (!function) {
LLVMTypeRef param_types[32], function_type;
unsigned i;
 
assert(param_count <= 32);
 
for (i = 0; i < param_count; ++i) {
assert(params[i]);
@@ -714,34 +713,20 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 LLVMValueRef
 ac_build_fs_interp(struct ac_llvm_context *ctx,
   LLVMValueRef llvm_chan,
   LLVMValueRef attr_number,
   LLVMValueRef params,
   LLVMValueRef i,
   LLVMValueRef j)
 {
LLVMValueRef args[5];
LLVMValueRef p1;
-   
-   if (HAVE_LLVM < 0x0400) {
-   LLVMValueRef ij[2];
-   ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
-   ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");
-
-   args[0] = llvm_chan;
-   args[1] = attr_number;
-   args[2] = params;
-   args[3] = ac_build_gather_values(ctx, ij, 2);
-   return ac_build_intrinsic(ctx, "llvm.SI.fs.interp",
- ctx->f32, args, 4,
- AC_FUNC_ATTR_READNONE);
-   }
 
args[0] = i;
args[1] = llvm_chan;
args[2] = attr_number;
args[3] = params;
 
p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
 
args[0] = p1;
@@ -755,30 +740,20 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
 }
 
 LLVMValueRef
 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
   LLVMValueRef parameter,
   LLVMValueRef llvm_chan,
   LLVMValueRef attr_number,
   LLVMValueRef params)
 {
LLVMValueRef args[4];
-   if (HAVE_LLVM < 0x0400) {
-   args[0] = llvm_chan;
-   args[1] = attr_number;
-   args[2] = params;
-
-   return ac_build_intrinsic(ctx,
- "llvm.SI.fs.constant",
- ctx->f32, args, 3,
- AC_FUNC_ATTR_READNONE);
-   }
 
args[0] = parameter;
args[1] = llvm_chan;
args[2] = attr_number;
args[3] = params;
 

[Mesa-dev] [Bug 104915] Indexed SHADING_LANGUAGE_VERSION query not supported

2018-02-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104915

Bug ID: 104915
   Summary: Indexed SHADING_LANGUAGE_VERSION query not supported
   Product: Mesa
   Version: git
  Hardware: All
OS: All
Status: NEW
  Severity: minor
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: bald...@baldurk.org
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 137134
  --> https://bugs.freedesktop.org/attachment.cgi?id=137134&action=edit
GLFW sample testing shading language version queries

In GL 4.3 the spec added a new glGetStringi() query, GL_SHADING_LANGUAGE_VERSION,
which allows an application to query a list of supported versions (rather than
just the latest, which glGetString() returns). I don't believe this ever
existed in an extension, only in this new core version.

There are two queries relevant - the GL_SHADING_LANGUAGE_VERSION passed to
glGetStringi(), and then the index ranges from 0 to whatever
glGetIntegerv(GL_NUM_SHADING_LANGUAGE_VERSIONS) returns, which is the other
query.

I've attached a simple program (using glfw for conciseness/clarity) that
exhibits the problem in an isolated run. The important bit though is:

  printf("Shading language version: %s\n",
glGetString(GL_SHADING_LANGUAGE_VERSION));

  glGetIntegerv(GL_NUM_SHADING_LANGUAGE_VERSIONS, &numLanguages);
  printf("%d total languages\n", numLanguages);

  for(i = 0; i < numLanguages; i++)
printf("language %d: %s\n", i, glGetStringi(GL_SHADING_LANGUAGE_VERSION,
i));

Actual output:

  Shading language version: 4.50
  0 total languages

Expected output (something along these lines):

  Shading language version: 4.50
  10 total languages
  language 0: 450 core
  language 1: 450 compatibility
  language 2: 440 core
  language 3: 440 compatibility
  language 4: 430 core
  language 5: 430 compatibility
  language 6: 420 core
  language 7: 420 compatibility
  language 8: 410 core
  language 9: 410 compatibility

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: Don't expose VK_KHX_multiview on android.

2018-02-02 Thread Bas Nieuwenhuizen
On Fri, Feb 2, 2018 at 6:59 PM, Emil Velikov  wrote:
> Hi Bas,
>
> On 31 January 2018 at 11:31, Bas Nieuwenhuizen  wrote:
>> deqp does not allow any KHX extensions, and since deqp is included
>> in android-cts, android does not allow any khx extensions.
>>
>> So disable VK_KHX_multiview on android.
>> ---
>>  src/amd/vulkan/radv_extensions.py | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/amd/vulkan/radv_extensions.py 
>> b/src/amd/vulkan/radv_extensions.py
>> index ab34c01cb6..e6c6e63627 100644
>> --- a/src/amd/vulkan/radv_extensions.py
>> +++ b/src/amd/vulkan/radv_extensions.py
>> @@ -81,7 +81,7 @@ EXTENSIONS = [
>>  Extension('VK_KHR_wayland_surface',   6, 
>> 'VK_USE_PLATFORM_WAYLAND_KHR'),
>>  Extension('VK_KHR_xcb_surface',   6, 
>> 'VK_USE_PLATFORM_XCB_KHR'),
>>  Extension('VK_KHR_xlib_surface',  6, 
>> 'VK_USE_PLATFORM_XLIB_KHR'),
>> -Extension('VK_KHX_multiview', 1, True),
>> +Extension('VK_KHX_multiview', 1, '!ANDROID'),
>
> While picking the patch for stable the following questions came to
> mind. Hope you can shed some light.
>
> Is this restriction effectively a Vulkan loader limitation or ?
> Should we use the same for the Intel Vulkan driver as well?

The test suite used for Android conformance testing (deqp as part of
Android CTS) is slightly stricter than the Vulkan CTS. So enabling the
extension results in a perfectly working extension, but you just don't
have a conformant Android device.

I think Chad expected the KHX extensions to be disabled in all
releases, but as far as I can tell they have not been for the past few
releases, so I'm not entirely sure what is supposed to happen here.
I'd expect this would be needed by Intel too, but given that the Intel
driver has been tested for Android for a while and this is not in the
Intel driver yet, I'm probably overlooking their solution.

- Bas


>
> Thanks
> Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: fix a crash in load_gs_input() on pre-GFX9 chips

2018-02-02 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 2, 2018 at 6:56 PM, Samuel Pitoiset
 wrote:
> Fixes: df1d5174fcc ("ac/nir: replace SI.buffer.load.dword with 
> amdgcn.buffer.load")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 0f7d6258acd..e0386429037 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3074,6 +3074,9 @@ load_gs_input(struct ac_shader_abi *abi,
> ctx->ac.i32_0,
> vtx_offset, soffset,
> 0, 1, 0, true, false);
> +
> +   value[i] = LLVMBuildBitCast(ctx->builder, value[i],
> +   type, "");
> }
> }
> result = ac_build_varying_gather_values(>ac, value, 
> num_components, component);
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: enable lowering of fpow to fexp2 and flog2

2018-02-02 Thread Samuel Pitoiset
There is no fpow in hardware, so it's always lowered somewhere,
but it appears that lowering at NIR level is better. Figured while
comparing compute shaders between RadeonSI and RADV.

Polaris10:
Totals from affected shaders:
SGPRS: 18936 -> 18904 (-0.17 %)
VGPRS: 12240 -> 12220 (-0.16 %)
Spilled SGPRs: 2809 -> 2809 (0.00 %)
Code Size: 718116 -> 719848 (0.24 %) bytes
Max Waves: 1409 -> 1410 (0.07 %)

Vega10:
Totals from affected shaders:
SGPRS: 18392 -> 18392 (0.00 %)
VGPRS: 12008 -> 11920 (-0.73 %)
Spilled SGPRs: 3001 -> 2981 (-0.67 %)
Code Size: 777444 -> 778788 (0.17 %) bytes
Max Waves: 1503 -> 1504 (0.07 %)

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_shader.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index af094e6220..769e991f93 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -67,6 +67,7 @@ static const struct nir_shader_compiler_options nir_options = 
{
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_ffma = true,
+   .lower_fpow = true,
.vs_inputs_dual_locations = true,
.max_unroll_iterations = 32
 };
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: Don't expose VK_KHX_multiview on android.

2018-02-02 Thread Emil Velikov
Hi Bas,

On 31 January 2018 at 11:31, Bas Nieuwenhuizen  wrote:
> deqp does not allow any KHX extensions, and since deqp is included
> in android-cts, android does not allow any khx extensions.
>
> So disable VK_KHX_multiview on android.
> ---
>  src/amd/vulkan/radv_extensions.py | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index ab34c01cb6..e6c6e63627 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -81,7 +81,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_wayland_surface',   6, 
> 'VK_USE_PLATFORM_WAYLAND_KHR'),
>  Extension('VK_KHR_xcb_surface',   6, 
> 'VK_USE_PLATFORM_XCB_KHR'),
>  Extension('VK_KHR_xlib_surface',  6, 
> 'VK_USE_PLATFORM_XLIB_KHR'),
> -Extension('VK_KHX_multiview', 1, True),
> +Extension('VK_KHX_multiview', 1, '!ANDROID'),

While picking the patch for stable the following questions came to
mind. Hope you can shed some light.

Is this restriction effectively a Vulkan loader limitation or ?
Should we use the same for the Intel Vulkan driver as well?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/nir: fix a crash in load_gs_input() on pre-GFX9 chips

2018-02-02 Thread Samuel Pitoiset
Fixes: df1d5174fcc ("ac/nir: replace SI.buffer.load.dword with 
amdgcn.buffer.load")
Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 0f7d6258acd..e0386429037 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3074,6 +3074,9 @@ load_gs_input(struct ac_shader_abi *abi,
ctx->ac.i32_0,
vtx_offset, soffset,
0, 1, 0, true, false);
+
+   value[i] = LLVMBuildBitCast(ctx->builder, value[i],
+   type, "");
}
}
result = ac_build_varying_gather_values(>ac, value, 
num_components, component);
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/fp64: fix integer->double conversion

2018-02-02 Thread Roland Scheidegger
Am 02.02.2018 um 05:56 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> Doing a straight uint/int->fp32->fp64 conversion causes
> some precision issues, Roland suggested splitting the
> integer into two portions and doing two separate
> int->fp32->fp64 conversions then adding the results.
> 
> This passes the tests in CTS and piglit.
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/r600_shader.c | 118 
> +
>  1 file changed, 90 insertions(+), 28 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 13aa681049..22f2736b03 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -4490,44 +4490,106 @@ static int egcm_int_to_double(struct r600_shader_ctx 
> *ctx)
>  {
>   struct tgsi_full_instruction *inst = 
> >parse.FullToken.FullInstruction;
>   struct r600_bytecode_alu alu;
> - int i, r;
> - int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
> + int i, c, r;
> + int write_mask = inst->Dst[0].Register.WriteMask;
> + int temp_reg = r600_get_temp(ctx);
>  
>   assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
>   inst->Instruction.Opcode == TGSI_OPCODE_U2D);
>  
> - for (i = 0; i <= (lasti+1)/2; i++) {
> - memset(, 0, sizeof(struct r600_bytecode_alu));
> - alu.op = ctx->inst_info->op;
> -
> - r600_bytecode_src([0], >src[0], i);
> - alu.dst.sel = ctx->temp_reg;
> - alu.dst.chan = i;
> - alu.dst.write = 1;
> - alu.last = 1;
> + for (c = 0; c < 2; c++) {
> + int dchan = c * 2;
> + if (write_mask & (0x3 << dchan)) {
> + /* split into 24-bit int and 8-bit int */
> + memset(, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP2_AND_INT;
> + alu.dst.sel = temp_reg;
> + alu.dst.chan = dchan;
> + r600_bytecode_src([0], >src[0], c);
> + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
> + alu.src[1].value = 0xff00;
> + alu.dst.write = 1;
> + r = r600_bytecode_add_alu(ctx->bc, );
> + if (r)
> + return r;
>  
> - r = r600_bytecode_add_alu(ctx->bc, );
> - if (r)
> - return r;
> + memset(, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP2_AND_INT;
> + alu.dst.sel = temp_reg;
> + alu.dst.chan = dchan + 1;
> + r600_bytecode_src([0], >src[0], c);
> + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
> + alu.src[1].value = 0xff;
> + alu.dst.write = 1;
> + alu.last = 1;
> + r = r600_bytecode_add_alu(ctx->bc, );
> + if (r)
> + return r;
> + }
>   }
>  
> - for (i = 0; i <= lasti; i++) {
> - memset(, 0, sizeof(struct r600_bytecode_alu));
> - alu.op = ALU_OP1_FLT32_TO_FLT64;
> + for (c = 0; c < 2; c++) {
> + int dchan = c * 2;
> + if (write_mask & (0x3 << dchan)) {
> + for (i = dchan; i <= dchan + 1; i++) {
> + memset(, 0, sizeof(struct 
> r600_bytecode_alu));
> + alu.op = i == dchan ? ctx->inst_info->op : 
> ALU_OP1_UINT_TO_FLT;
>  
> - alu.src[0].chan = i/2;
> - if (i%2 == 0)
> - alu.src[0].sel = ctx->temp_reg;
> - else {
> - alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
> - alu.src[0].value = 0x0;
> + alu.src[0].sel = temp_reg;
> + alu.src[0].chan = i;
> + alu.dst.sel = temp_reg;
> + alu.dst.chan = i;
> + alu.dst.write = 1;
> + alu.last = i == dchan + 1;
> +
> + r = r600_bytecode_add_alu(ctx->bc, );
> + if (r)
> + return r;
> + }
>   }
That'll still work on eg (cypress) where UINT_TO_FLT is scalar, right?
I just realized that for the low 8 bits you could actually skip the
masking and use UBYTE0_FLT instead if that instruction does what the
docs say :-). Though I guess on Cayman that won't be much of an
improvement, but might shave off another instruction or two on Cypress
(as this one is a vector instruction)...
In any case,

Reviewed-by: Roland Scheidegger 

> - tgsi_dst(ctx, >Dst[0], 

Re: [Mesa-dev] [PATCH] r600: fix resq for buffer images.

2018-02-02 Thread Roland Scheidegger
Am 02.02.2018 um 08:29 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> If this is an image buffer, we need to calculate the correct resource
> id.
> 
> Fixes:
> KHR-GL45.shader_image_size.*
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/drivers/r600/r600_shader.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 9a0d6b5dd1..8c4460a5d5 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -8821,7 +8821,10 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
>   (inst->Src[0].Register.File == TGSI_FILE_IMAGE && 
> inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
>   if (ctx->bc->chip_class < EVERGREEN)
>   ctx->shader->uses_tex_buffers = true;
> - return r600_do_buffer_txq(ctx, 0, 
> ctx->shader->image_size_const_offset);
> + unsigned offset = 0;
> + if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
> + offset += R600_IMAGE_REAL_RESOURCE_OFFSET - 
> R600_MAX_CONST_BUFFERS + ctx->shader->image_size_const_offset;
Can't the offset actually be handled by r600_do_buffer_txq() somehow?
I'm always getting very confused about those offsets in any case...
But looks like it should be correct to me.

Reviewed-by: Roland Scheidegger 


> + return r600_do_buffer_txq(ctx, 0, offset);
>   }
>  
>   if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY &&
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/12] vl: add parameters for HEVC encode

2018-02-02 Thread Zhang, Boyuan
The whole series is the updated version. The changes are mainly based on the 
comments from the previous code review, plus fixes for a few typos.

-Original Message-
From: Zhang, Boyuan 
Sent: February-02-18 11:11 AM
To: mesa-dev@lists.freedesktop.org
Cc: Zhang, Boyuan
Subject: [PATCH 01/12] vl: add parameters for HEVC encode

From: Boyuan Zhang 

Add HEVC encode interface

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/include/pipe/p_video_state.h | 99 
 1 file changed, 99 insertions(+)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index 5a88e6c..2533ba4 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -120,6 +120,15 @@ enum pipe_h264_enc_picture_type
PIPE_H264_ENC_PICTURE_TYPE_SKIP = 0x04  };
 
+enum pipe_h265_enc_picture_type
+{
+   PIPE_H265_ENC_PICTURE_TYPE_P = 0x00,
+   PIPE_H265_ENC_PICTURE_TYPE_B = 0x01,
+   PIPE_H265_ENC_PICTURE_TYPE_I = 0x02,
+   PIPE_H265_ENC_PICTURE_TYPE_IDR = 0x03,
+   PIPE_H265_ENC_PICTURE_TYPE_SKIP = 0x04 };
+
 enum pipe_h264_enc_rate_control_method
 {
PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE = 0x00, @@ -129,6 +138,15 @@ enum 
pipe_h264_enc_rate_control_method
PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE = 0x04  };
 
+enum pipe_h265_enc_rate_control_method
+{
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE = 0x00,
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP = 0x01,
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP = 0x02,
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT = 0x03,
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE = 0x04 };
+
 struct pipe_picture_desc
 {
enum pipe_video_profile profile;
@@ -412,6 +430,87 @@ struct pipe_h264_enc_picture_desc
 
 };
 
+struct pipe_h265_enc_seq_param
+{
+   uint8_t  general_profile_idc;
+   uint8_t  general_level_idc;
+   uint8_t  general_tier_flag;
+   uint32_t intra_period;
+   uint16_t pic_width_in_luma_samples;
+   uint16_t pic_height_in_luma_samples;
+   uint32_t chroma_format_idc;
+   uint32_t bit_depth_luma_minus8;
+   uint32_t bit_depth_chroma_minus8;
+   bool strong_intra_smoothing_enabled_flag;
+   bool amp_enabled_flag;
+   bool sample_adaptive_offset_enabled_flag;
+   bool pcm_enabled_flag;
+   bool sps_temporal_mvp_enabled_flag;
+   uint8_t  log2_min_luma_coding_block_size_minus3;
+   uint8_t  log2_diff_max_min_luma_coding_block_size;
+   uint8_t  log2_min_transform_block_size_minus2;
+   uint8_t  log2_diff_max_min_transform_block_size;
+   uint8_t  max_transform_hierarchy_depth_inter;
+   uint8_t  max_transform_hierarchy_depth_intra;
+};
+
+struct pipe_h265_enc_pic_param
+{
+   uint8_t log2_parallel_merge_level_minus2;
+   uint8_t nal_unit_type;
+   bool constrained_intra_pred_flag;
+};
+
+struct pipe_h265_enc_slice_param
+{
+   uint8_t max_num_merge_cand;
+   int8_t slice_cb_qp_offset;
+   int8_t slice_cr_qp_offset;
+   int8_t slice_beta_offset_div2;
+   int8_t slice_tc_offset_div2;
+   bool cabac_init_flag;
+   uint32_t slice_deblocking_filter_disabled_flag;
+   bool slice_loop_filter_across_slices_enabled_flag;
+};
+
+struct pipe_h265_enc_rate_control
+{
+   enum pipe_h265_enc_rate_control_method rate_ctrl_method;
+   unsigned target_bitrate;
+   unsigned peak_bitrate;
+   unsigned frame_rate_num;
+   unsigned frame_rate_den;
+   unsigned quant_i_frames;
+   unsigned vbv_buffer_size;
+   unsigned vbv_buf_lv;
+   unsigned target_bits_picture;
+   unsigned peak_bits_picture_integer;
+   unsigned peak_bits_picture_fraction;
+   unsigned fill_data_enable;
+   unsigned enforce_hrd;
+};
+
+struct pipe_h265_enc_picture_desc
+{
+   struct pipe_picture_desc base;
+
+   struct pipe_h265_enc_seq_param seq;
+   struct pipe_h265_enc_pic_param pic;
+   struct pipe_h265_enc_slice_param slice;
+   struct pipe_h265_enc_rate_control rc;
+
+   enum pipe_h265_enc_picture_type picture_type;
+   unsigned decoded_curr_pic;
+   unsigned reference_frames[16];
+   unsigned frame_num;
+   unsigned pic_order_cnt;
+   unsigned pic_order_cnt_type;
+   unsigned ref_idx_l0;
+   unsigned ref_idx_l1;
+   bool not_referenced;
+   struct util_hash_table *frame_idx;
+};
+
 struct pipe_h265_sps
 {
uint8_t chroma_format_idc;
--
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/12] st/va: enable dual instances encode only for H264

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Logic related to dual-instance encode should only be applied for
H264, not for other codecs.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/state_trackers/va/picture.c |  3 ++-
 src/gallium/state_trackers/va/surface.c | 23 +--
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index d5fa947..57f53ac 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -610,7 +610,8 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id)
}
 
context->decoder->end_frame(context->decoder, context->target, 
>desc.base);
-   if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
+   if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE &&
+  u_reduce_video_profile(context->templat.profile) == 
PIPE_VIDEO_FORMAT_MPEG4_AVC) {
   int idr_period = context->desc.h264enc.gop_size / context->gop_coeff;
   int p_remain_in_idr = idr_period - context->desc.h264enc.frame_num;
   surf->frame_num_cnt = context->desc.h264enc.frame_num_cnt;
diff --git a/src/gallium/state_trackers/va/surface.c 
b/src/gallium/state_trackers/va/surface.c
index 636505b..9823232 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -36,6 +36,7 @@
 #include "util/u_rect.h"
 #include "util/u_sampler.h"
 #include "util/u_surface.h"
+#include "util/u_video.h"
 
 #include "vl/vl_compositor.h"
 #include "vl/vl_video_buffer.h"
@@ -122,16 +123,18 @@ vlVaSyncSurface(VADriverContextP ctx, VASurfaceID 
render_target)
}
 
if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
-  int frame_diff;
-  if (context->desc.h264enc.frame_num_cnt >= surf->frame_num_cnt)
- frame_diff = context->desc.h264enc.frame_num_cnt - 
surf->frame_num_cnt;
-  else
- frame_diff = 0x - surf->frame_num_cnt + 1 + 
context->desc.h264enc.frame_num_cnt;
-  if ((frame_diff == 0) &&
-  (surf->force_flushed == false) &&
-  (context->desc.h264enc.frame_num_cnt % 2 != 0)) {
- context->decoder->flush(context->decoder);
- context->first_single_submitted = true;
+  if (u_reduce_video_profile(context->templat.profile) == 
PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ int frame_diff;
+ if (context->desc.h264enc.frame_num_cnt >= surf->frame_num_cnt)
+frame_diff = context->desc.h264enc.frame_num_cnt - 
surf->frame_num_cnt;
+ else
+frame_diff = 0x - surf->frame_num_cnt + 1 + 
context->desc.h264enc.frame_num_cnt;
+ if ((frame_diff == 0) &&
+ (surf->force_flushed == false) &&
+ (context->desc.h264enc.frame_num_cnt % 2 != 0)) {
+context->decoder->flush(context->decoder);
+context->first_single_submitted = true;
+ }
   }
   context->decoder->get_feedback(context->decoder, surf->feedback, 
&(surf->coded_buf->coded_size));
   surf->feedback = NULL;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/12] radeonsi: enable vcn encode for HEVC main

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Enable vcn encode for HEVC main profile on Raven.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/drivers/radeonsi/si_get.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index 1c84a25..8382721 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -588,8 +588,10 @@ static int si_get_video_param(struct pipe_screen *screen,
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
-   return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
+   return (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
(si_vce_is_fw_version_supported(sscreen) ||
+   sscreen->info.family == CHIP_RAVEN)) ||
+   (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN &&
sscreen->info.family == CHIP_RAVEN);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/12] st/va: add entrypoint check for HEVC

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Add entrypoint check for HEVC to differentiate decode and encode jobs.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/state_trackers/va/context.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/gallium/state_trackers/va/context.c 
b/src/gallium/state_trackers/va/context.c
index f03b326..f567f54 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -263,16 +263,18 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID 
config_id, int picture_width,
 
  case PIPE_VIDEO_FORMAT_HEVC:
  context->templat.max_references = num_render_targets;
- context->desc.h265.pps = CALLOC_STRUCT(pipe_h265_pps);
- if (!context->desc.h265.pps) {
-FREE(context);
-return VA_STATUS_ERROR_ALLOCATION_FAILED;
- }
- context->desc.h265.pps->sps = CALLOC_STRUCT(pipe_h265_sps);
- if (!context->desc.h265.pps->sps) {
-FREE(context->desc.h265.pps);
-FREE(context);
-return VA_STATUS_ERROR_ALLOCATION_FAILED;
+ if (config->entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE) {
+context->desc.h265.pps = CALLOC_STRUCT(pipe_h265_pps);
+if (!context->desc.h265.pps) {
+   FREE(context);
+   return VA_STATUS_ERROR_ALLOCATION_FAILED;
+}
+context->desc.h265.pps->sps = CALLOC_STRUCT(pipe_h265_sps);
+if (!context->desc.h265.pps->sps) {
+   FREE(context->desc.h265.pps);
+   FREE(context);
+   return VA_STATUS_ERROR_ALLOCATION_FAILED;
+}
  }
  break;
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/12] st/va: add HEVC encode functions

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Add a separate file for HEVC encode functions.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/state_trackers/va/Makefile.sources   |  1 +
 src/gallium/state_trackers/va/meson.build|  2 +-
 src/gallium/state_trackers/va/picture.c  | 31 +-
 src/gallium/state_trackers/va/picture_hevc_enc.c | 75 
 src/gallium/state_trackers/va/va_private.h   |  6 ++
 5 files changed, 111 insertions(+), 4 deletions(-)
 create mode 100644 src/gallium/state_trackers/va/picture_hevc_enc.c

diff --git a/src/gallium/state_trackers/va/Makefile.sources 
b/src/gallium/state_trackers/va/Makefile.sources
index 8a69828..f3a13f2 100644
--- a/src/gallium/state_trackers/va/Makefile.sources
+++ b/src/gallium/state_trackers/va/Makefile.sources
@@ -10,6 +10,7 @@ C_SOURCES := \
picture_h264.c \
picture_h264_enc.c \
picture_hevc.c \
+   picture_hevc_enc.c \
picture_vc1.c \
picture_mjpeg.c \
postproc.c \
diff --git a/src/gallium/state_trackers/va/meson.build 
b/src/gallium/state_trackers/va/meson.build
index 0dec48c..bddd5ef 100644
--- a/src/gallium/state_trackers/va/meson.build
+++ b/src/gallium/state_trackers/va/meson.build
@@ -26,7 +26,7 @@ libva_st = static_library(
 'buffer.c', 'config.c', 'context.c', 'display.c', 'image.c', 'picture.c',
 'picture_mpeg12.c', 'picture_mpeg4.c', 'picture_h264.c', 'picture_hevc.c',
 'picture_vc1.c', 'picture_mjpeg.c', 'postproc.c', 'subpicture.c',
-'surface.c', 'picture_h264_enc.c',
+'surface.c', 'picture_h264_enc.c', 'picture_hevc_enc.c',
   ),
   c_args : [
 c_vis_args,
diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 57f53ac..240f25a 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -316,6 +316,10 @@ handleVAEncMiscParameterTypeRateControl(vlVaContext 
*context, VAEncMiscParameter
   status = vlVaHandleVAEncMiscParameterTypeRateControlH264(context, misc);
   break;
 
+   case PIPE_VIDEO_FORMAT_HEVC:
+  status = vlVaHandleVAEncMiscParameterTypeRateControlHEVC(context, misc);
+  break;
+
default:
   break;
}
@@ -333,6 +337,10 @@ handleVAEncMiscParameterTypeFrameRate(vlVaContext 
*context, VAEncMiscParameterBu
   status = vlVaHandleVAEncMiscParameterTypeFrameRateH264(context, misc);
   break;
 
+   case PIPE_VIDEO_FORMAT_HEVC:
+  status = vlVaHandleVAEncMiscParameterTypeFrameRateHEVC(context, misc);
+  break;
+
default:
   break;
}
@@ -350,6 +358,10 @@ handleVAEncSequenceParameterBufferType(vlVaDriver *drv, 
vlVaContext *context, vl
   status = vlVaHandleVAEncSequenceParameterBufferTypeH264(drv, context, 
buf);
   break;
 
+   case PIPE_VIDEO_FORMAT_HEVC:
+  status = vlVaHandleVAEncSequenceParameterBufferTypeHEVC(drv, context, 
buf);
+  break;
+
default:
   break;
}
@@ -390,6 +402,10 @@ handleVAEncPictureParameterBufferType(vlVaDriver *drv, 
vlVaContext *context, vlV
   status = vlVaHandleVAEncPictureParameterBufferTypeH264(drv, context, 
buf);
   break;
 
+   case PIPE_VIDEO_FORMAT_HEVC:
+  status = vlVaHandleVAEncPictureParameterBufferTypeHEVC(drv, context, 
buf);
+  break;
+
default:
   break;
}
@@ -407,6 +423,10 @@ handleVAEncSliceParameterBufferType(vlVaDriver *drv, 
vlVaContext *context, vlVaB
   status = vlVaHandleVAEncSliceParameterBufferTypeH264(drv, context, buf);
   break;
 
+   case PIPE_VIDEO_FORMAT_HEVC:
+  status = vlVaHandleVAEncSliceParameterBufferTypeHEVC(drv, context, buf);
+  break;
+
default:
   break;
}
@@ -600,8 +620,11 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID 
context_id)
 
if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
   coded_buf = context->coded_buf;
-  getEncParamPresetH264(context);
-  context->desc.h264enc.frame_num_cnt++;
+  if (u_reduce_video_profile(context->templat.profile) == 
PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ getEncParamPresetH264(context);
+ context->desc.h264enc.frame_num_cnt++;
+  } else if (u_reduce_video_profile(context->templat.profile) == 
PIPE_VIDEO_FORMAT_HEVC)
+ getEncParamPresetH265(context);
   context->decoder->begin_frame(context->decoder, context->target, 
>desc.base);
   context->decoder->encode_bitstream(context->decoder, context->target,
  coded_buf->derived_surface.resource, 
);
@@ -630,7 +653,9 @@ vlVaEndPicture(VADriverContextP ctx, VAContextID context_id)
 context->first_single_submitted = false;
  surf->force_flushed = true;
   }
-   }
+   } else if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE &&
+  u_reduce_video_profile(context->templat.profile) == 
PIPE_VIDEO_FORMAT_HEVC)
+  

[Mesa-dev] [PATCH 11/12] st/va: implement HEVC encode functions

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Implement HEVC encode functions based on VAAPI HEVC encode interface.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/state_trackers/va/picture_hevc_enc.c | 150 ++-
 1 file changed, 144 insertions(+), 6 deletions(-)

diff --git a/src/gallium/state_trackers/va/picture_hevc_enc.c 
b/src/gallium/state_trackers/va/picture_hevc_enc.c
index 4b56207..8906901 100644
--- a/src/gallium/state_trackers/va/picture_hevc_enc.c
+++ b/src/gallium/state_trackers/va/picture_hevc_enc.c
@@ -32,7 +32,50 @@
 VAStatus
 vlVaHandleVAEncPictureParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext 
*context, vlVaBuffer *buf)
 {
-   /* TODO */
+   VAEncPictureParameterBufferHEVC *h265;
+   vlVaBuffer *coded_buf;
+   int i;
+
+   h265 = buf->data;
+   context->desc.h265enc.decoded_curr_pic = h265->decoded_curr_pic.picture_id;
+
+   for (i = 0; i < 15; i++)
+  context->desc.h265enc.reference_frames[i] = 
h265->reference_frames[i].picture_id;
+
+   context->desc.h265enc.pic_order_cnt = h265->decoded_curr_pic.pic_order_cnt;
+   coded_buf = handle_table_get(drv->htab, h265->coded_buf);
+
+   if (!coded_buf->derived_surface.resource)
+  coded_buf->derived_surface.resource = 
pipe_buffer_create(drv->pipe->screen, PIPE_BIND_VERTEX_BUFFER,
+PIPE_USAGE_STREAM, 
coded_buf->size);
+
+   context->coded_buf = coded_buf;
+   context->desc.h265enc.pic.log2_parallel_merge_level_minus2 = 
h265->log2_parallel_merge_level_minus2;
+   context->desc.h265enc.pic.nal_unit_type = h265->nal_unit_type;
+   context->desc.h265enc.rc.quant_i_frames = h265->pic_init_qp;
+
+   switch(h265->pic_fields.bits.coding_type) {
+   case 1:
+  if (h265->pic_fields.bits.idr_pic_flag)
+ context->desc.h265enc.picture_type = PIPE_H265_ENC_PICTURE_TYPE_IDR;
+  else
+ context->desc.h265enc.picture_type = PIPE_H265_ENC_PICTURE_TYPE_I;
+  break;
+   case 2:
+  context->desc.h265enc.picture_type = PIPE_H265_ENC_PICTURE_TYPE_P;
+  break;
+   case 3:
+   case 4:
+   case 5:
+  return VA_STATUS_ERROR_UNIMPLEMENTED; //no b frame support
+  break;
+   }
+
+   context->desc.h265enc.pic.constrained_intra_pred_flag = 
h265->pic_fields.bits.constrained_intra_pred_flag;
+
+   util_hash_table_set(context->desc.h265enc.frame_idx,
+   UINT_TO_PTR(h265->decoded_curr_pic.picture_id),
+   UINT_TO_PTR(context->desc.h265enc.frame_num));
 
return VA_STATUS_SUCCESS;
 }
@@ -40,7 +83,33 @@ vlVaHandleVAEncPictureParameterBufferTypeHEVC(vlVaDriver 
*drv, vlVaContext *cont
 VAStatus
 vlVaHandleVAEncSliceParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext 
*context, vlVaBuffer *buf)
 {
-   /* TODO */
+   VAEncSliceParameterBufferHEVC *h265;
+
+   h265 = buf->data;
+   context->desc.h265enc.ref_idx_l0 = VA_INVALID_ID;
+   context->desc.h265enc.ref_idx_l1 = VA_INVALID_ID;
+
+   for (int i = 0; i < 15; i++) {
+  if (h265->ref_pic_list0[i].picture_id != VA_INVALID_ID) {
+ if (context->desc.h265enc.ref_idx_l0 == VA_INVALID_ID)
+context->desc.h265enc.ref_idx_l0 = 
PTR_TO_UINT(util_hash_table_get(context->desc.h265enc.frame_idx,
+   
UINT_TO_PTR(h265->ref_pic_list0[i].picture_id)));
+  }
+  if (h265->ref_pic_list1[i].picture_id != VA_INVALID_ID && 
h265->slice_type == 1) {
+ if (context->desc.h265enc.ref_idx_l1 == VA_INVALID_ID)
+context->desc.h265enc.ref_idx_l1 = 
PTR_TO_UINT(util_hash_table_get(context->desc.h265enc.frame_idx,
+   
UINT_TO_PTR(h265->ref_pic_list1[i].picture_id)));
+  }
+   }
+
+   context->desc.h265enc.slice.max_num_merge_cand = h265->max_num_merge_cand;
+   context->desc.h265enc.slice.slice_cb_qp_offset = h265->slice_cb_qp_offset;
+   context->desc.h265enc.slice.slice_cr_qp_offset = h265->slice_cr_qp_offset;
+   context->desc.h265enc.slice.slice_beta_offset_div2 = 
h265->slice_beta_offset_div2;
+   context->desc.h265enc.slice.slice_tc_offset_div2 = 
h265->slice_tc_offset_div2;
+   context->desc.h265enc.slice.cabac_init_flag = 
h265->slice_fields.bits.cabac_init_flag;
+   context->desc.h265enc.slice.slice_deblocking_filter_disabled_flag = 
h265->slice_fields.bits.slice_deblocking_filter_disabled_flag;
+   context->desc.h265enc.slice.slice_loop_filter_across_slices_enabled_flag = 
h265->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag;
 
return VA_STATUS_SUCCESS;
 }
@@ -48,7 +117,38 @@ vlVaHandleVAEncSliceParameterBufferTypeHEVC(vlVaDriver 
*drv, vlVaContext *contex
 VAStatus
 vlVaHandleVAEncSequenceParameterBufferTypeHEVC(vlVaDriver *drv, vlVaContext 
*context, vlVaBuffer *buf)
 {
-   /* TODO */
+   VAEncSequenceParameterBufferHEVC *h265 = (VAEncSequenceParameterBufferHEVC 
*)buf->data;
+
+   if (!context->decoder) {
+  context->templat.level = 

[Mesa-dev] [PATCH 04/12] radeon/vcn: add ib implementations for HEVC

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Implement required ibs for vcn HEVC encode.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 267 
 1 file changed, 222 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index 06b8092..a651f7e 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -231,6 +231,27 @@ static void radeon_enc_session_init(struct radeon_encoder 
*enc)
RADEON_ENC_END();
 }
 
+static void radeon_enc_session_init_hevc(struct radeon_encoder *enc)
+{
+   enc->enc_pic.session_init.encode_standard = 
RENCODE_ENCODE_STANDARD_HEVC;
+   enc->enc_pic.session_init.aligned_picture_width = 
align(enc->base.width, 64);
+   enc->enc_pic.session_init.aligned_picture_height = 
align(enc->base.height, 16);
+   enc->enc_pic.session_init.padding_width = 
enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+   enc->enc_pic.session_init.padding_height = 
enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+   enc->enc_pic.session_init.pre_encode_mode = RENCODE_PREENCODE_MODE_NONE;
+   enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SESSION_INIT);
+   RADEON_ENC_CS(enc->enc_pic.session_init.encode_standard);
+   RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+   RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+   RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+   RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+   RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+   RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+   RADEON_ENC_END();
+}
+
 static void radeon_enc_layer_control(struct radeon_encoder *enc)
 {
enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
@@ -262,6 +283,19 @@ static void radeon_enc_slice_control(struct radeon_encoder 
*enc)
RADEON_ENC_END();
 }
 
+static void radeon_enc_slice_control_hevc(struct radeon_encoder *enc)
+{
+   enc->enc_pic.hevc_slice_ctrl.slice_control_mode = 
RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_CTBS;
+   enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice = 
align(enc->base.width, 64) / 64 * align(enc->base.height, 64) / 64;
+   
enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice_segment = 
enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice;
+
+   RADEON_ENC_BEGIN(RENCODE_HEVC_IB_PARAM_SLICE_CONTROL);
+   RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.slice_control_mode);
+   
RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice);
+   
RADEON_ENC_CS(enc->enc_pic.hevc_slice_ctrl.fixed_ctbs_per_slice.num_ctbs_per_slice_segment);
+   RADEON_ENC_END();
+}
+
 static void radeon_enc_spec_misc(struct radeon_encoder *enc)
 {
enc->enc_pic.spec_misc.constrained_intra_pred_flag = 0;
@@ -283,27 +317,68 @@ static void radeon_enc_spec_misc(struct radeon_encoder 
*enc)
RADEON_ENC_END();
 }
 
+static void radeon_enc_spec_misc_hevc(struct radeon_encoder *enc, struct 
pipe_picture_desc *picture)
+{
+   struct pipe_h265_enc_picture_desc *pic = (struct 
pipe_h265_enc_picture_desc *)picture;
+   enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 = 
pic->seq.log2_min_luma_coding_block_size_minus3;
+   enc->enc_pic.hevc_spec_misc.amp_disabled = !pic->seq.amp_enabled_flag;
+   enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled = 
pic->seq.strong_intra_smoothing_enabled_flag;
+   enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag = 
pic->pic.constrained_intra_pred_flag;
+   enc->enc_pic.hevc_spec_misc.cabac_init_flag = 
pic->slice.cabac_init_flag;
+   enc->enc_pic.hevc_spec_misc.half_pel_enabled = 1;
+   enc->enc_pic.hevc_spec_misc.quarter_pel_enabled = 1;
+
+   RADEON_ENC_BEGIN(RENCODE_HEVC_IB_PARAM_SPEC_MISC);
+   
RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3);
+   RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.amp_disabled);
+   
RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled);
+   RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag);
+   RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.cabac_init_flag);
+   RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.half_pel_enabled);
+   RADEON_ENC_CS(enc->enc_pic.hevc_spec_misc.quarter_pel_enabled);
+   RADEON_ENC_END();
+}
+
 static void radeon_enc_rc_session_init(struct radeon_encoder *enc, struct 
pipe_picture_desc *picture)
 {
-   struct pipe_h264_enc_picture_desc *pic = (struct 
pipe_h264_enc_picture_desc *)picture;
-   

[Mesa-dev] [PATCH 05/12] radeon/vcn: add header implementations for HEVC

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Implement encoding of sps, pps, vps, aud, and slice headers for HEVC
based on HEVC specs.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 348 +++-
 1 file changed, 347 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index a651f7e..c86c2f3 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -551,6 +551,86 @@ static void radeon_enc_nalu_sps(struct radeon_encoder *enc)
RADEON_ENC_END();
 }
 
+static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc)
+{
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
+   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+   int i;
+
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+   radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
+   radeon_enc_code_fixed_bits(enc, 0x4201, 16);
+   radeon_enc_byte_align(enc);
+   radeon_enc_set_emulation_prevention(enc, true);
+   radeon_enc_code_fixed_bits(enc, 0x0, 4);
+   radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1, 3);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 2);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
+   radeon_enc_code_fixed_bits(enc, 0x60000000, 32);
+   radeon_enc_code_fixed_bits(enc, 0xb0000000, 32);
+   radeon_enc_code_fixed_bits(enc, 0x0, 16);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
+
+   for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) ; 
i++)
+   radeon_enc_code_fixed_bits(enc, 0x0, 2);
+
+   if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
+   for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); 
i < 8; i++)
+   radeon_enc_code_fixed_bits(enc, 0x0, 2);
+   }
+
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
+   radeon_enc_code_ue(enc, enc->enc_pic.pic_width_in_luma_samples);
+   radeon_enc_code_ue(enc, enc->enc_pic.pic_height_in_luma_samples);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
+   radeon_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
+   radeon_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_ue(enc, 1);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3);
+   //Only support CTBSize 64
+   radeon_enc_code_ue(enc, 6 - 
(enc->enc_pic.hevc_spec_misc.log2_min_luma_coding_block_size_minus3 + 3));
+   radeon_enc_code_ue(enc, 
enc->enc_pic.log2_min_transform_block_size_minus2);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.log2_diff_max_min_transform_block_size);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.max_transform_hierarchy_depth_inter);
+   radeon_enc_code_ue(enc, 
enc->enc_pic.max_transform_hierarchy_depth_intra);
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 
!enc->enc_pic.hevc_spec_misc.amp_disabled, 1);
+   radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.sample_adaptive_offset_enabled_flag, 1);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
+
+   radeon_enc_code_ue(enc, 1);
+   radeon_enc_code_ue(enc, 1);
+   radeon_enc_code_ue(enc, 0);
+   radeon_enc_code_ue(enc, 0);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0, 1);
+   radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.hevc_spec_misc.strong_intra_smoothing_enabled, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+
+   radeon_enc_byte_align(enc);
+   radeon_enc_flush_headers(enc);
+   *size_in_bytes = (enc->bits_output + 7) / 8;
+   RADEON_ENC_END();
+}
+
 static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
 {
RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
@@ -586,6 +666,150 @@ static void radeon_enc_nalu_pps(struct radeon_encoder 
*enc)
RADEON_ENC_END();
 }
 
+static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc)
+{
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
+   

[Mesa-dev] [PATCH 06/12] st/va: move H264 enc functions into separate file

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Move all H264 encode related functions into separate file. Similar to
VAAPI decode side, there will be separate file for each codec on encode
side as well.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/state_trackers/va/Makefile.sources   |   1 +
 src/gallium/state_trackers/va/meson.build|   2 +-
 src/gallium/state_trackers/va/picture.c  | 188 ++---
 src/gallium/state_trackers/va/picture_h264_enc.c | 202 +++
 src/gallium/state_trackers/va/va_private.h   |   6 +
 5 files changed, 260 insertions(+), 139 deletions(-)
 create mode 100644 src/gallium/state_trackers/va/picture_h264_enc.c

diff --git a/src/gallium/state_trackers/va/Makefile.sources 
b/src/gallium/state_trackers/va/Makefile.sources
index 2d6546b..8a69828 100644
--- a/src/gallium/state_trackers/va/Makefile.sources
+++ b/src/gallium/state_trackers/va/Makefile.sources
@@ -8,6 +8,7 @@ C_SOURCES := \
picture_mpeg12.c \
picture_mpeg4.c \
picture_h264.c \
+   picture_h264_enc.c \
picture_hevc.c \
picture_vc1.c \
picture_mjpeg.c \
diff --git a/src/gallium/state_trackers/va/meson.build 
b/src/gallium/state_trackers/va/meson.build
index 56e68e9..0dec48c 100644
--- a/src/gallium/state_trackers/va/meson.build
+++ b/src/gallium/state_trackers/va/meson.build
@@ -26,7 +26,7 @@ libva_st = static_library(
 'buffer.c', 'config.c', 'context.c', 'display.c', 'image.c', 'picture.c',
 'picture_mpeg12.c', 'picture_mpeg4.c', 'picture_h264.c', 'picture_hevc.c',
 'picture_vc1.c', 'picture_mjpeg.c', 'postproc.c', 'subpicture.c',
-'surface.c',
+'surface.c', 'picture_h264_enc.c',
   ),
   c_args : [
 c_vis_args,
diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 8951573..d5fa947 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -99,46 +99,6 @@ vlVaGetReferenceFrame(vlVaDriver *drv, VASurfaceID 
surface_id,
   *ref_frame = NULL;
 }
 
-static void
-getEncParamPreset(vlVaContext *context)
-{
-   //motion estimation preset
-   context->desc.h264enc.motion_est.motion_est_quarter_pixel = 0x0001;
-   context->desc.h264enc.motion_est.lsmvert = 0x0002;
-   context->desc.h264enc.motion_est.enc_disable_sub_mode = 0x0078;
-   context->desc.h264enc.motion_est.enc_en_ime_overw_dis_subm = 0x0001;
-   context->desc.h264enc.motion_est.enc_ime_overw_dis_subm_no = 0x0001;
-   context->desc.h264enc.motion_est.enc_ime2_search_range_x = 0x0004;
-   context->desc.h264enc.motion_est.enc_ime2_search_range_y = 0x0004;
-
-   //pic control preset
-   context->desc.h264enc.pic_ctrl.enc_cabac_enable = 0x0001;
-   context->desc.h264enc.pic_ctrl.enc_constraint_set_flags = 0x0040;
-
-   //rate control
-   context->desc.h264enc.rate_ctrl.vbv_buffer_size = 2000;
-   context->desc.h264enc.rate_ctrl.vbv_buf_lv = 48;
-   context->desc.h264enc.rate_ctrl.fill_data_enable = 1;
-   context->desc.h264enc.rate_ctrl.enforce_hrd = 1;
-   context->desc.h264enc.enable_vui = false;
-   if (context->desc.h264enc.rate_ctrl.frame_rate_num == 0 ||
-   context->desc.h264enc.rate_ctrl.frame_rate_den == 0) {
- context->desc.h264enc.rate_ctrl.frame_rate_num = 30;
- context->desc.h264enc.rate_ctrl.frame_rate_den = 1;
-   }
-   context->desc.h264enc.rate_ctrl.target_bits_picture =
-  context->desc.h264enc.rate_ctrl.target_bitrate *
-  ((float)context->desc.h264enc.rate_ctrl.frame_rate_den /
-  context->desc.h264enc.rate_ctrl.frame_rate_num);
-   context->desc.h264enc.rate_ctrl.peak_bits_picture_integer =
-  context->desc.h264enc.rate_ctrl.peak_bitrate *
-  ((float)context->desc.h264enc.rate_ctrl.frame_rate_den /
-  context->desc.h264enc.rate_ctrl.frame_rate_num);
-
-   context->desc.h264enc.rate_ctrl.peak_bits_picture_fraction = 0;
-   context->desc.h264enc.ref_pic_mode = 0x0201;
-}
-
 static VAStatus
 handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer 
*buf)
 {
@@ -349,55 +309,52 @@ handleVASliceDataBufferType(vlVaContext *context, 
vlVaBuffer *buf)
 static VAStatus
 handleVAEncMiscParameterTypeRateControl(vlVaContext *context, 
VAEncMiscParameterBuffer *misc)
 {
-   VAEncMiscParameterRateControl *rc = (VAEncMiscParameterRateControl 
*)misc->data;
-   if (context->desc.h264enc.rate_ctrl.rate_ctrl_method ==
-   PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT)
-  context->desc.h264enc.rate_ctrl.target_bitrate = rc->bits_per_second;
-   else
-  context->desc.h264enc.rate_ctrl.target_bitrate = rc->bits_per_second * 
(rc->target_percentage / 100.0);
-   context->desc.h264enc.rate_ctrl.peak_bitrate = rc->bits_per_second;
-   if (context->desc.h264enc.rate_ctrl.target_bitrate < 200)
-  context->desc.h264enc.rate_ctrl.vbv_buffer_size = 

[Mesa-dev] [PATCH 02/12] radeon/vcn: add vcn encode interface for HEVC

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Add vcn encode interface for HEVC, and rename radeon_enc_h264_enc_pic
to radeon_enc_pic since radeon_enc_pic is used by both H264 and HEVC.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.h | 81 -
 1 file changed, 79 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h 
b/src/gallium/drivers/radeon/radeon_vcn_enc.h
index 0385860..86b4136 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.h
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
@@ -48,6 +48,10 @@
 #define RENCODE_IB_PARAM_FEEDBACK_BUFFER   0x0010
 #define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU0x0020
 
+#define RENCODE_HEVC_IB_PARAM_SLICE_CONTROL 0x00100001
+#define RENCODE_HEVC_IB_PARAM_SPEC_MISC 0x00100002
+#define RENCODE_HEVC_IB_PARAM_DEBLOCKING_FILTER 0x00100003
+
 #define RENCODE_H264_IB_PARAM_SLICE_CONTROL0x0021
 #define RENCODE_H264_IB_PARAM_SPEC_MISC0x0022
 #define RENCODE_H264_IB_PARAM_ENCODE_PARAMS0x0023
@@ -67,6 +71,7 @@
 #define RENCODE_IF_MINOR_VERSION_MASK  0x0000FFFF
 #define RENCODE_IF_MINOR_VERSION_SHIFT 0
 
+#define RENCODE_ENCODE_STANDARD_HEVC   0
 #define RENCODE_ENCODE_STANDARD_H264   1
 
 #define RENCODE_PREENCODE_MODE_NONE 0x00000000
@@ -77,6 +82,9 @@
 #define RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS  0x00000000
 #define RENCODE_H264_SLICE_CONTROL_MODE_FIXED_BITS 0x0001
 
+#define RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_CTBS 0x00000000
+#define RENCODE_HEVC_SLICE_CONTROL_MODE_FIXED_BITS 0x00000001
+
 #define RENCODE_RATE_CONTROL_METHOD_NONE   0x00000000
 #define RENCODE_RATE_CONTROL_METHOD_LATENCY_CONSTRAINED_VBR0x0001
 #define RENCODE_RATE_CONTROL_METHOD_PEAK_CONSTRAINED_VBR   0x0002
@@ -95,6 +103,11 @@
 #define RENCODE_HEADER_INSTRUCTION_END 0x00000000
 #define RENCODE_HEADER_INSTRUCTION_COPY0x0001
 
+#define RENCODE_HEVC_HEADER_INSTRUCTION_DEPENDENT_SLICE_END 0x00010000
+#define RENCODE_HEVC_HEADER_INSTRUCTION_FIRST_SLICE0x00010001
+#define RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_SEGMENT  0x00010002
+#define RENCODE_HEVC_HEADER_INSTRUCTION_SLICE_QP_DELTA 0x00010003
+
 #define RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB   0x00020000
 #define RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA 0x00020001
 
@@ -181,6 +194,25 @@ typedef struct rvcn_enc_h264_slice_control_s
 };
 } rvcn_enc_h264_slice_control_t;
 
+typedef struct rvcn_enc_hevc_slice_control_s
+{
+uint32_t   slice_control_mode;
+union
+{
+struct
+{
+uint32_t   num_ctbs_per_slice;
+uint32_t   num_ctbs_per_slice_segment;
+} fixed_ctbs_per_slice;
+
+struct
+{
+uint32_t   num_bits_per_slice;
+uint32_t   num_bits_per_slice_segment;
+} fixed_bits_per_slice;
+};
+} rvcn_enc_hevc_slice_control_t;
+
 typedef struct rvcn_enc_h264_spec_misc_s
 {
 uint32_t   constrained_intra_pred_flag;
@@ -192,6 +224,17 @@ typedef struct rvcn_enc_h264_spec_misc_s
 uint32_t   level_idc;
 } rvcn_enc_h264_spec_misc_t;
 
+typedef struct rvcn_enc_hevc_spec_misc_s
+{
+uint32_t   log2_min_luma_coding_block_size_minus3;
+uint32_t   amp_disabled;
+uint32_t   strong_intra_smoothing_enabled;
+uint32_t   constrained_intra_pred_flag;
+uint32_t   cabac_init_flag;
+uint32_t   half_pel_enabled;
+uint32_t   quarter_pel_enabled;
+} rvcn_enc_hevc_spec_misc_t;
+
 typedef struct rvcn_enc_rate_ctl_session_init_s
 {
 uint32_t   rate_control_method;
@@ -276,6 +319,16 @@ typedef struct rvcn_enc_h264_deblocking_filter_s
 int32_tcr_qp_offset;
 } rvcn_enc_h264_deblocking_filter_t;
 
+typedef struct rvcn_enc_hevc_deblocking_filter_s
+{
+uint32_t   loop_filter_across_slices_enabled;
+int32_tdeblocking_filter_disabled;
+int32_tbeta_offset_div2;
+int32_ttc_offset_div2;
+int32_tcb_qp_offset;
+int32_tcr_qp_offset;
+} rvcn_enc_hevc_deblocking_filter_t;
+
 typedef struct rvcn_enc_intra_refresh_s
 {
 uint32_t   intra_refresh_mode;
@@ -331,7 +384,7 @@ struct pipe_video_codec *radeon_create_encoder(struct 
pipe_context *context,
struct radeon_winsys* ws,
radeon_enc_get_buffer get_buffer);
 
-struct radeon_enc_h264_enc_pic {
+struct radeon_enc_pic {
enumpipe_h264_enc_picture_type picture_type;
 
unsignedframe_num;
@@ -343,21 +396,45 @@ struct radeon_enc_h264_enc_pic {

[Mesa-dev] [PATCH 07/12] st/va: add HEVC picture desc

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Add HEVC picture desc, and add codec check when creating and destroying
context.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/state_trackers/va/context.c| 26 ++
 src/gallium/state_trackers/va/va_private.h |  1 +
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/va/context.c 
b/src/gallium/state_trackers/va/context.c
index 78e1f19..f03b326 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -284,8 +284,18 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID 
config_id, int picture_width,
context->desc.base.profile = config->profile;
context->desc.base.entry_point = config->entrypoint;
if (config->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
-  context->desc.h264enc.rate_ctrl.rate_ctrl_method = config->rc;
-  context->desc.h264enc.frame_idx = util_hash_table_create(handle_hash, 
handle_compare);
+  switch (u_reduce_video_profile(context->templat.profile)) {
+  case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+ context->desc.h264enc.rate_ctrl.rate_ctrl_method = config->rc;
+ context->desc.h264enc.frame_idx = util_hash_table_create(handle_hash, 
handle_compare);
+ break;
+  case PIPE_VIDEO_FORMAT_HEVC:
+ context->desc.h265enc.rc.rate_ctrl_method = config->rc;
+ context->desc.h265enc.frame_idx = util_hash_table_create(handle_hash, 
handle_compare);
+ break;
+  default:
+ break;
+  }
}
 
mtx_lock(>mutex);
@@ -314,8 +324,16 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID 
context_id)
 
if (context->decoder) {
   if (context->desc.base.entry_point == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
- if (context->desc.h264enc.frame_idx)
-util_hash_table_destroy (context->desc.h264enc.frame_idx);
+ if (u_reduce_video_profile(context->decoder->profile) ==
+ PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+if (context->desc.h264enc.frame_idx)
+   util_hash_table_destroy (context->desc.h264enc.frame_idx);
+ }
+ if (u_reduce_video_profile(context->decoder->profile) ==
+ PIPE_VIDEO_FORMAT_HEVC) {
+if (context->desc.h265enc.frame_idx)
+   util_hash_table_destroy (context->desc.h265enc.frame_idx);
+ }
   } else {
  if (u_reduce_video_profile(context->decoder->profile) ==
PIPE_VIDEO_FORMAT_MPEG4_AVC) {
diff --git a/src/gallium/state_trackers/va/va_private.h 
b/src/gallium/state_trackers/va/va_private.h
index eef75c6..9b526ea 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -270,6 +270,7 @@ typedef struct {
   struct pipe_h265_picture_desc h265;
   struct pipe_mjpeg_picture_desc mjpeg;
   struct pipe_h264_enc_picture_desc h264enc;
+  struct pipe_h265_enc_picture_desc h265enc;
} desc;
 
struct {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/12] radeon/vcn: support picture parameters for HEVC

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Pass pipe_picture_desc instead of pipe_h264_enc_picture_desc so that
it can be used for different codecs. Add functions to handle picture
parameters that will be used for HEVC encode.

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.c | 72 +++--
 src/gallium/drivers/radeon/radeon_vcn_enc.h |  2 +-
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 11 ++--
 3 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index 06579c8..388a333 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -38,20 +38,61 @@
 #include "radeon_video.h"
 #include "radeon_vcn_enc.h"
 
-static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct 
pipe_h264_enc_picture_desc *pic)
+static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct 
pipe_picture_desc *picture)
 {
-   enc->enc_pic.picture_type = pic->picture_type;
-   enc->enc_pic.frame_num = pic->frame_num;
-   enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
-   enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
-   enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
-   enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
-   enc->enc_pic.not_referenced = pic->not_referenced;
-   enc->enc_pic.is_idr = (pic->picture_type == 
PIPE_H264_ENC_PICTURE_TYPE_IDR);
-   enc->enc_pic.crop_left = 0;
-   enc->enc_pic.crop_right = (align(enc->base.width, 16) - 
enc->base.width) / 2;
-   enc->enc_pic.crop_top = 0;
-   enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
enc->base.height) / 2;
+   if (u_reduce_video_profile(picture->profile) == 
PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+  struct pipe_h264_enc_picture_desc *pic = (struct 
pipe_h264_enc_picture_desc *)picture;
+  enc->enc_pic.picture_type = pic->picture_type;
+  enc->enc_pic.frame_num = pic->frame_num;
+  enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+  enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+  enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+  enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+  enc->enc_pic.not_referenced = pic->not_referenced;
+  enc->enc_pic.is_idr = (pic->picture_type == 
PIPE_H264_ENC_PICTURE_TYPE_IDR);
+  enc->enc_pic.crop_left = 0;
+  enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) 
/ 2;
+  enc->enc_pic.crop_top = 0;
+  enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
enc->base.height) / 2;
+   } else if (u_reduce_video_profile(picture->profile) == 
PIPE_VIDEO_FORMAT_HEVC) {
+  struct pipe_h265_enc_picture_desc *pic = (struct 
pipe_h265_enc_picture_desc *)picture;
+  enc->enc_pic.picture_type = pic->picture_type;
+  enc->enc_pic.frame_num = pic->frame_num;
+  enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+  enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+  enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+  enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+  enc->enc_pic.not_referenced = pic->not_referenced;
+  enc->enc_pic.is_idr = (pic->picture_type == 
PIPE_H265_ENC_PICTURE_TYPE_IDR) ||
+(pic->picture_type == 
PIPE_H265_ENC_PICTURE_TYPE_I);
+  enc->enc_pic.crop_left = 0;
+  enc->enc_pic.crop_right = (align(enc->base.width, 16) - enc->base.width) 
/ 2;
+  enc->enc_pic.crop_top = 0;
+  enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
enc->base.height) / 2;
+  enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag;
+  enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc;
+  enc->enc_pic.general_level_idc = pic->seq.general_level_idc;
+  enc->enc_pic.max_poc = pic->seq.intra_period;
+  enc->enc_pic.log2_max_poc = 0;
+  for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++)
+ i = (i >> 1);
+  enc->enc_pic.chroma_format_idc = pic->seq.chroma_format_idc;
+  enc->enc_pic.pic_width_in_luma_samples = 
pic->seq.pic_width_in_luma_samples;
+  enc->enc_pic.pic_height_in_luma_samples = 
pic->seq.pic_height_in_luma_samples;
+  enc->enc_pic.log2_diff_max_min_luma_coding_block_size = 
pic->seq.log2_diff_max_min_luma_coding_block_size;
+  enc->enc_pic.log2_min_transform_block_size_minus2 = 
pic->seq.log2_min_transform_block_size_minus2;
+  enc->enc_pic.log2_diff_max_min_transform_block_size = 
pic->seq.log2_diff_max_min_transform_block_size;
+  enc->enc_pic.max_transform_hierarchy_depth_inter = 
pic->seq.max_transform_hierarchy_depth_inter;
+  enc->enc_pic.max_transform_hierarchy_depth_intra = 
pic->seq.max_transform_hierarchy_depth_intra;
+  enc->enc_pic.log2_parallel_merge_level_minus2 = 
pic->pic.log2_parallel_merge_level_minus2;
+  

[Mesa-dev] [PATCH 01/12] vl: add parameters for HEVC encode

2018-02-02 Thread boyuan.zhang
From: Boyuan Zhang 

Add HEVC encode interface

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 
---
 src/gallium/include/pipe/p_video_state.h | 99 
 1 file changed, 99 insertions(+)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index 5a88e6c..2533ba4 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -120,6 +120,15 @@ enum pipe_h264_enc_picture_type
PIPE_H264_ENC_PICTURE_TYPE_SKIP = 0x04
 };
 
+enum pipe_h265_enc_picture_type
+{
+   PIPE_H265_ENC_PICTURE_TYPE_P = 0x00,
+   PIPE_H265_ENC_PICTURE_TYPE_B = 0x01,
+   PIPE_H265_ENC_PICTURE_TYPE_I = 0x02,
+   PIPE_H265_ENC_PICTURE_TYPE_IDR = 0x03,
+   PIPE_H265_ENC_PICTURE_TYPE_SKIP = 0x04
+};
+
 enum pipe_h264_enc_rate_control_method
 {
PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE = 0x00,
@@ -129,6 +138,15 @@ enum pipe_h264_enc_rate_control_method
PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE = 0x04
 };
 
+enum pipe_h265_enc_rate_control_method
+{
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_DISABLE = 0x00,
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT_SKIP = 0x01,
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP = 0x02,
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_CONSTANT = 0x03,
+   PIPE_H265_ENC_RATE_CONTROL_METHOD_VARIABLE = 0x04
+};
+
 struct pipe_picture_desc
 {
enum pipe_video_profile profile;
@@ -412,6 +430,87 @@ struct pipe_h264_enc_picture_desc
 
 };
 
+struct pipe_h265_enc_seq_param
+{
+   uint8_t  general_profile_idc;
+   uint8_t  general_level_idc;
+   uint8_t  general_tier_flag;
+   uint32_t intra_period;
+   uint16_t pic_width_in_luma_samples;
+   uint16_t pic_height_in_luma_samples;
+   uint32_t chroma_format_idc;
+   uint32_t bit_depth_luma_minus8;
+   uint32_t bit_depth_chroma_minus8;
+   bool strong_intra_smoothing_enabled_flag;
+   bool amp_enabled_flag;
+   bool sample_adaptive_offset_enabled_flag;
+   bool pcm_enabled_flag;
+   bool sps_temporal_mvp_enabled_flag;
+   uint8_t  log2_min_luma_coding_block_size_minus3;
+   uint8_t  log2_diff_max_min_luma_coding_block_size;
+   uint8_t  log2_min_transform_block_size_minus2;
+   uint8_t  log2_diff_max_min_transform_block_size;
+   uint8_t  max_transform_hierarchy_depth_inter;
+   uint8_t  max_transform_hierarchy_depth_intra;
+};
+
+struct pipe_h265_enc_pic_param
+{
+   uint8_t log2_parallel_merge_level_minus2;
+   uint8_t nal_unit_type;
+   bool constrained_intra_pred_flag;
+};
+
+struct pipe_h265_enc_slice_param
+{
+   uint8_t max_num_merge_cand;
+   int8_t slice_cb_qp_offset;
+   int8_t slice_cr_qp_offset;
+   int8_t slice_beta_offset_div2;
+   int8_t slice_tc_offset_div2;
+   bool cabac_init_flag;
+   uint32_t slice_deblocking_filter_disabled_flag;
+   bool slice_loop_filter_across_slices_enabled_flag;
+};
+
+struct pipe_h265_enc_rate_control
+{
+   enum pipe_h265_enc_rate_control_method rate_ctrl_method;
+   unsigned target_bitrate;
+   unsigned peak_bitrate;
+   unsigned frame_rate_num;
+   unsigned frame_rate_den;
+   unsigned quant_i_frames;
+   unsigned vbv_buffer_size;
+   unsigned vbv_buf_lv;
+   unsigned target_bits_picture;
+   unsigned peak_bits_picture_integer;
+   unsigned peak_bits_picture_fraction;
+   unsigned fill_data_enable;
+   unsigned enforce_hrd;
+};
+
+struct pipe_h265_enc_picture_desc
+{
+   struct pipe_picture_desc base;
+
+   struct pipe_h265_enc_seq_param seq;
+   struct pipe_h265_enc_pic_param pic;
+   struct pipe_h265_enc_slice_param slice;
+   struct pipe_h265_enc_rate_control rc;
+
+   enum pipe_h265_enc_picture_type picture_type;
+   unsigned decoded_curr_pic;
+   unsigned reference_frames[16];
+   unsigned frame_num;
+   unsigned pic_order_cnt;
+   unsigned pic_order_cnt_type;
+   unsigned ref_idx_l0;
+   unsigned ref_idx_l1;
+   bool not_referenced;
+   struct util_hash_table *frame_idx;
+};
+
 struct pipe_h265_sps
 {
uint8_t chroma_format_idc;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600: don't do stack workarounds for hemlock

2018-02-02 Thread Roland Scheidegger
Am 02.02.2018 um 12:16 schrieb Emil Velikov:
> Hi Roland,
> 
> On 30 January 2018 at 05:07,   wrote:
>> From: Roland Scheidegger 
>>
>> By the looks of it it seems hemlock is treated separately to cypress, but
>> certainly it won't need the stack workarounds cedar/redwood (and
>> seemingly every other eg chip except cypress/juniper) need.
>> (Discovered by accident.)
>> ---
> Is it worth picking this for stable branches?
> 
Should be safe. Although doing the workarounds should cause no harm
either, and I have zero idea what the cost of doing them is (presumably
not too high)...
I think dual-GPU cards weren't all that popular with Linux users either
:-).

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] autotools: Only build libmesa-st-tests-common.a for tests.

2018-02-02 Thread Bas Nieuwenhuizen
We don't need the library if we don't build tests, and building
it adds a dependency on gtest which adds a dependency on cxxabi.h.

Fixes: 6569b33b6e "mesa/st/tests: unify MockCodeLine* classes"
---
 src/mesa/state_tracker/tests/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/tests/Makefile.am 
b/src/mesa/state_tracker/tests/Makefile.am
index 3c7993dc8d..9ac2815c91 100644
--- a/src/mesa/state_tracker/tests/Makefile.am
+++ b/src/mesa/state_tracker/tests/Makefile.am
@@ -19,7 +19,7 @@ if HAVE_STD_CXX11
 TESTS = st-renumerate-test
 check_PROGRAMS = st-renumerate-test
 
-noinst_LIBRARIES = libmesa-st-tests-common.a
+check_LIBRARIES = libmesa-st-tests-common.a
 endif
 
 libmesa_st_tests_common_a_SOURCES = \
-- 
2.16.0.rc1.238.g530d649a79-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104374] Cemu 1.11.1 via Wine using Mesa Mild exposes LLVM AMDGPU bugs on RX Vega..

2018-02-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104374

Michel Dänzer  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |DUPLICATE

--- Comment #3 from Michel Dänzer  ---


*** This bug has been marked as a duplicate of bug 104902 ***

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v1 0/7] Implement commont gralloc_handle_t in libdrm

2018-02-02 Thread Tomasz Figa
On Fri, Feb 2, 2018 at 11:00 PM, Rob Herring  wrote:
> On Fri, Feb 2, 2018 at 2:01 AM, Tomasz Figa  wrote:
>> Hi Rob,
>>
>> On Tue, Jan 30, 2018 at 9:36 PM, Robert Foss  
>> wrote:
   uint32_t (*get_fd)(buffer_handle_t handle, uint32_t plane);
   uint64_t (*get_modifier)(buffer_handle_t handle, uint32_t
 plane);
   uint32_t (*get_offsets)(buffer_handle_t handle, uint32_t plane);
   uint32_t (*get_stride)(buffer_handle_t handle, uint32_t plane);
   ...
 } gralloc_funcs_t;


 These ones? >
 Yeah, if we could retrieve such function pointer struct using perform
 or any equivalent (like the implementation-specific methods in
 gralloc1, but not sure if that's going to be used in practice
 anywhere), it could work for us.
>>>
>>>
>>> So this is where you and Rob Herring lose me, I don't think I understand
>>> quite how the gralloc1 call would be used, and how it would tie into this
>>> handle struct. I think I could do with some guidance on this.
>>
>> This would be very similar to gralloc0 perform call. gralloc1
>> implementations need to provide getFunction() callback [1], which
>> returns a pointer to given function. The list of standard functions is
>> defined in the gralloc1.h header [2], but we could take some random
>> big number and use it for our function that fills in provided
>> gralloc_funcs_t struct with necessary pointers.
>>
>> [1] 
>> https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/gralloc1.h#300
>> [2] 
>> https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/gralloc1.h#134
>
> This is a deadend because it won't work with a HIDL based
> implementation (aka gralloc 2.0). You can't set function pointers (or
> any pointers) because gralloc runs in a different process. Yes,
> currently gralloc is a pass-thru HAL, but AIUI that will go away.

Part of it. I can't see IMapper being implemented by a separate
process. You can't map a buffer into one process from another process.

But anyway, it's a good point, thanks, I almost forgot about its
existence. I'll do further investigation.

Best regards,
Tomasz
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v1 0/7] Implement commont gralloc_handle_t in libdrm

2018-02-02 Thread Rob Herring
On Fri, Feb 2, 2018 at 2:01 AM, Tomasz Figa  wrote:
> Hi Rob,
>
> On Tue, Jan 30, 2018 at 9:36 PM, Robert Foss  
> wrote:
>>>   uint32_t (*get_fd)(buffer_handle_t handle, uint32_t plane);
>>>   uint64_t (*get_modifier)(buffer_handle_t handle, uint32_t
>>> plane);
>>>   uint32_t (*get_offsets)(buffer_handle_t handle, uint32_t plane);
>>>   uint32_t (*get_stride)(buffer_handle_t handle, uint32_t plane);
>>>   ...
>>> } gralloc_funcs_t;
>>>
>>>
>>> These ones? >
>>> Yeah, if we could retrieve such function pointer struct using perform
>>> or any equivalent (like the implementation-specific methods in
>>> gralloc1, but not sure if that's going to be used in practice
>>> anywhere), it could work for us.
>>
>>
>> So this is where you and Rob Herring lose me, I don't think I understand
>> quite how the gralloc1 call would be used, and how it would tie into this
>> handle struct. I think I could do with some guidance on this.
>
> This would be very similar to gralloc0 perform call. gralloc1
> implementations need to provide getFunction() callback [1], which
> returns a pointer to given function. The list of standard functions is
> defined in the gralloc1.h header [2], but we could take some random
> big number and use it for our function that fills in provided
> gralloc_funcs_t struct with necessary pointers.
>
> [1] 
> https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/gralloc1.h#300
> [2] 
> https://android.googlesource.com/platform/hardware/libhardware/+/master/include/hardware/gralloc1.h#134

This is a dead end because it won't work with a HIDL-based
implementation (aka gralloc 2.0). You can't set function pointers (or
any pointers) because gralloc runs in a different process. Yes,
currently gralloc is a pass-thru HAL, but AIUI that will go away.

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH (resend)] r600/sb: Don't require array declarations for TGSI_FILE_SYSTEM_VALUE

2018-02-02 Thread Ilia Mirkin
On Fri, Feb 2, 2018 at 7:55 AM, Gert Wollny  wrote:
> Am Freitag, den 02.02.2018, 06:56 -0500 schrieb Ilia Mirkin:
>> On Fri, Feb 2, 2018 at 4:07 AM, Gert Wollny 
>> wrote:
>> > Am Freitag, den 02.02.2018, 09:04 +0100 schrieb Roland Scheidegger:
>> > >
>> > >
>> > > Yes, the _GL spec_ says it is an array.
>> > > But in gallium it can't be. Therefore I think it's incorrect if
>> > > we
>> > > end up with array accesses there (albeit I was too lazy to
>> > > actually
>> > > look at the tgsi, but I'm pretty sure it isn't declard as an
>> > > array).
>> >
>> > the TGSI for the relevant shader in the piglit looks like this:
>> >
>> > FRAG
>> > DCL SV[0], SAMPLEMASK
>> > DCL OUT[0], COLOR
>> > DCL CONST[0][0]
>> > DCL TEMP[0..1], LOCAL
>> > DCL ADDR[0]
>> > IMM[0] FLT32 {1., 0., 0., 0.}
>> > IMM[1] INT32 {1, 0, 0, 0}
>> >   0: MOV TEMP[0], IMM[0].xyyx
>> >   1: UARL ADDR[0].x, CONST[0][0].
>> >   2: USEQ TEMP[1].x, SV[ADDR[0].x]., IMM[1].
>>
>> OK, this is a big problem. I'm guessing the GLSL code was something
>> like
>>
>> gl_SampleMaskIn[uniform]
>>
> This is how the piglit is written, and the standard definess
> gl_SampleMaskIn is defined as an array, so this makes sense.
>
>> What this got translated into was an indirect access into the *global
>> implicit array of system values*. We don't want that. glsl_to_tgsi
>> should just be dropping the indirect access entirely.
> Just found this comment in mesa/st/st_glsl_to.tgsi.cpp:6495
>
>  "TODO: If we ever support more than 32 samples, this will have
>to become an array."
>
> which would imply to me that in this stage the indirect access might at
> one point become relevant.

Pretty sure I wrote that :) (commit c5d822dad90)

I added it as part of ARB_sample_shading which just has a
gl_SampleMask output in FS. I'm sure I didn't think about indirect
accesses at the time. (Not even sure they'd be legal on the output...
I guess probably would.)

To support indirect accesses, one would have to declare an arrayid,
probably -- either way, it'd be a whole thing to support it. For now,
it should be disallowed and that indirect arg should get dropped.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH (resend)] r600/sb: Don't require array declarations for TGSI_FILE_SYSTEM_VALUE

2018-02-02 Thread Gert Wollny
Am Freitag, den 02.02.2018, 06:56 -0500 schrieb Ilia Mirkin:
> On Fri, Feb 2, 2018 at 4:07 AM, Gert Wollny 
> wrote:
> > Am Freitag, den 02.02.2018, 09:04 +0100 schrieb Roland Scheidegger:
> > > 
> > > 
> > > Yes, the _GL spec_ says it is an array.
> > > But in gallium it can't be. Therefore I think it's incorrect if
> > > we
> > > end up with array accesses there (albeit I was too lazy to
> > > actually
> > > look at the tgsi, but I'm pretty sure it isn't declard as an
> > > array).
> > 
> > the TGSI for the relevant shader in the piglit looks like this:
> > 
> > FRAG
> > DCL SV[0], SAMPLEMASK
> > DCL OUT[0], COLOR
> > DCL CONST[0][0]
> > DCL TEMP[0..1], LOCAL
> > DCL ADDR[0]
> > IMM[0] FLT32 {1., 0., 0., 0.}
> > IMM[1] INT32 {1, 0, 0, 0}
> >   0: MOV TEMP[0], IMM[0].xyyx
> >   1: UARL ADDR[0].x, CONST[0][0].
> >   2: USEQ TEMP[1].x, SV[ADDR[0].x]., IMM[1].
> 
> OK, this is a big problem. I'm guessing the GLSL code was something
> like
> 
> gl_SampleMaskIn[uniform]
> 
This is how the piglit is written, and the standard defines
gl_SampleMaskIn as an array, so this makes sense.

> What this got translated into was an indirect access into the *global
> implicit array of system values*. We don't want that. glsl_to_tgsi
> should just be dropping the indirect access entirely.
Just found this comment in mesa/st/st_glsl_to.tgsi.cpp:6495

 "TODO: If we ever support more than 32 samples, this will have
   to become an array." 

which would imply to me that in this stage the indirect access might at
one point become relevant. 
   
Best, 
Gert
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] glx/test: fix building for osx

2018-02-02 Thread Emil Velikov
On 1 February 2018 at 17:48, Jon Turney  wrote:
> An additional stub for applegl_create_context() is needed
> Cannot test indirect API as it's not built on osx, currently
>
> Signed-off-by: Jon Turney 
> ---
>  src/glx/tests/fake_glx_screen.cpp | 11 +++
>  src/glx/tests/indirect_api.cpp|  4 
>  2 files changed, 15 insertions(+)
>
> diff --git a/src/glx/tests/fake_glx_screen.cpp 
> b/src/glx/tests/fake_glx_screen.cpp
> index 801f54a6fa..71e4e8ce48 100644
> --- a/src/glx/tests/fake_glx_screen.cpp
> +++ b/src/glx/tests/fake_glx_screen.cpp
> @@ -75,6 +75,17 @@ indirect_create_context_attribs(struct glx_screen *base,
> return indirect_create_context(base, config_base, shareList, 0);
>  }
>
> +#ifdef GLX_USE_APPLEGL
> +extern "C" struct glx_context *
> +applegl_create_context(struct glx_screen *base,
> +  struct glx_config *config_base,
> +  struct glx_context *shareList,
> +  int renderType)
> +{
> +   return indirect_create_context(base, config_base, shareList, renderType);
> +}
> +#endif
> +
>  /* This is necessary so that we don't have to link with glxcurrent.c
>   * which would require us to link with X libraries and what not.
>   */
> diff --git a/src/glx/tests/indirect_api.cpp b/src/glx/tests/indirect_api.cpp
> index 34304a185e..b9a4ca0655 100644
> --- a/src/glx/tests/indirect_api.cpp
> +++ b/src/glx/tests/indirect_api.cpp
> @@ -705,6 +705,8 @@ void __indirect_glFramebufferTextureLayer(void) { }
>  }
>  /*@}*/
>
> +#ifndef GLX_USE_APPLEGL
> +
I'd tweak this like below. It gives a nice warning, so people don't forget ;-)
Regardless, the series is
Reviewed-by: Emil Velikov 

#ifdef GLX_USE_APPLEGL
#warning Indirect GLX tests are not built
#else

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] glx/apple: include util/debug.h for env_var_as_boolean prototype

2018-02-02 Thread Emil Velikov
On 2 February 2018 at 10:16, Eric Engestrom  wrote:
> On Sunday, 2018-01-28 14:18:27 +, Jon Turney wrote:
>> mesa/src/glx/glxcmds.c:1295:21: error: implicit declaration of function 
>> 'env_var_as_boolean' is invalid in C99 
>> [-Werror,-Wimplicit-function-declaration]
>> mesa/src/glx/apple/apple_visual.c:85:28: error: implicit declaration of 
>> function 'env_var_as_boolean' is invalid in C99 
>> [-Werror,-Wimplicit-function-declaration]
>
> Oops, these are mine :(
>
> Fixes: 3fdbc46b42dcfd3af52d8 "glx: turn LIBGL_DUMP_VISUALID into a boolean"
> Fixes: d2768a397d6fb8a094765 "glx: turn LIBGL_PROFILE_CORE into a boolean"
> Fixes: 5c68ea29f31283768c8e1 "egl+glx: turn LIBGL_ALWAYS_SOFTWARE into a 
> boolean"
> Fixes: 43e2d58698f76b96b36ff "glx: turn LIBGL_ALLOW_SOFTWARE into a boolean"
> Reviewed-by: Eric Engestrom 
>
Right, I forgot about those. In all fairness, nearly everything from
Jon is suitable for the stable branches.
Some of the patches have landed, but we can skim through and pick them up.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] glx/apple: include util/debug.h for env_var_as_boolean prototype

2018-02-02 Thread Emil Velikov
On 2 February 2018 at 10:16, Eric Engestrom  wrote:
> On Sunday, 2018-01-28 14:18:27 +, Jon Turney wrote:
>> mesa/src/glx/glxcmds.c:1295:21: error: implicit declaration of function 
>> 'env_var_as_boolean' is invalid in C99 
>> [-Werror,-Wimplicit-function-declaration]
>> mesa/src/glx/apple/apple_visual.c:85:28: error: implicit declaration of 
>> function 'env_var_as_boolean' is invalid in C99 
>> [-Werror,-Wimplicit-function-declaration]
>
> Oops, these are mine :(
>
> Fixes: 3fdbc46b42dcfd3af52d8 "glx: turn LIBGL_DUMP_VISUALID into a boolean"
> Fixes: d2768a397d6fb8a094765 "glx: turn LIBGL_PROFILE_CORE into a boolean"
> Fixes: 5c68ea29f31283768c8e1 "egl+glx: turn LIBGL_ALWAYS_SOFTWARE into a 
> boolean"
> Fixes: 43e2d58698f76b96b36ff "glx: turn LIBGL_ALLOW_SOFTWARE into a boolean"
> Reviewed-by: Eric Engestrom 
>
>> ---
>>  src/glx/apple/apple_visual.c | 1 +
>>  src/glx/glxcmds.c| 1 +
>>  2 files changed, 2 insertions(+)
>>
>> diff --git a/src/glx/apple/apple_visual.c b/src/glx/apple/apple_visual.c
>> index d482bfc4e71..4a90d77c3a5 100644
>> --- a/src/glx/apple/apple_visual.c
>> +++ b/src/glx/apple/apple_visual.c
>> @@ -32,6 +32,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>
> Nit: use "" instead of <> for local includes :)
>
>>
>>  /*  */
>>  #define glTexImage1D glTexImage1D_OSX
>> diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
>> index eee45d962d7..943b81754f3 100644
>> --- a/src/glx/glxcmds.c
>> +++ b/src/glx/glxcmds.c
>> @@ -43,6 +43,7 @@
>>  #ifdef GLX_USE_APPLEGL
>>  #include "apple/apple_glx_context.h"
>>  #include "apple/apple_glx.h"
>> +#include "util/debug.h"
>>  #else
>>  #include 
>>  #ifdef XF86VIDMODE
>> --
>> 2.15.1
>>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH (resend)] r600/sb: Don't require array declarations for TGSI_FILE_SYSTEM_VALUE

2018-02-02 Thread Ilia Mirkin
On Fri, Feb 2, 2018 at 4:07 AM, Gert Wollny  wrote:
> Am Freitag, den 02.02.2018, 09:04 +0100 schrieb Roland Scheidegger:
>>
>>
>> Yes, the _GL spec_ says it is an array.
>> But in gallium it can't be. Therefore I think it's incorrect if we
>> end up with array accesses there (albeit I was too lazy to actually
>> look at the tgsi, but I'm pretty sure it isn't declard as an array).
> the TGSI for the relevant shader in the piglit looks like this:
>
> FRAG
> DCL SV[0], SAMPLEMASK
> DCL OUT[0], COLOR
> DCL CONST[0][0]
> DCL TEMP[0..1], LOCAL
> DCL ADDR[0]
> IMM[0] FLT32 {1., 0., 0., 0.}
> IMM[1] INT32 {1, 0, 0, 0}
>   0: MOV TEMP[0], IMM[0].xyyx
>   1: UARL ADDR[0].x, CONST[0][0].
>   2: USEQ TEMP[1].x, SV[ADDR[0].x]., IMM[1].

OK, this is a big problem. I'm guessing the GLSL code was something like

gl_SampleMaskIn[uniform]

What this got translated into was an indirect access into the *global
implicit array of system values*. We don't want that. glsl_to_tgsi
should just be dropping the indirect access entirely.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600: don't do stack workarounds for hemlock

2018-02-02 Thread Emil Velikov
Hi Roland,

On 30 January 2018 at 05:07,   wrote:
> From: Roland Scheidegger 
>
> By the looks of it it seems hemlock is treated separately to cypress, but
> certainly it won't need the stack workarounds cedar/redwood (and
> seemingly every other eg chip except cypress/juniper) need.
> (Discovered by accident.)
> ---
Is it worth picking this for stable branches?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/8] mesa: Track position/generic0 aliasing in the VAO.

2018-02-02 Thread Emil Velikov
On 1 February 2018 at 07:32,   wrote:
> From: Mathias Fröhlich 
>
> Since the first material attribute no longer aliases with
> the generic0 attribute, only aliasing between generic0 and
> position is left and entirely dependent on the enabled
> state of the VAO. So introduce a gl_attribute_map_mode
> in the VAO that is used to track how the position
> and the generic 0 attribute alias.
> Provide a static const array that can be used to
> map from vertex program input indices to VERT_ATTRIB_*
> indices. The outer dimension of the array is meant to
> be indexed directly by the new VAO member variable.
> Also provide methods on the VAO to convert bitmasks of
> VERT_BIT's from the VAO numbering to the vertex processing
> inputs numbering.
>
> v2: s,unsigned char,GLubyte,g
> s,_ATTRIBUTE_MAP_MODE_MAX,ATTRIBUTE_MAP_MODE_MAX,g
> Change comment style, add comments.
>
> Signed-off-by: Mathias Fröhlich 
> ---
>  src/mesa/main/arrayobj.c | 131 
> +++
>  src/mesa/main/arrayobj.h |  74 ++
>  src/mesa/main/enable.c   |   5 ++
>  src/mesa/main/mtypes.h   |  18 +++
>  src/mesa/main/varray.c   |  18 +--
>  5 files changed, 242 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
> index 7208f4c534..360d097ec1 100644
> --- a/src/mesa/main/arrayobj.c
> +++ b/src/mesa/main/arrayobj.c
> @@ -54,6 +54,135 @@
>  #include "util/bitscan.h"
>
>
> +const GLubyte
> +_mesa_vao_attribute_map[ATTRIBUTE_MAP_MODE_MAX][VERT_ATTRIB_MAX] =
> +{
> +   /* ATTRIBUTE_MAP_MODE_IDENTITY
> +*
> +* Grab vertex processing attribute VERT_ATTRIB_POS from
> +* the VAO attribute VERT_ATTRIB_POS, and grab vertex processing
> +* attribute VERT_ATTRIB_GENERIC0 from the VAO attribute
> +* VERT_ATTRIB_GENERIC0.
> +*/
> +   {
> +  VERT_ATTRIB_POS, /* VERT_ATTRIB_POS */
Feel free to use C99 designated initializers. All supported compilers
understand them.
Even MSVC 2013 Update 4 ;-)

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] mesa: Put materials at the end of the generic block.

2018-02-02 Thread Emil Velikov
Hi Mathias,

I've noticed you pushed this already. Just sharing a fly-by idea.

On 1 February 2018 at 07:32,   wrote:
> From: Mathias Fröhlich 
>
> The materials are now moved to the end of the
> generic attributes block to the range 4-15.
>
> Before, the way the position and generic 0 attribute
> is handled was dependent on the presence and kind of
> the currently attached vertex program. With this
> change the way the position attribute and the generic 0
> attribute is treated only depends on the enabled
> flag of those two arrays.
> This will later help to untangle the update dependencies
> between enabled arrays and shader inputs.
>
> v2: s,VERT_ATTRIB_MAT_OFFSET,VERT_ATTRIB_MAT0,g
>
> Signed-off-by: Mathias Fröhlich 
> ---
>  src/compiler/shader_enums.h   |  7 ++-
>  src/mesa/tnl/t_context.h  |  4 ++--
>  src/mesa/vbo/vbo_exec_array.c | 14 +++---
>  src/mesa/vbo/vbo_exec_draw.c  | 10 +-
>  src/mesa/vbo/vbo_save_draw.c  |  8 
>  5 files changed, 24 insertions(+), 19 deletions(-)
>
> diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
> index aa296adb5a..fb78ad384c 100644
> --- a/src/compiler/shader_enums.h
> +++ b/src/compiler/shader_enums.h
> @@ -127,6 +127,8 @@ const char *gl_vert_attrib_name(gl_vert_attrib attrib);
>   * VERT_ATTRIB_MAT
>   *   include the generic shader attributes used to alias
>   *   varying material values for the TNL shader programs.
> + *   They are located at the end of the generic attribute
> + *   block not to overlap with the generic 0 attribute.
>   */
>  #define VERT_ATTRIB_FF(i)   (VERT_ATTRIB_POS + (i))
>  #define VERT_ATTRIB_FF_MAX  VERT_ATTRIB_GENERIC0
> @@ -137,7 +139,10 @@ const char *gl_vert_attrib_name(gl_vert_attrib attrib);
>  #define VERT_ATTRIB_GENERIC(i)  (VERT_ATTRIB_GENERIC0 + (i))
>  #define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS
>
> -#define VERT_ATTRIB_MAT(i)  VERT_ATTRIB_GENERIC(i)
> +#define VERT_ATTRIB_MAT0\
> +   (VERT_ATTRIB_GENERIC_MAX - VERT_ATTRIB_MAT_MAX)
> +#define VERT_ATTRIB_MAT(i)  \
> +   VERT_ATTRIB_GENERIC((i) + VERT_ATTRIB_MAT0)
>  #define VERT_ATTRIB_MAT_MAX MAT_ATTRIB_MAX
>
>  /**
> diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h
> index 48d7ced791..082110c607 100644
> --- a/src/mesa/tnl/t_context.h
> +++ b/src/mesa/tnl/t_context.h
> @@ -158,8 +158,8 @@ enum {
>  #define _TNL_FIRST_GENERIC _TNL_ATTRIB_GENERIC0
>  #define _TNL_LAST_GENERIC  _TNL_ATTRIB_GENERIC15
>
> -#define _TNL_FIRST_MAT   _TNL_ATTRIB_MAT_FRONT_AMBIENT /* GENERIC0 */
> -#define _TNL_LAST_MAT_TNL_ATTRIB_MAT_BACK_INDEXES  /* GENERIC11 */
> +#define _TNL_FIRST_MAT   _TNL_ATTRIB_MAT_FRONT_AMBIENT /* GENERIC4 */
> +#define _TNL_LAST_MAT_TNL_ATTRIB_MAT_BACK_INDEXES  /* GENERIC15 */
>
>  /* Number of available texture attributes */
>  #define _TNL_NUM_TEX 8
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index 412b6b669c..1e4c56de9d 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -335,20 +335,20 @@ recalculate_input_bindings(struct gl_context *ctx)
>   }
>}
>
> -  for (i = 0; i < VERT_ATTRIB_MAT_MAX; i++) {
> - inputs[VERT_ATTRIB_MAT(i)] =
> ->currval[VBO_ATTRIB_MAT_FRONT_AMBIENT + i];
> - const_inputs |= VERT_BIT_MAT(i);
> -  }
> -
>/* Could use just about anything, just to fill in the empty
> * slots:
> */
> -  for (i = VERT_ATTRIB_MAT_MAX; i < VERT_ATTRIB_GENERIC_MAX; i++) {
> +  for (i = 0; i < VERT_ATTRIB_MAT0; i++) {
>   inputs[VERT_ATTRIB_GENERIC(i)] =
>  >currval[VBO_ATTRIB_GENERIC0 + i];
>   const_inputs |= VERT_BIT_GENERIC(i);
>}
> +
> +  for (i = 0; i < VERT_ATTRIB_MAT_MAX; i++) {
> + inputs[VERT_ATTRIB_MAT(i)] =
> +>currval[VBO_ATTRIB_MAT_FRONT_AMBIENT + i];
> + const_inputs |= VERT_BIT_MAT(i);
> +  }
>break;
>
> case VP_SHADER:
> diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
> index 2b7784694f..bd82825b51 100644
> --- a/src/mesa/vbo/vbo_exec_draw.c
> +++ b/src/mesa/vbo/vbo_exec_draw.c
> @@ -187,16 +187,16 @@ vbo_exec_bind_arrays(struct gl_context *ctx)
> /* Overlay other active attributes */
> switch (get_vp_mode(exec->ctx)) {
> case VP_FF:
> +  for (attr = 0; attr < VERT_ATTRIB_MAT0; attr++) {
> + assert(VERT_ATTRIB_GENERIC(attr) < ARRAY_SIZE(exec->vtx.inputs));
> + exec->vtx.inputs[VERT_ATTRIB_GENERIC(attr)] =
> +>currval[VBO_ATTRIB_GENERIC0+attr];
> +  }
>for (attr = 0; attr < VERT_ATTRIB_MAT_MAX; attr++) {
>   assert(VERT_ATTRIB_MAT(attr) < ARRAY_SIZE(exec->vtx.inputs));
>   exec->vtx.inputs[VERT_ATTRIB_MAT(attr)] =
>  >currval[VBO_ATTRIB_MAT_FRONT_AMBIENT+attr];
>

Re: [Mesa-dev] [PATCH] i965: check if compare is 0 explicitely, when downsizing a format

2018-02-02 Thread Alejandro Piñeiro
Reviewed-by: Alejandro Piñeiro 

On 01/02/18 16:03, Andres Gomez wrote:
> downsize_format_if_needed takes an integer as number of uploads
> parameter. Hence, let's do an integer comparation instead of a boolean
> check, since that is confusing.
>
> Since we are at it, fix a couple of wrongly tabbed indents.
>
> Cc: Alejandro Piñeiro 
> Cc: Kenneth Graunke 
> Signed-off-by: Andres Gomez 
> ---
>  src/mesa/drivers/dri/i965/genX_state_upload.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index a39a254dacd..45636fe69df 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -365,7 +365,7 @@ is_passthru_format(uint32_t format)
>  
>  UNUSED static int
>  uploads_needed(uint32_t format,
> -bool is_dual_slot)
> +   bool is_dual_slot)
>  {
> if (!is_passthru_format(format))
>return 1;
> @@ -409,14 +409,14 @@ downsize_format_if_needed(uint32_t format,
>  */
> switch (format) {
> case ISL_FORMAT_R64_PASSTHRU:
> -  return !upload ? ISL_FORMAT_R32G32_FLOAT
> - : ISL_FORMAT_R32_FLOAT;
> +  return upload == 0 ? ISL_FORMAT_R32G32_FLOAT
> + : ISL_FORMAT_R32_FLOAT;
> case ISL_FORMAT_R64G64_PASSTHRU:
> -  return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT
> - : ISL_FORMAT_R32_FLOAT;
> +  return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
> + : ISL_FORMAT_R32_FLOAT;
> case ISL_FORMAT_R64G64B64_PASSTHRU:
> -  return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT
> - : ISL_FORMAT_R32G32_FLOAT;
> +  return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
> + : ISL_FORMAT_R32G32_FLOAT;
> case ISL_FORMAT_R64G64B64A64_PASSTHRU:
>return ISL_FORMAT_R32G32B32A32_FLOAT;
> default:
> @@ -635,7 +635,7 @@ genX(emit_vertices)(struct brw_context *brw)
>uint32_t comp2 = VFCOMP_STORE_SRC;
>uint32_t comp3 = VFCOMP_STORE_SRC;
>const unsigned num_uploads = GEN_GEN < 8 ?
> -  uploads_needed(format, input->is_dual_slot) : 1;
> + uploads_needed(format, input->is_dual_slot) : 1;
>  
>  #if GEN_GEN >= 8
>/* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >