[Mesa-dev] [RFC PATCH] st/mesa: skip texture validation logic when nothing has changed

2017-06-09 Thread Timothy Arceri
Based on the same logic as i965 driver commits 2f225f61451abd51 and
16060c5adcd4.

perf reports st_finalize_texture() going from 0.60% -> 0.16% with
this change when running the Xonotic benchmark from PTS.
---

 A full run of piglit on radeonsi produced no regressions. No other drivers
 have been tested.

 src/mesa/state_tracker/st_cb_texture.c | 28 ++++++++++++++++++++++++++++
 src/mesa/state_tracker/st_manager.c    |  2 ++
 src/mesa/state_tracker/st_texture.h    |  9 +++++++++
 3 files changed, 39 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 99c59f7..443bb7b 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -147,20 +147,22 @@ st_DeleteTextureImage(struct gl_context * ctx, struct 
gl_texture_image *img)
 
 /** called via ctx->Driver.NewTextureObject() */
 static struct gl_texture_object *
 st_NewTextureObject(struct gl_context * ctx, GLuint name, GLenum target)
 {
struct st_texture_object *obj = ST_CALLOC_STRUCT(st_texture_object);
 
DBG("%s\n", __func__);
_mesa_initialize_texture_object(ctx, &obj->base, name, target);
 
+   obj->needs_validation = true;
+
return &obj->base;
 }
 
 /** called via ctx->Driver.DeleteTextureObject() */
 static void 
 st_DeleteTextureObject(struct gl_context *ctx,
struct gl_texture_object *texObj)
 {
struct st_context *st = st_context(ctx);
struct st_texture_object *stObj = st_texture_object(texObj);
@@ -599,20 +601,22 @@ st_AllocTextureImageBuffer(struct gl_context *ctx,
struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
const GLuint level = texImage->Level;
GLuint width = texImage->Width;
GLuint height = texImage->Height;
GLuint depth = texImage->Depth;
 
DBG("%s\n", __func__);
 
assert(!stImage->pt); /* xxx this might be wrong */
 
+   stObj->needs_validation = true;
+
etc_fallback_allocate(st, stImage);
 
/* Look if the parent texture object has space for this image */
if (stObj->pt &&
level <= stObj->pt->last_level &&
st_texture_match_image(st, stObj->pt, texImage)) {
   /* this image will fit in the existing texture object's memory */
   pipe_resource_reference(&stImage->pt, stObj->pt);
   return GL_TRUE;
}
@@ -2478,20 +2482,30 @@ st_finalize_texture(struct gl_context *ctx,
  pipe_resource_reference(>pt, st_obj->buffer);
  st_texture_release_all_sampler_views(st, stObj);
   }
   return GL_TRUE;
 
}
 
firstImage = 
st_texture_image_const(stObj->base.Image[cubeMapFace][stObj->base.BaseLevel]);
assert(firstImage);
 
+   /* Skip the loop over images in the common case of no images having
+* changed.  But if the GL_BASE_LEVEL or GL_MAX_LEVEL change to something we
+* haven't looked at, then we do need to look at those new images.
+*/
+   if (!stObj->needs_validation &&
+   stObj->base.BaseLevel >= stObj->validated_first_level &&
+   stObj->lastLevel <= stObj->validated_last_level) {
+  return GL_TRUE;
+   }
+
/* If both firstImage and stObj point to a texture which can contain
 * all active images, favour firstImage.  Note that because of the
 * completeness requirement, we know that the image dimensions
 * will match.
 */
if (firstImage->pt &&
firstImage->pt != stObj->pt &&
(!stObj->pt || firstImage->pt->last_level >= stObj->pt->last_level)) {
   pipe_resource_reference(&stObj->pt, firstImage->pt);
   st_texture_release_all_sampler_views(st, stObj);
@@ -2624,20 +2638,24 @@ st_finalize_texture(struct gl_context *ctx,
 (stImage->base.Width == u_minify(ptWidth, level) &&
  stImage->base.Height == height &&
  stImage->base.Depth == depth)) {
/* src image fits expected dest mipmap level size */
copy_image_data_to_texture(st, stObj, level, stImage);
 }
  }
   }
}
 
+   stObj->validated_first_level = stObj->base.BaseLevel;
+   stObj->validated_last_level = stObj->lastLevel;
+   stObj->needs_validation = false;
+
return GL_TRUE;
 }
 
 
 /**
  * Called via ctx->Driver.AllocTextureStorage() to allocate texture memory
  * for a whole mipmap stack.
  */
 static GLboolean
 st_AllocTextureStorage(struct gl_context *ctx,
@@ -2705,20 +2723,25 @@ st_AllocTextureStorage(struct gl_context *ctx,
   GLuint face;
   for (face = 0; face < numFaces; face++) {
  struct st_texture_image *stImage =
 st_texture_image(texObj->Image[face][level]);
  pipe_resource_reference(&stImage->pt, stObj->pt);
 
  etc_fallback_allocate(st, stImage);
   }
}
 
+   /* The texture is in a validated state, so no need to check later. */
+   stObj->needs_validation = false;
+   stObj->validated_first_level = 0;
+   stObj->validated_last_level = levels - 1;
+
return GL_TRUE;
 }
 
 
 static GLboolean
 st_TestProxyTexImage(struct 

Re: [Mesa-dev] [PATCH v5] egl/android: support for EGL_KHR_partial_update

2017-06-09 Thread Harish Krupo

Hi Eric,

On 06/09/2017 08:59 PM, Eric Engestrom wrote:

On Friday, 2017-06-09 20:13:34 +0530, Harish Krupo wrote:

This patch adds support for the EGL_KHR_partial_update extension on the
Android platform. It passes 36 of the 37 dEQP tests for EGL_KHR_partial_update;
the remaining test is reported as not supported.

v2: add fallback for eglSetDamageRegionKHR (Tapani)

v3: The native_window_set_surface_damage call is available only from
 Android version 6.0. Reintroduce the ANDROID_VERSION guard and
 advertise extension only if version is >= 6.0. (Emil Velikov)

v4: use newly introduced ANDROID_API_LEVEL guard rather than
 ANDROID_VERSION guard to advertise the extension. The extension
 is advertised only if ANDROID_API_LEVEL >= 23 (Android 6.0 or
 greater). Add fallback function for platforms other than Android.
 Fix possible math overflow. (Emil Velikov)
 Return immediately when n_rects is 0. Place function's entrypoint
 in alphabetical order. (Eric Engestrom)

v5: Replace unnecessary calloc with malloc (Eric)
 Check for BAD_ALLOC error (Emil)
 Check for error in native_window_set_damage_region. (Emil, Tapani,
 Eric).

Signed-off-by: Harish Krupo 
Reviewed-by: Emil Velikov 
Reviewed-by: Eric Engestrom 
Reviewed-by: Tapani Pälli 


Looks good to me.
Do you want me to push this for you?



Yeah, sure! :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: unmap the stream_uploader buffer before drawing

2017-06-09 Thread Brian Paul
Some drivers require that the vertex buffers be unmapped prior to
drawing.  This change unmaps the stream_uploader buffer after we've
uploaded the zero-stride attributes (unless the driver supports
rendering with mapped buffers).

This fixes a regression in the VMware driver since 17f776c27be266f2.
Some Mesa demos such as mandelbrot and brick would display black
quads instead of the expected rendering.

--

Marek: can you please verify that this is the right place for this
call (and not in st_draw_vbo())?
---
 src/mesa/state_tracker/st_atom_array.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/state_tracker/st_atom_array.c 
b/src/mesa/state_tracker/st_atom_array.c
index c7467eb..6af1355 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -670,6 +670,10 @@ setup_non_interleaved_attribs(struct st_context *st,
 array->Size, array->Doubles, );
}
 
+   if (!ctx->Const.AllowMappedBuffersDuringExecution) {
+  u_upload_unmap(st->pipe->stream_uploader);
+   }
+
set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs);
 
/* Unreference uploaded zero-stride vertex buffers. */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/util: whitespace, formatting fixes in u_upload_mgr.c

2017-06-09 Thread Brian Paul
---
 src/gallium/auxiliary/util/u_upload_mgr.c | 54 +--
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c 
b/src/gallium/auxiliary/util/u_upload_mgr.c
index 9528495..4bb14d6 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -1,8 +1,8 @@
 /**
- * 
+ *
  * Copyright 2009 VMware, Inc.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **/
 
 /* Helper utility for uploading user buffers & other data, and
@@ -59,7 +59,7 @@ struct u_upload_mgr *
 u_upload_create(struct pipe_context *pipe, unsigned default_size,
 unsigned bind, enum pipe_resource_usage usage)
 {
-   struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr );
+   struct u_upload_mgr *upload = CALLOC_STRUCT(u_upload_mgr);
if (!upload)
   return NULL;
 
@@ -104,7 +104,8 @@ u_upload_clone(struct pipe_context *pipe, struct 
u_upload_mgr *upload)
   upload->usage);
 }
 
-static void upload_unmap_internal(struct u_upload_mgr *upload, boolean 
destroying)
+static void
+upload_unmap_internal(struct u_upload_mgr *upload, boolean destroying)
 {
if (!destroying && upload->map_persistent)
   return;
@@ -124,30 +125,32 @@ static void upload_unmap_internal(struct u_upload_mgr 
*upload, boolean destroyin
 }
 
 
-void u_upload_unmap( struct u_upload_mgr *upload )
+void
+u_upload_unmap(struct u_upload_mgr *upload)
 {
upload_unmap_internal(upload, FALSE);
 }
 
 
-static void u_upload_release_buffer(struct u_upload_mgr *upload)
+static void
+u_upload_release_buffer(struct u_upload_mgr *upload)
 {
/* Unmap and unreference the upload buffer. */
upload_unmap_internal(upload, TRUE);
-   pipe_resource_reference( >buffer, NULL );
+   pipe_resource_reference(>buffer, NULL);
 }
 
 
-void u_upload_destroy( struct u_upload_mgr *upload )
+void
+u_upload_destroy(struct u_upload_mgr *upload)
 {
-   u_upload_release_buffer( upload );
-   FREE( upload );
+   u_upload_release_buffer(upload);
+   FREE(upload);
 }
 
 
 static void
-u_upload_alloc_buffer(struct u_upload_mgr *upload,
-  unsigned min_size)
+u_upload_alloc_buffer(struct u_upload_mgr *upload, unsigned min_size)
 {
struct pipe_screen *screen = upload->pipe->screen;
struct pipe_resource buffer;
@@ -155,9 +158,9 @@ u_upload_alloc_buffer(struct u_upload_mgr *upload,
 
/* Release the old buffer, if present:
 */
-   u_upload_release_buffer( upload );
+   u_upload_release_buffer(upload);
 
-   /* Allocate a new one: 
+   /* Allocate a new one:
 */
size = align(MAX2(upload->default_size, min_size), 4096);
 
@@ -232,7 +235,7 @@ u_upload_alloc(struct u_upload_mgr *upload,
   offset,
   buffer_size - offset,
   upload->map_flags,
- >transfer);
+  >transfer);
   if (unlikely(!upload->map)) {
  upload->transfer = NULL;
  *out_offset = ~0;
@@ -256,13 +259,14 @@ u_upload_alloc(struct u_upload_mgr *upload,
upload->offset = offset + size;
 }
 
-void u_upload_data(struct u_upload_mgr *upload,
-   unsigned min_out_offset,
-   unsigned size,
-   unsigned alignment,
-   const void *data,
-   unsigned *out_offset,
-   struct pipe_resource **outbuf)
+void
+u_upload_data(struct u_upload_mgr *upload,
+  unsigned min_out_offset,
+  unsigned size,
+  unsigned alignment,
+  const void *data,
+  unsigned *out_offset,
+  struct pipe_resource **outbuf)
 {

Re: [Mesa-dev] [PATCH 09/17] ac: add i32_0 convenience member to ac_llvm_context

2017-06-09 Thread Connor Abbott
Sure.

On Fri, Jun 9, 2017 at 5:04 PM, Bas Nieuwenhuizen
 wrote:
> Merge this with patch 14?
>
> On Sat, Jun 10, 2017 at 1:47 AM, Connor Abbott
>  wrote:
>> From: Connor Abbott 
>>
>> To match si_shader_context.
>>
>> Signed-off-by: Connor Abbott 
>> ---
>>  src/amd/common/ac_llvm_build.c | 2 ++
>>  src/amd/common/ac_llvm_build.h | 2 ++
>>  2 files changed, 4 insertions(+)
>>
>> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
>> index 237e929..c411880 100644
>> --- a/src/amd/common/ac_llvm_build.c
>> +++ b/src/amd/common/ac_llvm_build.c
>> @@ -62,6 +62,8 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
>> LLVMContextRef context)
>> ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
>> ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
>>
>> +ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
>> +
>> ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
>>  "range", 5);
>>
>> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
>> index ebb78fb..bde0aa8 100644
>> --- a/src/amd/common/ac_llvm_build.h
>> +++ b/src/amd/common/ac_llvm_build.h
>> @@ -46,6 +46,8 @@ struct ac_llvm_context {
>> LLVMTypeRef v4f32;
>> LLVMTypeRef v16i8;
>>
>> +LLVMValueRef i32_0;
>> +
>> unsigned range_md_kind;
>> unsigned invariant_load_md_kind;
>> unsigned uniform_md_kind;
>> --
>> 2.9.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/17] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote

2017-06-09 Thread Connor Abbott
On Fri, Jun 9, 2017 at 5:04 PM, Bas Nieuwenhuizen
 wrote:
> On Sat, Jun 10, 2017 at 1:50 AM, Connor Abbott
>  wrote:
>> From: Connor Abbott 
>>
>> Signed-off-by: Connor Abbott 
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 75 
>> +
>>  src/amd/vulkan/radv_device.c|  8 +
>>  src/amd/vulkan/radv_pipeline.c  |  2 ++
>>  3 files changed, 85 insertions(+)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c 
>> b/src/amd/common/ac_nir_to_llvm.c
>> index 5bbd1c5..111e575 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct 
>> nir_to_llvm_context *ctx,
>> case nir_intrinsic_load_patch_vertices_in:
>> result = LLVMConstInt(ctx->i32, 
>> ctx->options->key.tcs.input_vertices, false);
>> break;
>> +   case nir_intrinsic_ballot:
>> +   result = ac_build_ballot(>ac, get_src(ctx, 
>> instr->src[0]));
>> +   break;
>> +   case nir_intrinsic_read_first_invocation: {
>> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>> +   ac_build_optimization_barrier(>ac, );
>> +   LLVMValueRef srcs[1] = { src0 };
>> +   result = ac_build_intrinsic(>ac, 
>> "llvm.amdgcn.readfirstlane",
>> +   ctx->i32, srcs, 1,
>> +   AC_FUNC_ATTR_NOUNWIND |
>> +   AC_FUNC_ATTR_READNONE |
>> +   AC_FUNC_ATTR_CONVERGENT);
>> +   break;
>> +}
>> +   case nir_intrinsic_read_invocation: {
>> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>> +   ac_build_optimization_barrier(>ac, );
>> +   LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) };
>> +   result = ac_build_intrinsic(>ac, "llvm.amdgcn.readlane",
>> +   ctx->i32, srcs, 2,
>> +   AC_FUNC_ATTR_NOUNWIND |
>> +   AC_FUNC_ATTR_READNONE |
>> +   AC_FUNC_ATTR_CONVERGENT);
>> +   break;
>> +}
>> +   case nir_intrinsic_load_subgroup_invocation:
>> +   result = ac_get_thread_id(>ac);
>> +   break;
>> +   case nir_intrinsic_load_subgroup_size:
>> +   result = LLVMConstInt(ctx->i32, 64, 0);
>> +   break;
>> +   case nir_intrinsic_all_invocations:
>> +   result = LLVMBuildSExt(ctx->builder,
>> +  ac_build_vote_all(>ac,
>> +get_src(ctx, 
>> instr->src[0])),
>> +  ctx->i32, "");
>
> How well does LLVM optimize this? I've always found the boolean as
> int32 with -1 and 0 an awkward mapping to LLVM, and am wondering
> whether LLVM is able to optimize the SExt away or if a select might be
> better.

From looking at the shader dump of my test, LLVM seems to be able to
optimize it away. In fact, it's what radeonsi uses for all their
comparisons (since TGSI also uses -1 for true), so I'd expect it to be
at least as good as a select; it might be better.

It might be interesting to make booleans have a bit-size of 1, like in
LLVM... it would probably require a lot of churn, though.

>
>
>> +   break;
>> +   case nir_intrinsic_any_invocations:
>> +   result = LLVMBuildSExt(ctx->builder,
>> +  ac_build_vote_any(>ac,
>> +get_src(ctx, 
>> instr->src[0])),
>> +  ctx->i32, "");
>> +   break;
>> +   case nir_intrinsic_all_invocations_equal:
>> +   result = LLVMBuildSExt(ctx->builder,
>> +  ac_build_vote_eq(>ac,
>> +get_src(ctx, 
>> instr->src[0])),
>> +  ctx->i32, "");
>> +   break;
>> +   case nir_intrinsic_load_subgroup_eq_mask: {
>> +   LLVMValueRef id = ac_get_thread_id(>ac);
>> +   id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
>> +   result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, 
>> 1, 0), id, "");
>> +   break;
>> +   }
>> +   case nir_intrinsic_load_subgroup_ge_mask:
>> +   case nir_intrinsic_load_subgroup_gt_mask:
>> +   case nir_intrinsic_load_subgroup_le_mask:
>> +   case nir_intrinsic_load_subgroup_lt_mask: {
>> +   LLVMValueRef id = ac_get_thread_id(>ac);
>> +   if (instr->intrinsic == 

Re: [Mesa-dev] [PATCH 09/17] ac: add i32_0 convenience member to ac_llvm_context

2017-06-09 Thread Bas Nieuwenhuizen
Merge this with patch 14?

On Sat, Jun 10, 2017 at 1:47 AM, Connor Abbott
 wrote:
> From: Connor Abbott 
>
> To match si_shader_context.
>
> Signed-off-by: Connor Abbott 
> ---
>  src/amd/common/ac_llvm_build.c | 2 ++
>  src/amd/common/ac_llvm_build.h | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 237e929..c411880 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -62,6 +62,8 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
> LLVMContextRef context)
> ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
> ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
>
> +ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
> +
> ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
>  "range", 5);
>
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index ebb78fb..bde0aa8 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -46,6 +46,8 @@ struct ac_llvm_context {
> LLVMTypeRef v4f32;
> LLVMTypeRef v16i8;
>
> +LLVMValueRef i32_0;
> +
> unsigned range_md_kind;
> unsigned invariant_load_md_kind;
> unsigned uniform_md_kind;
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/17] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote

2017-06-09 Thread Bas Nieuwenhuizen
On Sat, Jun 10, 2017 at 1:50 AM, Connor Abbott
 wrote:
> From: Connor Abbott 
>
> Signed-off-by: Connor Abbott 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 75 
> +
>  src/amd/vulkan/radv_device.c|  8 +
>  src/amd/vulkan/radv_pipeline.c  |  2 ++
>  3 files changed, 85 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 5bbd1c5..111e575 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct nir_to_llvm_context 
> *ctx,
> case nir_intrinsic_load_patch_vertices_in:
> result = LLVMConstInt(ctx->i32, 
> ctx->options->key.tcs.input_vertices, false);
> break;
> +   case nir_intrinsic_ballot:
> +   result = ac_build_ballot(>ac, get_src(ctx, 
> instr->src[0]));
> +   break;
> +   case nir_intrinsic_read_first_invocation: {
> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> +   ac_build_optimization_barrier(>ac, );
> +   LLVMValueRef srcs[1] = { src0 };
> +   result = ac_build_intrinsic(>ac, 
> "llvm.amdgcn.readfirstlane",
> +   ctx->i32, srcs, 1,
> +   AC_FUNC_ATTR_NOUNWIND |
> +   AC_FUNC_ATTR_READNONE |
> +   AC_FUNC_ATTR_CONVERGENT);
> +   break;
> +}
> +   case nir_intrinsic_read_invocation: {
> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> +   ac_build_optimization_barrier(>ac, );
> +   LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) };
> +   result = ac_build_intrinsic(>ac, "llvm.amdgcn.readlane",
> +   ctx->i32, srcs, 2,
> +   AC_FUNC_ATTR_NOUNWIND |
> +   AC_FUNC_ATTR_READNONE |
> +   AC_FUNC_ATTR_CONVERGENT);
> +   break;
> +}
> +   case nir_intrinsic_load_subgroup_invocation:
> +   result = ac_get_thread_id(>ac);
> +   break;
> +   case nir_intrinsic_load_subgroup_size:
> +   result = LLVMConstInt(ctx->i32, 64, 0);
> +   break;
> +   case nir_intrinsic_all_invocations:
> +   result = LLVMBuildSExt(ctx->builder,
> +  ac_build_vote_all(>ac,
> +get_src(ctx, 
> instr->src[0])),
> +  ctx->i32, "");

How well does LLVM optimize this? I've always found the boolean as
int32 with -1 and 0 an awkward mapping to LLVM, and am wondering
whether LLVM is able to optimize the SExt away or if a select might be
better.


> +   break;
> +   case nir_intrinsic_any_invocations:
> +   result = LLVMBuildSExt(ctx->builder,
> +  ac_build_vote_any(>ac,
> +get_src(ctx, 
> instr->src[0])),
> +  ctx->i32, "");
> +   break;
> +   case nir_intrinsic_all_invocations_equal:
> +   result = LLVMBuildSExt(ctx->builder,
> +  ac_build_vote_eq(>ac,
> +get_src(ctx, 
> instr->src[0])),
> +  ctx->i32, "");
> +   break;
> +   case nir_intrinsic_load_subgroup_eq_mask: {
> +   LLVMValueRef id = ac_get_thread_id(>ac);
> +   id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
> +   result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, 1, 
> 0), id, "");
> +   break;
> +   }
> +   case nir_intrinsic_load_subgroup_ge_mask:
> +   case nir_intrinsic_load_subgroup_gt_mask:
> +   case nir_intrinsic_load_subgroup_le_mask:
> +   case nir_intrinsic_load_subgroup_lt_mask: {
> +   LLVMValueRef id = ac_get_thread_id(>ac);
> +   if (instr->intrinsic == nir_intrinsic_load_subgroup_gt_mask ||
> +   instr->intrinsic == nir_intrinsic_load_subgroup_le_mask) {
> +   /* All bits set except LSB */
> +   result = LLVMConstInt(ctx->i64, -2, 0);
> +   } else {
> +   /* All bits set */
> +   result = LLVMConstInt(ctx->i64, -1, 0);
> +   }
> +   id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
> +   result = LLVMBuildShl(ctx->builder, result, id, "");
> +   if (instr->intrinsic == 

[Mesa-dev] [PATCH 16/17] ac: enable the AMDGPU asm parser

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Ported from radeonsi. Needed for emitting optimization barriers, which
contain inline asm.

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_llvm_util.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index e20456e..0b795ef 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -40,6 +40,10 @@ static void ac_init_llvm_target()
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUAsmPrinter();
 
+
+   /* For inline assembly. */
+   LLVMInitializeAMDGPUAsmParser();
+
/*
 * Workaround for bug in llvm 4.0 that causes image intrinsics
 * to disappear.
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/17] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_nir_to_llvm.c | 75 +
 src/amd/vulkan/radv_device.c|  8 +
 src/amd/vulkan/radv_pipeline.c  |  2 ++
 3 files changed, 85 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5bbd1c5..111e575 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4069,6 +4069,81 @@ static void visit_intrinsic(struct nir_to_llvm_context 
*ctx,
case nir_intrinsic_load_patch_vertices_in:
result = LLVMConstInt(ctx->i32, 
ctx->options->key.tcs.input_vertices, false);
break;
+   case nir_intrinsic_ballot:
+   result = ac_build_ballot(>ac, get_src(ctx, instr->src[0]));
+   break;
+   case nir_intrinsic_read_first_invocation: {
+   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+   ac_build_optimization_barrier(>ac, );
+   LLVMValueRef srcs[1] = { src0 };
+   result = ac_build_intrinsic(>ac, 
"llvm.amdgcn.readfirstlane",
+   ctx->i32, srcs, 1,
+   AC_FUNC_ATTR_NOUNWIND |
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+   break;
+}
+   case nir_intrinsic_read_invocation: {
+   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+   ac_build_optimization_barrier(>ac, );
+   LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) };
+   result = ac_build_intrinsic(>ac, "llvm.amdgcn.readlane",
+   ctx->i32, srcs, 2,
+   AC_FUNC_ATTR_NOUNWIND |
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+   break;
+}
+   case nir_intrinsic_load_subgroup_invocation:
+   result = ac_get_thread_id(>ac);
+   break;
+   case nir_intrinsic_load_subgroup_size:
+   result = LLVMConstInt(ctx->i32, 64, 0);
+   break;
+   case nir_intrinsic_all_invocations:
+   result = LLVMBuildSExt(ctx->builder,
+  ac_build_vote_all(>ac,
+get_src(ctx, 
instr->src[0])),
+  ctx->i32, "");
+   break;
+   case nir_intrinsic_any_invocations:
+   result = LLVMBuildSExt(ctx->builder,
+  ac_build_vote_any(>ac,
+get_src(ctx, 
instr->src[0])),
+  ctx->i32, "");
+   break;
+   case nir_intrinsic_all_invocations_equal:
+   result = LLVMBuildSExt(ctx->builder,
+  ac_build_vote_eq(>ac,
+get_src(ctx, 
instr->src[0])),
+  ctx->i32, "");
+   break;
+   case nir_intrinsic_load_subgroup_eq_mask: {
+   LLVMValueRef id = ac_get_thread_id(>ac);
+   id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
+   result = LLVMBuildShl(ctx->builder, LLVMConstInt(ctx->i64, 1, 
0), id, "");
+   break;
+   }
+   case nir_intrinsic_load_subgroup_ge_mask:
+   case nir_intrinsic_load_subgroup_gt_mask:
+   case nir_intrinsic_load_subgroup_le_mask:
+   case nir_intrinsic_load_subgroup_lt_mask: {
+   LLVMValueRef id = ac_get_thread_id(>ac);
+   if (instr->intrinsic == nir_intrinsic_load_subgroup_gt_mask ||
+   instr->intrinsic == nir_intrinsic_load_subgroup_le_mask) {
+   /* All bits set except LSB */
+   result = LLVMConstInt(ctx->i64, -2, 0);
+   } else {
+   /* All bits set */
+   result = LLVMConstInt(ctx->i64, -1, 0);
+   }
+   id = LLVMBuildZExt(ctx->builder, id, ctx->i64, "");
+   result = LLVMBuildShl(ctx->builder, result, id, "");
+   if (instr->intrinsic == nir_intrinsic_load_subgroup_le_mask ||
+   instr->intrinsic == nir_intrinsic_load_subgroup_lt_mask)
+   result = LLVMBuildNot(ctx->builder, result, "");
+   break;
+   }
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(>instr, stderr);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index e9bf44c..ea50acc 100644
--- a/src/amd/vulkan/radv_device.c
+++ 

[Mesa-dev] [PATCH 15/17] radeonsi: move the guts of ARB_shader_group_vote emission to ac

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c   | 30 ++
 src/amd/common/ac_llvm_build.h   |  6 ++
 src/gallium/drivers/radeonsi/si_shader.c | 24 +++-
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index efbeb65..795a62f 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -265,6 +265,36 @@ ac_build_ballot(struct ac_llvm_context *ctx,
 }
 
 LLVMValueRef
+ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+   LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+   return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
+}
+
+LLVMValueRef
+ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+   return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
+ LLVMConstInt(ctx->i64, 0, 0), "");
+}
+
+LLVMValueRef
+ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+   LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+
+   LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+vote_set, active_set, "");
+   LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ vote_set,
+ LLVMConstInt(ctx->i64, 0, 0), "");
+   return LLVMBuildOr(ctx->builder, all, none, "");
+}
+
+LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 8258b21..df5743f 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -76,6 +76,12 @@ void ac_build_optimization_barrier(struct ac_llvm_context 
*ctx,
 
 LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
 
+LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef 
value);
+
+LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef 
value);
+
+LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0c6c318..4c32c18 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3579,13 +3579,8 @@ static void vote_all_emit(
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = >gallivm;
-   LLVMValueRef active_set, vote_set;
-   LLVMValueRef tmp;
-
-   active_set = ac_build_ballot(>ac, ctx->i32_1);
-   vote_set = ac_build_ballot(>ac, emit_data->args[0]);
 
-   tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, 
"");
+LLVMValueRef tmp = ac_build_vote_all(>ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
 }
@@ -3597,13 +3592,8 @@ static void vote_any_emit(
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = >gallivm;
-   LLVMValueRef vote_set;
-   LLVMValueRef tmp;
 
-   vote_set = ac_build_ballot(>ac, emit_data->args[0]);
-
-   tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
-   vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+LLVMValueRef tmp = ac_build_vote_any(>ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
 }
@@ -3615,16 +3605,8 @@ static void vote_eq_emit(
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = >gallivm;
-   LLVMValueRef active_set, vote_set;
-   LLVMValueRef all, none, tmp;
-
-   active_set = ac_build_ballot(>ac, ctx->i32_1);
-   vote_set = ac_build_ballot(>ac, emit_data->args[0]);
 
-   all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, 
"");
-   none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
-vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
-   tmp = LLVMBuildOr(gallivm->builder, all, none, "");
+LLVMValueRef tmp = ac_build_vote_eq(>ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
LLVMBuildSExt(gallivm->builder, tmp, 

[Mesa-dev] [PATCH 12/17] ac: add i64 type to ac_llvm_context

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c | 1 +
 src/amd/common/ac_llvm_build.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index c4cf7ce..472a1b8 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -59,6 +59,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context)
ctx->i8 = LLVMInt8TypeInContext(ctx->context);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
+ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index ab7c86c7..cd03c95 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -42,6 +42,7 @@ struct ac_llvm_context {
LLVMTypeRef i8;
LLVMTypeRef i32;
LLVMTypeRef f32;
+LLVMTypeRef i64;
LLVMTypeRef v4i32;
LLVMTypeRef v4f32;
LLVMTypeRef v16i8;
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/17] radeonsi: move si_emit_ballot() to ac

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c   | 26 ++
 src/amd/common/ac_llvm_build.h   |  4 
 src/gallium/drivers/radeonsi/si_shader.c | 38 +---
 3 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 472a1b8..8145bcb 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -238,6 +238,32 @@ ac_build_optimization_barrier(struct ac_llvm_context *ctx,
 }
 
 LLVMValueRef
+ac_build_ballot(struct ac_llvm_context *ctx,
+   LLVMValueRef value)
+{
+   LLVMValueRef args[3] = {
+   value,
+   ctx->i32_0,
+   LLVMConstInt(ctx->i32, LLVMIntNE, 0)
+   };
+
+   /* We currently have no other way to prevent LLVM from lifting the icmp
+* calls to a dominating basic block.
+*/
+   ac_build_optimization_barrier(ctx, &args[0]);
+
+   if (LLVMTypeOf(args[0]) != ctx->i32)
+   args[0] = LLVMBuildBitCast(ctx->builder, args[0], ctx->i32, "");
+
+   return ac_build_intrinsic(ctx,
+ "llvm.amdgcn.icmp.i32",
+ ctx->i64, args, 3,
+ AC_FUNC_ATTR_NOUNWIND |
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+}
+
+LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index cd03c95..839ed7d 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -71,6 +71,10 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char 
*buf, unsigned bufsize);
 
 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
   LLVMValueRef *pvgpr);
+
+
+LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index bb03ac9..0c6c318 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3572,32 +3572,6 @@ static void build_interp_intrinsic(const struct 
lp_build_tgsi_action *action,
}
 }
 
-static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
-  LLVMValueRef value)
-{
-   struct gallivm_state *gallivm = &ctx->gallivm;
-   LLVMValueRef args[3] = {
-   value,
-   ctx->i32_0,
-   LLVMConstInt(ctx->i32, LLVMIntNE, 0)
-   };
-
-   /* We currently have no other way to prevent LLVM from lifting the icmp
-* calls to a dominating basic block.
-*/
-   ac_build_optimization_barrier(&ctx->ac, &args[0]);
-
-   if (LLVMTypeOf(args[0]) != ctx->i32)
-   args[0] = LLVMBuildBitCast(gallivm->builder, args[0], ctx->i32, 
"");
-
-   return lp_build_intrinsic(gallivm->builder,
- "llvm.amdgcn.icmp.i32",
- ctx->i64, args, 3,
- LP_FUNC_ATTR_NOUNWIND |
- LP_FUNC_ATTR_READNONE |
- LP_FUNC_ATTR_CONVERGENT);
-}
-
 static void vote_all_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
@@ -3608,8 +3582,8 @@ static void vote_all_emit(
LLVMValueRef active_set, vote_set;
LLVMValueRef tmp;
 
-   active_set = si_emit_ballot(ctx, ctx->i32_1);
-   vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+   active_set = ac_build_ballot(&ctx->ac, ctx->i32_1);
+   vote_set = ac_build_ballot(&ctx->ac, emit_data->args[0]);
 
tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, 
"");
emit_data->output[emit_data->chan] =
@@ -3626,7 +3600,7 @@ static void vote_any_emit(
LLVMValueRef vote_set;
LLVMValueRef tmp;
 
-   vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+   vote_set = ac_build_ballot(&ctx->ac, emit_data->args[0]);
 
tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
@@ -3644,8 +3618,8 @@ static void vote_eq_emit(
LLVMValueRef active_set, vote_set;
LLVMValueRef all, none, tmp;
 
-   active_set = si_emit_ballot(ctx, ctx->i32_1);
-   vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+   active_set = ac_build_ballot(&ctx->ac, ctx->i32_1);
+   vote_set = ac_build_ballot(&ctx->ac, emit_data->args[0]);
 
all = 

[Mesa-dev] [PATCH 11/17] radeonsi: move emit_optimization_barrier() to ac

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c   | 43 ++
 src/amd/common/ac_llvm_build.h   |  2 ++
 src/gallium/drivers/radeonsi/si_shader.c | 45 ++--
 3 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 99b9134..c4cf7ce 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -36,6 +36,7 @@
 #include "ac_exp_param.h"
 #include "util/bitscan.h"
 #include "util/macros.h"
+#include "util/u_atomic.h"
 #include "sid.h"
 
 #include "shader_enums.h"
@@ -193,6 +194,48 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char 
*buf, unsigned bufsize)
}
 }
 
+/* Prevent optimizations (at least of memory accesses) across the current
+ * point in the program by emitting empty inline assembly that is marked as
+ * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
+ */
+void
+ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef *pvgpr)
+{
+   static int counter = 0;
+
+   LLVMBuilderRef builder = ctx->builder;
+   char code[16];
+
+   snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+   if (!pvgpr) {
+   LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, 
false);
+   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", 
true, false);
+   LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+   } else {
+   LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, 
false);
+   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, 
"=v,0", true, false);
+   LLVMValueRef vgpr = *pvgpr;
+   LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+   unsigned vgpr_size = ac_get_type_size(vgpr_type);
+   LLVMValueRef vgpr0;
+
+   assert(vgpr_size % 4 == 0);
+
+   vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, 
vgpr_size / 4), "");
+   vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+   vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+   vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, 
"");
+   vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+   *pvgpr = vgpr;
+   }
+}
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 615f828..ab7c86c7 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -68,6 +68,8 @@ ac_build_intrinsic(struct ac_llvm_context *ctx, const char 
*name,
 
 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned 
bufsize);
 
+void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+  LLVMValueRef *pvgpr);
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a5720d4..bb03ac9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3316,47 +3316,6 @@ static void si_llvm_return_fs_outputs(struct 
lp_build_tgsi_context *bld_base)
ctx->return_value = ret;
 }
 
-/* Prevent optimizations (at least of memory accesses) across the current
- * point in the program by emitting empty inline assembly that is marked as
- * having side effects.
- *
- * Optionally, a value can be passed through the inline assembly to prevent
- * LLVM from hoisting calls to ReadNone functions.
- */
-static void emit_optimization_barrier(struct si_shader_context *ctx,
- LLVMValueRef *pvgpr)
-{
-   static int counter = 0;
-
-   LLVMBuilderRef builder = ctx->gallivm.builder;
-   char code[16];
-
-   snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
-
-   if (!pvgpr) {
-   LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, 
false);
-   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", 
true, false);
-   LLVMBuildCall(builder, inlineasm, NULL, 0, "");
-   } else {
-   LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, 
false);
-   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, 
"=v,0", true, false);
-   LLVMValueRef vgpr = *pvgpr;
-   LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
-   unsigned vgpr_size = llvm_get_type_size(vgpr_type);
-   LLVMValueRef vgpr0;
-
- 

[Mesa-dev] [PATCH 14/17] ac: add i32_1 convenience member to ac_llvm_context

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c | 1 +
 src/amd/common/ac_llvm_build.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 8145bcb..efbeb65 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -65,6 +65,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context)
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
 
 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
+ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
 
ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
 "range", 5);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 839ed7d..8258b21 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -48,6 +48,7 @@ struct ac_llvm_context {
LLVMTypeRef v16i8;
 
 LLVMValueRef i32_0;
+LLVMValueRef i32_1;
 
unsigned range_md_kind;
unsigned invariant_load_md_kind;
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/17] radeonsi: move llvm_get_type_size() to ac

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c   | 24 +++
 src/amd/common/ac_llvm_build.h   |  2 ++
 src/gallium/drivers/radeonsi/si_shader.c | 41 +++-
 3 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index c411880..99b9134 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -81,6 +81,30 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context)
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
 }
 
+unsigned
+ac_get_type_size(LLVMTypeRef type)
+{
+   LLVMTypeKind kind = LLVMGetTypeKind(type);
+
+   switch (kind) {
+   case LLVMIntegerTypeKind:
+   return LLVMGetIntTypeWidth(type) / 8;
+   case LLVMFloatTypeKind:
+   return 4;
+   case LLVMPointerTypeKind:
+   return 8;
+   case LLVMVectorTypeKind:
+   return LLVMGetVectorSize(type) *
+  ac_get_type_size(LLVMGetElementType(type));
+   case LLVMArrayTypeKind:
+   return LLVMGetArrayLength(type) *
+  ac_get_type_size(LLVMGetElementType(type));
+   default:
+   assert(0);
+   return 0;
+   }
+}
+
 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
   LLVMTypeRef return_type, LLVMValueRef *params,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index bde0aa8..615f828 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -59,6 +59,8 @@ struct ac_llvm_context {
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context);
 
+unsigned ac_get_type_size(LLVMTypeRef type);
+
 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
   LLVMTypeRef return_type, LLVMValueRef *params,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index ddfaa3b..a5720d4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -72,8 +72,6 @@ static void si_llvm_emit_barrier(const struct 
lp_build_tgsi_action *action,
 static void si_dump_shader_key(unsigned processor, const struct si_shader 
*shader,
   FILE *f);
 
-static unsigned llvm_get_type_size(LLVMTypeRef type);
-
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
@@ -3980,29 +3978,6 @@ static void declare_streamout_params(struct 
si_shader_context *ctx,
}
 }
 
-static unsigned llvm_get_type_size(LLVMTypeRef type)
-{
-   LLVMTypeKind kind = LLVMGetTypeKind(type);
-
-   switch (kind) {
-   case LLVMIntegerTypeKind:
-   return LLVMGetIntTypeWidth(type) / 8;
-   case LLVMFloatTypeKind:
-   return 4;
-   case LLVMPointerTypeKind:
-   return 8;
-   case LLVMVectorTypeKind:
-   return LLVMGetVectorSize(type) *
-  llvm_get_type_size(LLVMGetElementType(type));
-   case LLVMArrayTypeKind:
-   return LLVMGetArrayLength(type) *
-  llvm_get_type_size(LLVMGetElementType(type));
-   default:
-   assert(0);
-   return 0;
-   }
-}
-
 static void declare_lds_as_pointer(struct si_shader_context *ctx)
 {
struct gallivm_state *gallivm = &ctx->gallivm;
@@ -4447,10 +4422,10 @@ static void create_function(struct si_shader_context 
*ctx)
shader->info.num_input_vgprs = 0;
 
for (i = 0; i <= last_sgpr; ++i)
-   shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 
4;
+   shader->info.num_input_sgprs += ac_get_type_size(params[i]) / 4;
 
for (; i < num_params; ++i)
-   shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 
4;
+   shader->info.num_input_vgprs += ac_get_type_size(params[i]) / 4;
 
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
shader->info.num_input_vgprs -= num_prolog_vgprs;
@@ -5417,7 +5392,7 @@ static void si_count_scratch_private_memory(struct 
si_shader_context *ctx)
LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
/* No idea why LLVM aligns allocas to 4 elements. */
unsigned alignment = LLVMGetAlignment(inst);
-   unsigned dw_size = align(llvm_get_type_size(type) / 4, 
alignment);
+   unsigned dw_size = align(ac_get_type_size(type) / 4, 
alignment);
ctx->shader->config.private_mem_vgprs += dw_size;
}
   

[Mesa-dev] [PATCH 09/17] ac: add i32_0 convenience member to ac_llvm_context

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

To match si_shader_context.

Signed-off-by: Connor Abbott 
---
 src/amd/common/ac_llvm_build.c | 2 ++
 src/amd/common/ac_llvm_build.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 237e929..c411880 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -62,6 +62,8 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context)
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
 
+ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
+
ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
 "range", 5);
 
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index ebb78fb..bde0aa8 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -46,6 +46,8 @@ struct ac_llvm_context {
LLVMTypeRef v4f32;
LLVMTypeRef v16i8;
 
+LLVMValueRef i32_0;
+
unsigned range_md_kind;
unsigned invariant_load_md_kind;
unsigned uniform_md_kind;
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/17] compiler: add new system values for SPV_KHR_shader_ballot

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

For SPIR-V, Khronos decided to make the SubGroup*Mask system values
consist of 4 32-bit components rather than one 64-bit component.
Although we'll lower away the difference in nir_lower_system_values so
drivers won't have to deal with them, adding these system values makes
it easier to implement the SPIRV-to-NIR bits.

Signed-off-by: Connor Abbott 
---
 src/compiler/shader_enums.c |  5 +
 src/compiler/shader_enums.h | 11 +++
 2 files changed, 16 insertions(+)

diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index b2ca80b..571a43e 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -211,6 +211,11 @@ gl_system_value_name(gl_system_value sysval)
  ENUM(SYSTEM_VALUE_SUBGROUP_GT_MASK),
  ENUM(SYSTEM_VALUE_SUBGROUP_LE_MASK),
  ENUM(SYSTEM_VALUE_SUBGROUP_LT_MASK),
+ ENUM(SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT),
+ ENUM(SYSTEM_VALUE_SUBGROUP_GE_MASK_32BIT),
+ ENUM(SYSTEM_VALUE_SUBGROUP_GT_MASK_32BIT),
+ ENUM(SYSTEM_VALUE_SUBGROUP_LE_MASK_32BIT),
+ ENUM(SYSTEM_VALUE_SUBGROUP_LT_MASK_32BIT),
  ENUM(SYSTEM_VALUE_VERTEX_ID),
  ENUM(SYSTEM_VALUE_INSTANCE_ID),
  ENUM(SYSTEM_VALUE_INSTANCE_INDEX),
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 07db476..174b5ca 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -373,6 +373,17 @@ typedef enum
SYSTEM_VALUE_SUBGROUP_GT_MASK,
SYSTEM_VALUE_SUBGROUP_LE_MASK,
SYSTEM_VALUE_SUBGROUP_LT_MASK,
+
+   /**
+* These are the same as system values above, except that they consist of 4
+* 32-bit components rather than 1 64-bit component. This matches the
+* semantics of the SPIR-V KHR_shader_ballot extension.
+*/
+   SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT,
+   SYSTEM_VALUE_SUBGROUP_GE_MASK_32BIT,
+   SYSTEM_VALUE_SUBGROUP_GT_MASK_32BIT,
+   SYSTEM_VALUE_SUBGROUP_LE_MASK_32BIT,
+   SYSTEM_VALUE_SUBGROUP_LT_MASK_32BIT,
/*@}*/
 
/*@}*/
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/17] mesa: fix 64-bit issues with system_values_read

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

We're about to bump the number of system values above 32. The
system_values_read bitfield itself is 64 bits, but some users weren't
taking that into account. Fix the ones I could find by grepping for
"system_values_read". This prevents regressions at least with radeonsi
and other Gallium drivers, and probably i965 too.

Signed-off-by: Connor Abbott 
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c| 2 +-
 src/intel/compiler/brw_vec4_gs_visitor.cpp | 3 ++-
 src/mesa/program/programopt.c  | 2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +
 src/mesa/state_tracker/st_mesa_to_tgsi.c   | 6 +++---
 5 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index d4914ac..e5daef4 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -588,7 +588,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned 
file, unsigned index,
   src = nir_src_for_ssa(>dest.ssa);
 
   b->shader->info.system_values_read |=
- (1 << nir_system_value_from_intrinsic(op));
+ (1ull << nir_system_value_from_intrinsic(op));
 
   break;
}
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp 
b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index d0236df..bdae3d4 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -653,7 +653,8 @@ brw_compile_gs(const struct brw_compiler *compiler, void 
*log_data,
   shader->info.clip_distance_array_size;
 
prog_data->include_primitive_id =
-  (shader->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 
0;
+  (shader->info.system_values_read &
+   BITFIELD64_BIT(SYSTEM_VALUE_PRIMITIVE_ID)) != 0;
 
prog_data->invocations = shader->info.gs.invocations;
 
diff --git a/src/mesa/program/programopt.c b/src/mesa/program/programopt.c
index f560bce..f389d2b 100644
--- a/src/mesa/program/programopt.c
+++ b/src/mesa/program/programopt.c
@@ -597,7 +597,7 @@ _mesa_program_fragment_position_to_sysval(struct gl_program 
*prog)
   return;
 
prog->info.inputs_read &= ~BITFIELD64_BIT(VARYING_SLOT_POS);
-   prog->info.system_values_read |= 1 << SYSTEM_VALUE_FRAG_COORD;
+   prog->info.system_values_read |= BITFIELD64_BIT(SYSTEM_VALUE_FRAG_COORD);
 
for (i = 0; i < prog->arb.NumInstructions; i++) {
   struct prog_instruction *inst = prog->arb.Instructions + i;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 76cd4dc..bdabfa1 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6450,10 +6450,10 @@ st_translate_program(
/* Declare misc input registers
 */
{
-  GLbitfield sysInputs = proginfo->info.system_values_read;
+  GLbitfield64 sysInputs = proginfo->info.system_values_read;
 
   for (i = 0; sysInputs; i++) {
- if (sysInputs & (1 << i)) {
+ if (sysInputs & BITFIELD64_BIT(i)) {
 unsigned semName = _mesa_sysval_to_semantic(i);
 
 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
@@ -6484,7 +6484,7 @@ st_translate_program(
emit_wpos(st_context(ctx), t, proginfo, ureg,
  program->wpos_transform_const);
 
-sysInputs &= ~(1 << i);
+sysInputs &= ~BITFIELD64_BIT(i);
  }
   }
}
@@ -6786,7 +6786,8 @@ get_mesa_program_tgsi(struct gl_context *ctx,
/* This must be done before the uniform storage is associated. */
if (shader->Stage == MESA_SHADER_FRAGMENT &&
(prog->info.inputs_read & VARYING_BIT_POS ||
-prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
+prog->info.system_values_read &
+BITFIELD64_BIT(SYSTEM_VALUE_FRAG_COORD))) {
   static const gl_state_index wposTransformState[STATE_LENGTH] = {
  STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
   };
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c 
b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 984ff92..5556525 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -938,9 +938,9 @@ st_translate_mesa_program(
 
/* Declare misc input registers
 */
-   GLbitfield sysInputs = program->info.system_values_read;
+   GLbitfield64 sysInputs = program->info.system_values_read;
for (i = 0; sysInputs; i++) {
-  if (sysInputs & (1 << i)) {
+  if (sysInputs & BITFIELD64_BIT(i)) {
  unsigned semName = _mesa_sysval_to_semantic(i);
 
  t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
@@ -972,7 +972,7 @@ st_translate_mesa_program(
  semName == TGSI_SEMANTIC_POSITION)
 emit_wpos(st_context(ctx), t, program, ureg);
 
-  sysInputs &= ~(1 << i);
+  sysInputs &= 

[Mesa-dev] [PATCH 08/17] nir/spirv: add plumbing for KHR_shader_ballot and KHR_subgroup_vote

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/compiler/spirv/nir_spirv.h |  2 +
 src/compiler/spirv/spirv_to_nir.c  | 80 ++
 src/compiler/spirv/vtn_variables.c | 28 +
 3 files changed, 110 insertions(+)

diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h
index 7f16866..75fe3a8 100644
--- a/src/compiler/spirv/nir_spirv.h
+++ b/src/compiler/spirv/nir_spirv.h
@@ -51,6 +51,8 @@ struct nir_spirv_supported_extensions {
bool image_write_without_format;
bool int64;
bool multiview;
+   bool shader_ballot;
+   bool shader_group_vote;
 };
 
 nir_function *spirv_to_nir(const uint32_t *words, size_t word_count,
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 0a5eb0e..a45ddbc 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -2552,6 +2552,69 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
nir_builder_instr_insert(>nb, >instr);
 }
 
+static void
+vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
+const uint32_t *w, unsigned count)
+{
+   nir_intrinsic_op intrinsic_op;
+   switch (opcode) {
+   case SpvOpSubgroupBallotKHR:
+  intrinsic_op = nir_intrinsic_ballot;
+  break;
+   case SpvOpSubgroupFirstInvocationKHR:
+  intrinsic_op = nir_intrinsic_read_first_invocation;
+  break;
+   case SpvOpSubgroupReadInvocationKHR:
+  intrinsic_op = nir_intrinsic_read_invocation;
+  break;
+   case SpvOpSubgroupAllKHR:
+  intrinsic_op = nir_intrinsic_all_invocations;
+  break;
+   case SpvOpSubgroupAnyKHR:
+  intrinsic_op = nir_intrinsic_any_invocations;
+  break;
+   case SpvOpSubgroupAllEqualKHR:
+  intrinsic_op = nir_intrinsic_all_invocations_equal;
+  break;
+   default:
+  unreachable("unknown subgroup instruction");
+  break;
+   }
+
+   nir_intrinsic_instr *intrin =
+  nir_intrinsic_instr_create(b->shader, intrinsic_op);
+
+   intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
+
+   if (opcode == SpvOpSubgroupReadInvocationKHR) {
+  intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
+   }
+
+   intrin->num_components = intrin->src[0].ssa->num_components;
+   nir_ssa_dest_init(&intrin->instr, &intrin->dest,
+ intrin->num_components,
+ (opcode == SpvOpSubgroupBallotKHR) ? 64 : 32,
+ NULL);
+   nir_builder_instr_insert(&b->nb, &intrin->instr);
+
+   nir_ssa_def *result = &intrin->dest.ssa;
+
+   if (opcode == SpvOpSubgroupBallotKHR) {
+  /* convert from 64-bit to 4 32-bit components */
+  nir_ssa_def *tmp = nir_unpack_64_2x32(&b->nb, result);
+  nir_ssa_def *zero = nir_imm_int(&b->nb, 0);
+  result = nir_vec4(&b->nb, nir_channel(&b->nb, tmp, 0),
+nir_channel(&b->nb, tmp, 1),
+zero, zero);
+   }
+
+   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+   const struct glsl_type *result_type =
+  vtn_value(b, w[1], vtn_value_type_type)->type->type;
+   val->ssa = vtn_create_ssa_value(b, result_type);
+   val->ssa->def = result;
+}
+
 static unsigned
 gl_primitive_from_spv_execution_mode(SpvExecutionMode mode)
 {
@@ -2734,6 +2797,14 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, 
SpvOp opcode,
  spv_check_supported(multiview, cap);
  break;
 
+  case SpvCapabilitySubgroupBallotKHR:
+ spv_check_supported(shader_ballot, cap);
+ break;
+
+  case SpvCapabilitySubgroupVoteKHR:
+ spv_check_supported(shader_ballot, cap);
+ break;
+
   default:
  unreachable("Unhandled capability");
   }
@@ -3238,6 +3309,15 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp 
opcode,
   vtn_handle_barrier(b, opcode, w, count);
   break;
 
+   case SpvOpSubgroupBallotKHR:
+   case SpvOpSubgroupFirstInvocationKHR:
+   case SpvOpSubgroupReadInvocationKHR:
+   case SpvOpSubgroupAllKHR:
+   case SpvOpSubgroupAnyKHR:
+   case SpvOpSubgroupAllEqualKHR:
+  vtn_handle_subgroup(b, opcode, w, count);
+  break;
+
default:
   unreachable("Unhandled opcode");
}
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 0f0cc1c..b62bf70 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1040,6 +1040,34 @@ vtn_get_builtin_location(struct vtn_builder *b,
   *location = SYSTEM_VALUE_VIEW_INDEX;
   set_mode_system_value(mode);
   break;
+   case SpvBuiltInSubgroupSize:
+  *location = SYSTEM_VALUE_SUBGROUP_SIZE;
+  set_mode_system_value(mode);
+  break;
+   case SpvBuiltInSubgroupLocalInvocationId:
+  *location = SYSTEM_VALUE_SUBGROUP_INVOCATION;
+  set_mode_system_value(mode);
+  break;
+   case SpvBuiltInSubgroupEqMaskKHR:
+  *location = SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT;
+  

[Mesa-dev] [PATCH 07/17] nir/lower_system_values: handle SPIR-V shader_ballot system values

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Lower them to the GL version.

Signed-off-by: Connor Abbott 
---
 src/compiler/nir/nir_lower_system_values.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index 810100a..3ba81ad 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -110,6 +110,44 @@ convert_block(nir_block *block, nir_builder *b)
  }
  break;
 
+  case SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT:
+  case SYSTEM_VALUE_SUBGROUP_GE_MASK_32BIT:
+  case SYSTEM_VALUE_SUBGROUP_GT_MASK_32BIT:
+  case SYSTEM_VALUE_SUBGROUP_LE_MASK_32BIT:
+  case SYSTEM_VALUE_SUBGROUP_LT_MASK_32BIT: {
+ nir_intrinsic_op op;
+ switch (var->data.location) {
+ case SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_eq_mask;
+break;
+ case SYSTEM_VALUE_SUBGROUP_GE_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_ge_mask;
+break;
+ case SYSTEM_VALUE_SUBGROUP_GT_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_gt_mask;
+break;
+ case SYSTEM_VALUE_SUBGROUP_LE_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_le_mask;
+break;
+ case SYSTEM_VALUE_SUBGROUP_LT_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_lt_mask;
+break;
+ default:
+unreachable("bad intrinsic");
+ }
+ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->shader, 
op);
+ instr->num_components = 1;
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 64, NULL);
+ nir_builder_instr_insert(b, &instr->instr);
+ 
+ sysval = nir_unpack_64_2x32(b, &instr->dest.ssa);
+ nir_ssa_def *zero = nir_imm_int(b, 0);
+ sysval = nir_vec4(b, nir_channel(b, sysval, 0),
+   nir_channel(b, sysval, 1),
+   zero, zero);
+ break;
+  }
+
   case SYSTEM_VALUE_INSTANCE_INDEX:
  sysval = nir_iadd(b,
nir_load_instance_id(b),
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/17] nir: introduce new convergent and cross-thread attributes

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

These are properties of the instruction that must be respected when
moving it around, in addition to the usual SSA dominance guarantee.
Previously, we only had special handling for fddx and fddy, in a very
ad-hoc way. But with arb_shader_ballot and arb_shader_group_vote, we'll
have to start handling a lot more instructions with similar constraints,
so we want to add a more formal model of what the optimizer can and
cannot do.

v2: don't add attribute for ALU instructions
v3: special-case derivative ALU instructions
Signed-off-by: Connor Abbott 
---
 src/compiler/nir/nir.h | 80 ++
 1 file changed, 80 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3b827bf..64caccb 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -985,6 +985,25 @@ typedef enum {
 * intrinsic are due to the register reads/writes.
 */
NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+
+   /**
+* Indicates whether this intrinsic is "cross-thread". An operation is
+* cross-thread if results in one thread depend on inputs in another thread,
+* and therefore optimizations cannot change the execution mask when the
+* operation is called. Examples of cross-thread operations include
+* screen-space derivatives, the "any" reduction which returns "true" in
+* all threads if any thread inputs "true", etc.
+*/
+   NIR_INTRINSIC_CROSS_THREAD,
+
+   /**
+* Indicates that this intrinsic is "convergent". An operation is
+* convergent when it must always be called in convergent control flow,
+* that is, control flow with the same execution mask as when the program
+* started. If an operation is convergent, it must be cross-thread as well,
+* since the optimizer must maintain the guarantee.
+*/
+   NIR_INTRINSIC_CONVERGENT,
 } nir_intrinsic_semantic_flag;
 
 /**
@@ -1459,6 +1478,67 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
 type, nir_instr_type_parallel_copy)
 
 /*
+ * Helpers to determine if an instruction is cross-thread or convergent. See
+ * NIR_INTRINSIC_{CONVERGENT|CROSS_THREAD} for the definitions.
+ */
+static inline bool
+nir_instr_is_convergent(const nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+  switch (nir_instr_as_alu(instr)->op) {
+  case nir_op_fddx:
+  case nir_op_fddy:
+  case nir_op_fddx_fine:
+  case nir_op_fddy_fine:
+  case nir_op_fddx_coarse:
+  case nir_op_fddy_coarse:
+ /* Partial derivatives are convergent */
+ return true;
+
+  default:
+ return false;
+  }
+
+   case nir_instr_type_intrinsic: {
+  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+  return nir_intrinsic_infos[intrin->intrinsic].flags &
+ NIR_INTRINSIC_CONVERGENT;
+   }
+
+   case nir_instr_type_tex:
+ switch (nir_instr_as_tex(instr)->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_lod:
+/* These three take implicit derivatives, so they are convergent */
+return true;
+
+ default:
+return false;
+ }
+
+   default:
+  return false;
+   }
+}
+
+static inline bool
+nir_instr_is_cross_thread(const nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: {
+  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+  return nir_intrinsic_infos[intrin->intrinsic].flags &
+ NIR_INTRINSIC_CROSS_THREAD;
+   }
+
+   default:
+  return nir_instr_is_convergent(instr);
+   }
+}
+
+/*
  * Control flow
  *
  * Control flow consists of a tree of control flow nodes, which include
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/17] nir/gcm: use the new cross-thread attribute

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

Signed-off-by: Connor Abbott 
---
 src/compiler/nir/nir_opt_gcm.c | 72 ++
 1 file changed, 23 insertions(+), 49 deletions(-)

diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
index 879a77a..0fdf587 100644
--- a/src/compiler/nir/nir_opt_gcm.c
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -109,65 +109,39 @@ static bool
 gcm_pin_instructions_block(nir_block *block, struct gcm_state *state)
 {
nir_foreach_instr_safe(instr, block) {
-  switch (instr->type) {
-  case nir_instr_type_alu:
- switch (nir_instr_as_alu(instr)->op) {
- case nir_op_fddx:
- case nir_op_fddy:
- case nir_op_fddx_fine:
- case nir_op_fddy_fine:
- case nir_op_fddx_coarse:
- case nir_op_fddy_coarse:
-/* These can only go in uniform control flow; pin them for now */
-instr->pass_flags = GCM_INSTR_PINNED;
+  if (nir_instr_is_cross_thread(instr)) {
+ /* pin cross-thread operations for now */
+ instr->pass_flags = GCM_INSTR_PINNED;
+  } else {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ case nir_instr_type_tex:
+ case nir_instr_type_load_const:
+instr->pass_flags = 0;
 break;
 
- default:
-instr->pass_flags = 0;
+ case nir_instr_type_intrinsic: {
+const nir_intrinsic_info *info =
+   &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+
+if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+(info->flags & NIR_INTRINSIC_CAN_REORDER)) {
+   instr->pass_flags = 0;
+} else {
+   instr->pass_flags = GCM_INSTR_PINNED;
+}
 break;
  }
- break;
 
-  case nir_instr_type_tex:
- switch (nir_instr_as_tex(instr)->op) {
- case nir_texop_tex:
- case nir_texop_txb:
- case nir_texop_lod:
-/* These two take implicit derivatives so they need to be pinned */
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_phi:
 instr->pass_flags = GCM_INSTR_PINNED;
 break;
 
  default:
-instr->pass_flags = 0;
-break;
- }
- break;
-
-  case nir_instr_type_load_const:
- instr->pass_flags = 0;
- break;
-
-  case nir_instr_type_intrinsic: {
- const nir_intrinsic_info *info =
-&nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
-
- if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
- (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
-instr->pass_flags = 0;
- } else {
-instr->pass_flags = GCM_INSTR_PINNED;
+unreachable("Invalid instruction type in GCM");
  }
- break;
-  }
-
-  case nir_instr_type_jump:
-  case nir_instr_type_ssa_undef:
-  case nir_instr_type_phi:
- instr->pass_flags = GCM_INSTR_PINNED;
- break;
-
-  default:
- unreachable("Invalid instruction type in GCM");
   }
 
   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/17] nir: take cross-thread operations into account into a few places

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

These optimizations happened to work with derivatives, but they won't
with upcoming shader_ballot and group_vote instructions.

Signed-off-by: Connor Abbott 
---
 src/compiler/nir/nir_instr_set.c   | 22 ++
 src/compiler/nir/nir_opt_peephole_select.c | 11 +++
 2 files changed, 33 insertions(+)

diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index 9cb9ed4..4bd0717 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -178,6 +178,14 @@ hash_instr(const void *data)
const nir_instr *instr = data;
uint32_t hash = _mesa_fnv32_1a_offset_bias;
 
+   /*
+* In nir_instrs_equal(), we compare the instruction's basic blocks in this
+* case. See the comment there for the explanation.
+*/
+   if (nir_instr_is_cross_thread(instr) && !nir_instr_is_convergent(instr)) {
+  HASH(hash, instr->block);
+   }
+
switch (instr->type) {
case nir_instr_type_alu:
   hash = hash_alu(hash, nir_instr_as_alu(instr));
@@ -256,6 +264,20 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr 
*instr2)
if (instr1->type != instr2->type)
   return false;
 
+   /*
+* If the instructions are cross-thread, then they must have the same
+* execution mask. If they are convergent, then we can always replace one
+* invocation with another since every invocation is guaranteed convergent.
+* But not so for non-convergent instructions, since different invocations
+* may be called with different execution masks and therefore have
+* different results. Conservatively enforce that the instructions are in
+* the same basic block.
+*/
+   if (nir_instr_is_cross_thread(instr1) && !nir_instr_is_convergent(instr1)) {
+  if (instr1->block != instr2->block)
+ return false;
+   }
+
switch (instr1->type) {
case nir_instr_type_alu: {
   nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
diff --git a/src/compiler/nir/nir_opt_peephole_select.c 
b/src/compiler/nir/nir_opt_peephole_select.c
index 4ca4f80..ce41781 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -61,6 +61,17 @@ static bool
 block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok)
 {
nir_foreach_instr(instr, block) {
+  if (nir_instr_is_cross_thread(instr) && !nir_instr_is_convergent(instr)) 
{
+ /* If the instruction is cross-thread, then we can't execute it
+  * conditionally when we would've executed it unconditionally before,
+  * except when the condition is uniform. If the instruction is
+  * convergent, though, we're already guaranteed that the entire
+  * region is convergent (including the condition) so we can go ahead.
+  *
+  * TODO: allow when the if-condition is uniform
+  */
+ return false;
+  }
   switch (instr->type) {
   case nir_instr_type_intrinsic: {
  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/17] radv: Support for subgroup_vote and shader_ballot

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

This series adds all the bits to enable EXT_shader_subgroup_vote and
EXT_shader_subgroup_ballot for radv. It's based on my previous series to
fix some 64-bit bugs in radv and anv, since nothing would work without
them.

Patches 1-4 are a resend of my previous series to add ARB_shader_ballot
and ARB_shader_group_vote support to NIR, with some changes suggested by
Jason and minor bugfixes. Patches 5-8 add SPIRV-to-NIR support for the
SPIR-V extensions. Finally, the rest of the patches move some of the
existing logic for ARB_shader_ballot and ARB_shader_group_vote into ac
when appropriate and turn on the extension for radv.

One question I had was about how to handle the differences between
SPIR-V and GL here. SPIR-V decided to make the things that return a
bitmask of invocations return a uvec4, whereas in the GL extension they
return a uint64_t. Right now, in NIR, we use the GL semantics, and we
translate that in vtn. I'm open to changing that, though.

I wrote a few crucible tests to test the extension (mostly copied from
the piglit ARB_shader_ballot and ARB_shader_group_vote coverage), which
I'll send out shortly. One of the tests is crashing because of
https://github.com/KhronosGroup/glslang/issues/930, but other than that
it passes the tests. I also made sure I didn't regress piglit on
radeonsi. It might be a good idea to run it on Intel's CI system,
especially given patches 5 and 6.

This series is also available at:
git://people.freedesktop.org/~cwabbott0/mesa radv-shader-ballot

Connor Abbott (17):
  nir: introduce new convergent and cross-thread attributes
  nir/gcm: use the new cross-thread attribute
  nir: take cross-thread operations into account into a few places
  nir: add ARB_shader_ballot and ARB_shader_group_vote instructions
  mesa: fix 64-bit issues with system_values_read
  compiler: add new system values for SPV_KHR_shader_ballot
  nir/lower_system_values: handle SPIR-V shader_ballot system values
  nir/spirv: add plumbing for KHR_shader_ballot and KHR_subgroup_vote
  ac: add i32_0 convenience member to ac_llvm_context
  radeonsi: move llvm_get_type_size() to ac
  radeonsi: move emit_optimization_barrier() to ac
  ac: add i64 type to ac_llvm_context
  radeonsi: move si_emit_ballot() to ac
  ac: add i32_1 convenience member to ac_llvm_context
  radeonsi: move the guts of ARB_shader_group_vote emission to ac
  ac: enable the AMDGPU asm parser
  radv/ac: enable EXT_shader_subgroup_ballot and
EXT_shader_subgroup_vote

 src/amd/common/ac_llvm_build.c | 127 +++
 src/amd/common/ac_llvm_build.h |  18 
 src/amd/common/ac_llvm_util.c  |   4 +
 src/amd/common/ac_nir_to_llvm.c|  75 
 src/amd/vulkan/radv_device.c   |   8 ++
 src/amd/vulkan/radv_pipeline.c |   2 +
 src/compiler/nir/nir.c |  28 ++
 src/compiler/nir/nir.h |  80 +
 src/compiler/nir/nir_instr_set.c   |  22 +
 src/compiler/nir/nir_intrinsics.h  |  30 +++
 src/compiler/nir/nir_lower_system_values.c |  38 
 src/compiler/nir/nir_opt_gcm.c |  72 +--
 src/compiler/nir/nir_opt_peephole_select.c |  11 +++
 src/compiler/shader_enums.c|   5 ++
 src/compiler/shader_enums.h|  11 +++
 src/compiler/spirv/nir_spirv.h |   2 +
 src/compiler/spirv/spirv_to_nir.c  |  80 +
 src/compiler/spirv/vtn_variables.c |  28 ++
 src/gallium/auxiliary/nir/tgsi_to_nir.c|   2 +-
 src/gallium/drivers/radeonsi/si_shader.c   | 136 +++--
 src/intel/compiler/brw_vec4_gs_visitor.cpp |   3 +-
 src/mesa/program/programopt.c  |   2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   9 +-
 src/mesa/state_tracker/st_mesa_to_tgsi.c   |   6 +-
 24 files changed, 617 insertions(+), 182 deletions(-)

-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/17] nir: add ARB_shader_ballot and ARB_shader_group_vote instructions

2017-06-09 Thread Connor Abbott
From: Connor Abbott 

v2: make every instruction an intrinsic, add missing subgroup_size,
also add system value translation stuff

Signed-off-by: Connor Abbott 
---
 src/compiler/nir/nir.c| 28 
 src/compiler/nir/nir_intrinsics.h | 30 ++
 2 files changed, 58 insertions(+)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 491b908..266ca6d 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -1864,6 +1864,20 @@ nir_intrinsic_op
 nir_intrinsic_from_system_value(gl_system_value val)
 {
switch (val) {
+   case SYSTEM_VALUE_SUBGROUP_SIZE:
+  return nir_intrinsic_load_subgroup_size;
+   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
+  return nir_intrinsic_load_subgroup_invocation;
+   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+  return nir_intrinsic_load_subgroup_eq_mask;
+   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+  return nir_intrinsic_load_subgroup_ge_mask;
+   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+  return nir_intrinsic_load_subgroup_gt_mask;
+   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+  return nir_intrinsic_load_subgroup_le_mask;
+   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
+  return nir_intrinsic_load_subgroup_lt_mask;
case SYSTEM_VALUE_VERTEX_ID:
   return nir_intrinsic_load_vertex_id;
case SYSTEM_VALUE_INSTANCE_ID:
@@ -1917,6 +1931,20 @@ gl_system_value
 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
 {
switch (intrin) {
+   case nir_intrinsic_load_subgroup_size:
+  return SYSTEM_VALUE_SUBGROUP_SIZE;
+   case nir_intrinsic_load_subgroup_invocation:
+  return SYSTEM_VALUE_SUBGROUP_INVOCATION;
+   case nir_intrinsic_load_subgroup_eq_mask:
+  return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
+   case nir_intrinsic_load_subgroup_ge_mask:
+  return SYSTEM_VALUE_SUBGROUP_GE_MASK;
+   case nir_intrinsic_load_subgroup_gt_mask:
+  return SYSTEM_VALUE_SUBGROUP_GT_MASK;
+   case nir_intrinsic_load_subgroup_le_mask:
+  return SYSTEM_VALUE_SUBGROUP_LE_MASK;
+   case nir_intrinsic_load_subgroup_lt_mask:
+  return SYSTEM_VALUE_SUBGROUP_LT_MASK;
case nir_intrinsic_load_vertex_id:
   return SYSTEM_VALUE_VERTEX_ID;
case nir_intrinsic_load_instance_id:
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 21e7d90..62e4884 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -331,6 +331,36 @@ SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
 
+
+/* ARB_shader_ballot instructions */
+
+SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
+
+#define CROSS_THREAD(name, dest_components, src_components) \
+   INTRINSIC(name, 1, ARR(src_components), true, dest_components, 0, 0, \
+ xx, xx, xx, \
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER | \
+ NIR_INTRINSIC_CROSS_THREAD)
+
+CROSS_THREAD(ballot, 0, 0)
+INTRINSIC(read_invocation, 2, ARR(0, 1), true, 0, 0, 0,
+  xx, xx, xx,
+  NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER |
+  NIR_INTRINSIC_CROSS_THREAD)
+CROSS_THREAD(read_first_invocation, 0, 0)
+
+/* ARB_shader_group_vote instructions */
+
+CROSS_THREAD(any_invocations, 1, 1)
+CROSS_THREAD(all_invocations, 1, 1)
+CROSS_THREAD(all_invocations_equal, 1, 1)
+
 /* Blend constant color values.  Float values are clamped. */
 SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(blend_const_color_g_float, 1, 0, xx, xx, xx)
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] mesa/st: glsl_to_tgsi: tie in the new register renaming approach

2017-06-09 Thread Gert Wollny
This patch replaces the old register lifetime estimation with the
new approach.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 0e7f4b646a..b76ad42536 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -55,10 +55,11 @@
 #include "st_glsl_types.h"
 #include "st_nir.h"
 #include "st_shader_cache.h"
-#include "st_glsl_to_tgsi_private.h"
+#include "st_glsl_to_tgsi_temprename.h"
 
 #include "util/hash_table.h"
 #include 
+#include 
 
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |\
(1 << PROGRAM_CONSTANT) | \
@@ -323,6 +324,7 @@ public:
 
void merge_two_dsts(void);
void merge_registers(void);
+   void merge_registers_alternative(void);
void renumber_registers(void);
 
void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
@@ -5042,6 +5044,17 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
}
 }
 
+void
+glsl_to_tgsi_visitor::merge_registers_alternative(void)
+{
+   rename_reg_pair proto ={false, 0};
+   std::vector<rename_reg_pair> renames(this->next_temp, proto);
+   tgsi_temp_lifetime analysis(&this->instructions, this->next_temp);
+   auto lt = analysis.get_lifetimes();
+   evaluate_remapping(lt, renames);
+   rename_temp_registers(&renames[0]);
+}
+
 /* Merges temporary registers together where possible to reduce the number of
  * registers needed to run a program.
  *
@@ -6492,7 +6505,7 @@ get_mesa_program_tgsi(struct gl_context *ctx,
 
v->merge_two_dsts();
if (!skip_merge_registers)
-  v->merge_registers();
+  v->merge_registers_alternative();
v->renumber_registers();
 
/* Write the END instruction. */
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] mesa/st: glsl_to_tgsi Implement a new lifetime tracker for temporaries

2017-06-09 Thread Gert Wollny
This patch adds new classes and tests to implement a tracker for the
lifetime of temporary registers for the register renaming stage of
glsl_to_tgsi. The tracker aims at estimating the shortest possible
lifetime for each register. The code base requires C++11; the flag is
propagated from LLVM_CXXFLAGS.
---
 configure.ac   |   1 +
 src/mesa/Makefile.am   |   4 +-
 src/mesa/Makefile.sources  |   2 +
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   | 551 ++
 .../state_tracker/st_glsl_to_tgsi_temprename.h | 114 +++
 src/mesa/state_tracker/tests/Makefile.am   |  40 ++
 src/mesa/state_tracker/tests/st-renumerate-test| 210 ++
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 789 +
 8 files changed, 1709 insertions(+), 2 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h
 create mode 100644 src/mesa/state_tracker/tests/Makefile.am
 create mode 100755 src/mesa/state_tracker/tests/st-renumerate-test
 create mode 100644 src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp

diff --git a/configure.ac b/configure.ac
index f379ba8573..579e159420 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2827,6 +2827,7 @@ AC_CONFIG_FILES([Makefile
src/mesa/drivers/osmesa/osmesa.pc
src/mesa/drivers/x11/Makefile
src/mesa/main/tests/Makefile
+   src/mesa/state_tracker/tests/Makefile
src/util/Makefile
src/util/tests/hash_table/Makefile
src/vulkan/Makefile])
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 53f311d2a9..72ffd61212 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -19,7 +19,7 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-SUBDIRS = . main/tests
+SUBDIRS = . main/tests state_tracker/tests
 
 if HAVE_XLIB_GLX
 SUBDIRS += drivers/x11
@@ -101,7 +101,7 @@ AM_CFLAGS = \
$(VISIBILITY_CFLAGS) \
$(MSVC2013_COMPAT_CFLAGS)
 AM_CXXFLAGS = \
-   $(LLVM_CFLAGS) \
+$(LLVM_CXXFLAGS) \
$(VISIBILITY_CXXFLAGS) \
$(MSVC2013_COMPAT_CXXFLAGS)
 
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 4450d80090..908d1acff6 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -507,6 +507,8 @@ STATETRACKER_FILES = \
state_tracker/st_glsl_to_tgsi.h \
state_tracker/st_glsl_to_tgsi_private.cpp \
state_tracker/st_glsl_to_tgsi_private.h \
+state_tracker/st_glsl_to_tgsi_temprename.cpp \
+   state_tracker/st_glsl_to_tgsi_temprename.h \
state_tracker/st_glsl_types.cpp \
state_tracker/st_glsl_types.h \
state_tracker/st_manager.c \
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
new file mode 100644
index 00..389a4b6b5f
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
@@ -0,0 +1,551 @@
+/*
+ * Copyright © 2017 Gert Wollny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "st_glsl_to_tgsi_temprename.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+
+using std::vector;
+using std::stack;
+using std::shared_ptr;
+using std::weak_ptr;
+using std::pair;
+using std::make_pair;
+using std::make_shared;
+using std::numeric_limits;
+
+tgsi_temp_lifetime::tgsi_temp_lifetime(exec_list *instructions, int ntemps):
+   lifetimes(ntemps)
+{
+   evaluate(instructions);
+}
+
+const std::vector >& tgsi_temp_lifetime::get_lifetimes() 
const
+{
+   return lifetimes;
+}
+
+void tgsi_temp_lifetime::evaluate(exec_list *instructions)
+{
+   int i = 0;
+   int loop_id = 0;

[Mesa-dev] [PATCH 1/3] mesa/st: glsl_to_tgsi move some helper classes to extra files

2017-06-09 Thread Gert Wollny
To prepare the implementation of a temp register lifetime tracker
some of the classes are moved into separate header/implementation
files to make them accessible from other files.
---
 src/mesa/Makefile.sources  |   2 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 287 +
 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp | 241 +
 src/mesa/state_tracker/st_glsl_to_tgsi_private.h   | 135 ++
 4 files changed, 381 insertions(+), 284 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_private.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 8a65fbe663..4450d80090 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -505,6 +505,8 @@ STATETRACKER_FILES = \
state_tracker/st_glsl_to_nir.cpp \
state_tracker/st_glsl_to_tgsi.cpp \
state_tracker/st_glsl_to_tgsi.h \
+   state_tracker/st_glsl_to_tgsi_private.cpp \
+   state_tracker/st_glsl_to_tgsi_private.h \
state_tracker/st_glsl_types.cpp \
state_tracker/st_glsl_types.h \
state_tracker/st_manager.c \
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index c5d2e0fcd2..0e7f4b646a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -55,6 +55,7 @@
 #include "st_glsl_types.h"
 #include "st_nir.h"
 #include "st_shader_cache.h"
+#include "st_glsl_to_tgsi_private.h"
 
 #include "util/hash_table.h"
 #include 
@@ -65,251 +66,8 @@
 
 #define MAX_GLSL_TEXTURE_OFFSET 4
 
-class st_src_reg;
-class st_dst_reg;
+extern int swizzle_for_size(int size); 
 
-static int swizzle_for_size(int size);
-
-static int swizzle_for_type(const glsl_type *type, int component = 0)
-{
-   unsigned num_elements = 4;
-
-   if (type) {
-  type = type->without_array();
-  if (type->is_scalar() || type->is_vector() || type->is_matrix())
- num_elements = type->vector_elements;
-   }
-
-   int swizzle = swizzle_for_size(num_elements);
-   assert(num_elements + component <= 4);
-
-   swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1);
-   return swizzle;
-}
-
-/**
- * This struct is a corresponding struct to TGSI ureg_src.
- */
-class st_src_reg {
-public:
-   st_src_reg(gl_register_file file, int index, const glsl_type *type,
-  int component = 0, unsigned array_id = 0)
-   {
-  assert(file != PROGRAM_ARRAY || array_id != 0);
-  this->file = file;
-  this->index = index;
-  this->swizzle = swizzle_for_type(type, component);
-  this->negate = 0;
-  this->abs = 0;
-  this->index2D = 0;
-  this->type = type ? type->base_type : GLSL_TYPE_ERROR;
-  this->reladdr = NULL;
-  this->reladdr2 = NULL;
-  this->has_index2 = false;
-  this->double_reg2 = false;
-  this->array_id = array_id;
-  this->is_double_vertex_input = false;
-   }
-
-   st_src_reg(gl_register_file file, int index, enum glsl_base_type type)
-   {
-  assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
-  this->type = type;
-  this->file = file;
-  this->index = index;
-  this->index2D = 0;
-  this->swizzle = SWIZZLE_XYZW;
-  this->negate = 0;
-  this->abs = 0;
-  this->reladdr = NULL;
-  this->reladdr2 = NULL;
-  this->has_index2 = false;
-  this->double_reg2 = false;
-  this->array_id = 0;
-  this->is_double_vertex_input = false;
-   }
-
-   st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int 
index2D)
-   {
-  assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
-  this->type = type;
-  this->file = file;
-  this->index = index;
-  this->index2D = index2D;
-  this->swizzle = SWIZZLE_XYZW;
-  this->negate = 0;
-  this->abs = 0;
-  this->reladdr = NULL;
-  this->reladdr2 = NULL;
-  this->has_index2 = false;
-  this->double_reg2 = false;
-  this->array_id = 0;
-  this->is_double_vertex_input = false;
-   }
-
-   st_src_reg()
-   {
-  this->type = GLSL_TYPE_ERROR;
-  this->file = PROGRAM_UNDEFINED;
-  this->index = 0;
-  this->index2D = 0;
-  this->swizzle = 0;
-  this->negate = 0;
-  this->abs = 0;
-  this->reladdr = NULL;
-  this->reladdr2 = NULL;
-  this->has_index2 = false;
-  this->double_reg2 = false;
-  this->array_id = 0;
-  this->is_double_vertex_input = false;
-   }
-
-   explicit st_src_reg(st_dst_reg reg);
-
-   int32_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
-   int16_t index2D;
-   uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
-   int negate:4; /**< NEGATE_XYZW mask from mesa */
-   unsigned abs:1;
-   enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum 
glsl_base_type) */
-   unsigned has_index2:1;
-   gl_register_file file:5; /**< 

[Mesa-dev] [PATCH 0/3] [RFC] mesa/st: glsl_to_tgsi: improved temp-reg lifetime estimation

2017-06-09 Thread Gert Wollny
Dear all,

as I wrote before, I was looking into the temporary register renaming.

This series of patches implements a new approach that achieves a tighter
estimation of the lifetime of the temporaries, and as a result the Piano
and Volplosion benchmarks implemented in gputest [1] now work. Before,
they failed with "r600_pipe_shader_create - translation from TGSI failed!"

Piglit shows 7 fixes and 6 regressions compared to git 8fac894f, but they don't
seem to be related to shaders. I've also tested other programs like the
unigine-* benchmarks and they didn't show regressions.

I think that the patch will need a few more iterations to remove code
duplication and generally adhere to the mesa style, but I think it is at the
point where I could use a bit of feedback to get it into shape to be
acceptable. I'd also like to mention that, since I'm new to mesa, I have no
commit rights.

many thanks,
Gert

[1] http://www.geeks3d.com/gputest/

Gert Wollny (3):
  mesa/st: glsl_to_tgsi move some helper classes to extra files
  mesa/st: glsl_to_tgsi Implement a new lifetime tracker for temporaries
  mesa/st: glsl_to_tgsi: tie in the new register renaming approach

 configure.ac   |   1 +
 src/mesa/Makefile.am   |   4 +-
 src/mesa/Makefile.sources  |   4 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 302 +---
 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp | 241 +++
 src/mesa/state_tracker/st_glsl_to_tgsi_private.h   | 135 
 .../state_tracker/st_glsl_to_tgsi_temprename.cpp   | 551 ++
 .../state_tracker/st_glsl_to_tgsi_temprename.h | 114 +++
 src/mesa/state_tracker/tests/Makefile.am   |  40 ++
 src/mesa/state_tracker/tests/st-renumerate-test| 210 ++
 .../tests/test_glsl_to_tgsi_lifetime.cpp   | 789 +
 11 files changed, 2104 insertions(+), 287 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_private.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_private.h
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_temprename.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi_temprename.h
 create mode 100644 src/mesa/state_tracker/tests/Makefile.am
 create mode 100755 src/mesa/state_tracker/tests/st-renumerate-test
 create mode 100644 src/mesa/state_tracker/tests/test_glsl_to_tgsi_lifetime.cpp

-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] i965/miptree: Separate src and dst slice specifiers in slice copy

2017-06-09 Thread Jason Ekstrand

The existence of this software fallback makes me sad.


On June 9, 2017 7:05:33 AM Topi Pohjolainen  wrote:


Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 60 ---
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c

index f8fdde7..a4b2aeb 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1205,26 +1205,25 @@ intel_miptree_get_tile_offsets(const struct 
intel_mipmap_tree *mt,


 static void
 intel_miptree_copy_slice_sw(struct brw_context *brw,
-struct intel_mipmap_tree *dst_mt,
 struct intel_mipmap_tree *src_mt,
-int level,
-int slice,
-int width,
-int height)
+unsigned src_level, unsigned src_layer,
+struct intel_mipmap_tree *dst_mt,
+unsigned dst_level, unsigned dst_layer,
+unsigned width, unsigned height)
 {
void *src, *dst;
ptrdiff_t src_stride, dst_stride;
int cpp = dst_mt->cpp;

intel_miptree_map(brw, src_mt,
- level, slice,
+ src_level, src_layer,
  0, 0,
  width, height,
  GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
  &src, &src_stride);

intel_miptree_map(brw, dst_mt,
- level, slice,
+ dst_level, dst_layer,
  0, 0,
  width, height,
  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
@@ -1250,8 +1249,8 @@ intel_miptree_copy_slice_sw(struct brw_context *brw,
   }
}

-   intel_miptree_unmap(brw, dst_mt, level, slice);
-   intel_miptree_unmap(brw, src_mt, level, slice);
+   intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
+   intel_miptree_unmap(brw, src_mt, src_level, src_layer);

/* Don't forget to copy the stencil data over, too.  We could have skipped
 * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
@@ -1260,23 +1259,28 @@ intel_miptree_copy_slice_sw(struct brw_context *brw,
 */
if (dst_mt->stencil_mt) {
   assert(src_mt->stencil_mt);
-  intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
-  level, slice, width, height);
+  intel_miptree_copy_slice_sw(brw,
+  src_mt->stencil_mt, src_level, src_layer,
+  dst_mt->stencil_mt, dst_level, dst_layer,
+  width, height);
}
 }

 static void
 intel_miptree_copy_slice(struct brw_context *brw,
-struct intel_mipmap_tree *dst_mt,
-struct intel_mipmap_tree *src_mt,
-unsigned level, unsigned slice)
+ struct intel_mipmap_tree *src_mt,
+ unsigned src_level, unsigned src_layer,
+ struct intel_mipmap_tree *dst_mt,
+ unsigned dst_level, unsigned dst_layer)

 {
+   uint32_t width = minify(src_mt->physical_width0,
+   src_level - src_mt->first_level);
+   uint32_t height = minify(src_mt->physical_height0,
+src_level - src_mt->first_level);
mesa_format format = src_mt->format;
-   uint32_t width = minify(src_mt->physical_width0, level - 
src_mt->first_level);
-   uint32_t height = minify(src_mt->physical_height0, level - 
src_mt->first_level);


-   assert(slice < src_mt->level[level].depth);
+   assert(src_layer < src_mt->level[src_level].depth);
assert(src_mt->format == dst_mt->format);

if (dst_mt->compressed) {
@@ -1292,15 +1296,17 @@ intel_miptree_copy_slice(struct brw_context *brw,
 */
if (src_mt->stencil_mt) {
   intel_miptree_copy_slice_sw(brw,
-  dst_mt, src_mt,
-  level, slice,
+  src_mt, src_level, src_layer,
+  dst_mt, dst_level, dst_layer,
   width, height);
   return;
}

uint32_t dst_x, dst_y, src_x, src_y;
-   intel_miptree_get_image_offset(dst_mt, level, slice, _x, _y);
-   intel_miptree_get_image_offset(src_mt, level, slice, _x, _y);
+   intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
+  _x, _y);
+   intel_miptree_get_image_offset(src_mt, src_level, src_layer,
+  _x, _y);

DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",

Re: [Mesa-dev] [PATCH 1/2] i965: include gen4_blorp_exec.h into EXTRA_DIST

2017-06-09 Thread Jason Ekstrand

Reviewed-by: Jason Ekstrand 


On June 9, 2017 9:01:38 AM "Juan A. Suarez Romero"  wrote:


Otherwise, `make distcheck` will fail.
---
 src/mesa/drivers/dri/i965/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/Makefile.am 
b/src/mesa/drivers/dri/i965/Makefile.am

index 762aefc..e2d5992 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -98,6 +98,7 @@ BUILT_SOURCES = $(i965_oa_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES)

 EXTRA_DIST = \
+   gen4_blorp_exec.h \
brw_oa_hsw.xml \
brw_oa.py

--
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/24] i965/cnl: Update few assertions

2017-06-09 Thread Jason Ekstrand

On June 9, 2017 2:41:46 PM Anuj Phogat  wrote:


On Thu, Jun 8, 2017 at 5:07 PM, Jason Ekstrand  wrote:

On Mon, May 15, 2017 at 10:05 AM, Anuj Phogat  wrote:




On Sat, May 13, 2017 at 9:43 AM, Jason Ekstrand 
wrote:


On May 12, 2017 4:41:36 PM Anuj Phogat  wrote:


Signed-off-by: Anuj Phogat 
---
 src/intel/compiler/brw_compiler.h   | 2 +-
 src/mesa/drivers/dri/i965/brw_program.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h
b/src/intel/compiler/brw_compiler.h
index b5b1ee9..d6bbda1 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -1042,7 +1042,7 @@ brw_stage_has_packed_dispatch(const struct
gen_device_info *devinfo,
 * to do a full test run with brw_fs_test_dispatch_packing() hooked
up to
 * the NIR front-end before changing this assertion.
 */
-   assert(devinfo->gen <= 9);
+   assert(devinfo->gen <= 10);



Did you actually do the test described in the above comment?


I've taken this change out of Ben's "[PATCH 08/12] i965/cnl: Add a
preliminary device for CNL"
and I doubt he got the chance to run the test. Adding him in Cc. I'll run
the test and post the
update here. Thanks for catching it.



Has anything happened here?


I did the test with full piglit run. Observed no GPU hangs.


Fantastic.  In that case, this patch is

Reviewed-by: Jason Ekstrand 

Let's land Cannon Lake support!






switch (stage) {
case MESA_SHADER_FRAGMENT: {
diff --git a/src/mesa/drivers/dri/i965/brw_program.c
b/src/mesa/drivers/dri/i965/brw_program.c
index d26dce0..f442d55 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -290,7 +290,7 @@ brw_memory_barrier(struct gl_context *ctx,
GLbitfield barriers)
unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
 PIPE_CONTROL_NO_WRITE |
 PIPE_CONTROL_CS_STALL);
-   assert(brw->gen >= 7 && brw->gen <= 9);
+   assert(brw->gen >= 7 && brw->gen <= 10);

if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
GL_ELEMENT_ARRAY_BARRIER_BIT |
--
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev











___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101334] Any vulkan app seems to freeze the system

2017-06-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101334

John  changed:

   What|Removed |Added

 CC||airl...@freedesktop.org

--- Comment #5 from John  ---
Adding Dave.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101334] Any vulkan app seems to freeze the system

2017-06-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101334

--- Comment #4 from John  ---
Alright, after bisecting here's the problematic commit:

https://cgit.freedesktop.org/mesa/mesa/commit/?id=bcae3274692954ad2cd6dfc253579ec98d50856f

Thanks!

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 00/24] Add Cannonlake support

2017-06-09 Thread Anuj Phogat
On Thu, Jun 8, 2017 at 5:23 PM, Jason Ekstrand  wrote:
> I think I've now reviewed everything except 2 patches.  For the "Update a
> few assertions" patch, you said you would run a test but never reported back
> the results.  The other is the patch for re-enabling sRGB fast-clears.  That
> one isn't needed for enabling and I'm not yet convinced that it's removing
> enough code.  I need to understand Skylake sRGB myself before I can really
> review anything.  Also, at Chad's request, I'll probably be adjusting the
> way that code works to be based on the ISL table at which point, re-enabling
> sRGB will just naturally fall out as a result of the ISL format table
> update.
>
Thanks for reviewing rest of the patches. I'll hold back re-enabling sRGB
fast-clears patch. I've also posted new comments on "Update a few assertions"
patch.

> On Mon, Jun 5, 2017 at 10:04 AM, Anuj Phogat  wrote:
>>
>> For your reference, here is a list of patches pending review in this
>> series:
>> 3, 18, 19, 22, 23, 24.5.
>>
>> Thanks
>> Anuj
>>
>> On Fri, Jun 2, 2017 at 5:48 PM, Anuj Phogat  wrote:
>> > On Fri, Jun 2, 2017 at 4:48 PM, Jason Ekstrand 
>> > wrote:
>> >> On Mon, May 22, 2017 at 9:32 AM, Anuj Phogat 
>> >> wrote:
>> >>>
>> >>> On Fri, May 12, 2017 at 4:38 PM, Anuj Phogat 
>> >>> wrote:
>> >>> > This series adds support for Cannonlake.
>> >>> >
>> >>> > Changes from V1 to V2:
>> >>> > - Incorporated the review comments from V1.
>> >>> > - Rebased 8 months old CNL branch on top of master
>> >>> > - Wired up Linux and Android build files for gen10
>> >>> > - Replaced the use of few gen9 functions with gen10 specific
>> >>> > functions.
>> >>> > - Squashed few patches, dropped few and created new patches.
>> >>> >
>> >>> Thanks to Jason and Ken who have reviewed few patches in this series.
>> >>> Rest of them are still waiting for the review. I really want to land
>> >>> this
>> >>> series
>> >>> (at least first 15-16 patches) soon. There are some very easy patches
>> >>> any
>> >>> one can review like enabling Mesa to build for gen10 etc. Please take
>> >>> a
>> >>> look at them. Thanks :).
>> >>
>> >>
>> >> Finally got around to looking at these again...
>> >>
>> >> Now that we're switching everything over to genxml, I think it's a good
>> >> idea
>> >> to be a bit more intentional in the way we write new platform patches.
>> >> There are a number of patches in this series that are much harder to
>> >> review
>> >> than they need to be because they're written more-or-less in order of
>> >> code
>> >> development and not in a logical reviewable order.  In particular,
>> >> there's a
>> >> patch which updates a pile of switch statements to get rid of asserts
>> >> but it
>> >> just moves them all over to gen9.  Moving stuff to gen10 is in a
>> >> different
>> >> patch.  The result is that it's very hard, without squashing things
>> >> together, to tell whether or not we missed anything when we switched
>> >> them
>> >> over to actual gen10 functions.  This isn't really a criticism of Anuj
>> >> and
>> >> Ben.  They've done a lot of rebasing on top of a lot of driver
>> >> architecture
>> >> changes.  I think this will be much easier to do better in the future.
>> >>
>> >> In my view, the ideal platform enabling patch series would look
>> >> something
>> >> like this:
>> >>
>> >>  1) Add genN.xml
>> > Does it make sense to break it down in to few patches based on manual
>> > changes we make to an auto generated genN.xml ? or send out just one
>> > patch and reviewer can diff it with previous gen and verify the changes
>> > ?
>> >
>> >>  2) Add the #defines and #includes to genxml and the build system stuff
>> >> to
>> >> generate the packing headers
>> >>  3) Update stuff in src/intel/common such as URB configuration changes
>> >>  4) Update ISL:
>> >> a) Any needed generic ISL changes such as adding new layouts.
>> >> (Cannon
>> >> lake doesn't add anything, so nothing to do here).  This may be
>> >> multiple
>> >> patches.
>> >> b) Get ISL surface state emit code building for the new hardware.
>> >> This
>> >> includes updating the autotools and Android makefiles, adding function
>> >> prototypes, updating switch statements, etc.  If changes are needed in
>> >> isl_surface_state.c or isl_depth_stencil.c, they should be minimal but
>> >> still
>> >> enough that the end result is correct.
>> >> 5) Update BLORP as needed for the new platform.  Sadly, there's no way
>> >> to
>> >> build-test this without the next step since BLORP doesn't build its own
>> >> genX
>> >> files.
>> >> 6) Get GL driver genxml state-upload and blorp code building and hooked
>> >> in.
>> >> Core blorp changes should go in their own patch (above) but this will
>> >> include the build system changes for blorp as well.  This also includes
>> >> updating switch statements.
>> >> 8) Implement workarounds, 

Re: [Mesa-dev] [PATCH 13/24] i965/cnl: Update few assertions

2017-06-09 Thread Anuj Phogat
On Thu, Jun 8, 2017 at 5:07 PM, Jason Ekstrand  wrote:
> On Mon, May 15, 2017 at 10:05 AM, Anuj Phogat  wrote:
>>
>>
>>
>> On Sat, May 13, 2017 at 9:43 AM, Jason Ekstrand 
>> wrote:
>>>
>>> On May 12, 2017 4:41:36 PM Anuj Phogat  wrote:
>>>
 Signed-off-by: Anuj Phogat 
 ---
  src/intel/compiler/brw_compiler.h   | 2 +-
  src/mesa/drivers/dri/i965/brw_program.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

 diff --git a/src/intel/compiler/brw_compiler.h
 b/src/intel/compiler/brw_compiler.h
 index b5b1ee9..d6bbda1 100644
 --- a/src/intel/compiler/brw_compiler.h
 +++ b/src/intel/compiler/brw_compiler.h
 @@ -1042,7 +1042,7 @@ brw_stage_has_packed_dispatch(const struct
 gen_device_info *devinfo,
  * to do a full test run with brw_fs_test_dispatch_packing() hooked
 up to
  * the NIR front-end before changing this assertion.
  */
 -   assert(devinfo->gen <= 9);
 +   assert(devinfo->gen <= 10);
>>>
>>>
>>> Did you actually do the test described in the above comment?
>>
>> I've taken this change out of Ben's "[PATCH 08/12] i965/cnl: Add a
>> preliminary device for CNL"
>> and I doubt he got the chance to run the test. Adding him in Cc. I'll run
>> the test and post the
>> update here. Thanks for catching it.
>
>
> Has anything happened here?
>
I did the test with full piglit run. Observed no GPU hangs.

>>>
>>>

 switch (stage) {
 case MESA_SHADER_FRAGMENT: {
 diff --git a/src/mesa/drivers/dri/i965/brw_program.c
 b/src/mesa/drivers/dri/i965/brw_program.c
 index d26dce0..f442d55 100644
 --- a/src/mesa/drivers/dri/i965/brw_program.c
 +++ b/src/mesa/drivers/dri/i965/brw_program.c
 @@ -290,7 +290,7 @@ brw_memory_barrier(struct gl_context *ctx,
 GLbitfield barriers)
 unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
  PIPE_CONTROL_NO_WRITE |
  PIPE_CONTROL_CS_STALL);
 -   assert(brw->gen >= 7 && brw->gen <= 9);
 +   assert(brw->gen >= 7 && brw->gen <= 10);

 if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
 GL_ELEMENT_ARRAY_BARRIER_BIT |
 --
 2.9.3

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>>
>>>
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Call intel_prepare_render() from intel_update_state()

2017-06-09 Thread Kenneth Graunke
The resolve code looks at the current color draw buffers.  These are not
valid until intel_prepare_render() is called.  You can end up with one
color buffer bound, but where the renderbuffer has zero width/height and
no miptree allocated.

You can get a call chain like: _mesa_Clear -> _mesa_update_state ->
intel_update_state, where no brw driver hooks were called, so there is
no other point at which we could have called this.

Fixes crashes in KWin where Clear was causing intel_disable_rb_aux_buffer
to crash on irb != NULL but irb->mt == NULL.

Tested-by: Tobias Klausmann 
Cc: Jason Ekstrand 
---
 src/mesa/drivers/dri/i965/brw_context.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 6d27866fcea..5433f9080ee 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -201,6 +201,8 @@ intel_update_state(struct gl_context * ctx)
 
_mesa_unlock_context_textures(ctx);
 
+   intel_prepare_render(brw);
+
/* Resolve the depth buffer's HiZ buffer. */
depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
if (depth_irb && depth_irb->mt) {
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101338] Mesa software rendering draws incompletely on Raspberry Pi

2017-06-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101338

--- Comment #5 from Lloyd Wood  ---
Brian,

Thanks for checking this out. Which versions of Geomview and SaVi did you
install?

Seeing a solid blue sphere on your Intel system suggests either that
texturemapping is not used by default in SaVi (in SaVi versions earlier than
1.5.0 - play with the Rendering/map menu options, where you can select a
blue/yellow sphere or a more faithful realistic rendition) or that Geomview was
built without OpenGL support, and is only drawing using X. OpenGL is both a
build option and a command-line switch to turn it off in Geomview; configure
--with-opengl=DIR if building Geomview yourself.

To really show if OpenGL support is there and give it a workout, please open
Views/Global Coverage... in SaVi, select a map size from the popup dialog, then
turn on texturemapping from the end of the coverage window's Rendering window
and select the Views/>> Forwards... menu option to send the redrawn map bitmap
through to Geomview to wrap on the sphere during the animation. You'll see the
kind of thing shown in the top screenshots at
http://savi.sourceforge.net/papers/

The Raspberry Pi Model B problem affects all versions of SaVi I've tried on it,
and all recent versions of Geomview I've seen there are built with OpenGL,
which is why I didn't feel the need to be more detailed in my initial report,
sorry. I've never seen a similar rendering problem on the Intel systems I've
used this on, in over fifteen years (on Redhat, Cygwin, Ubuntu.)

A photo of SaVi/Geomview running on PIXEL 1.2 on the Pi Model B, showing the
rendering problem clearly, is at:
http://personal.ee.surrey.ac.uk/Personal/L.Wood/software/SaVi/images/raspberry-pi-model-b-software-mesa.jpeg

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101338] Mesa software rendering draws incompletely on Raspberry Pi

2017-06-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101338

--- Comment #4 from Roland Scheidegger  ---
The original Raspberry Pi IIRC doesn't even support NEON. Theoretically, llvm
should still support all the vector instructions by decomposing them into
scalar ones, but whether that really works correctly is another question. I
know it didn't work some time ago (at all) on x86, so I wouldn't be surprised
if things get miscompiled on arm either.

(And Brian is right, llvmpipe isn't all that well tested on anything but x86.
In theory it should work alright at least on all little endian archs, though
you really want to have a cpu with vector instructions, otherwise it not only
will be slow but there's a whole other set of potential issues with llvm code
generation. Possibly a newer llvm version could help, albeit 3.9 isn't all that
old.)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/2] r600g, compute: provide local copy of ac_binary.{h, c}

2017-06-09 Thread Jan Vesely
On Fri, 2017-06-09 at 13:44 -0500, Aaron Watry wrote:
> On Fri, Jun 9, 2017 at 12:36 PM, Jan Vesely  wrote:
> > On Fri, 2017-06-09 at 10:12 -0500, Aaron Watry wrote:
> > > On Fri, Jun 9, 2017 at 8:20 AM, Jan Vesely  wrote:
> > > > This is a verbatim copy of the code. The functions can be cleaned up 
> > > > since
> > > > r600 does not use all the stuff that gcn does.
> > > > The symbol names have been changed since we still use ac_binary.h header
> > > > (for struct definition)
> > > > 
> > > > Signed-off-by: Jan Vesely 
> > > > ---
> > > > Emil, Aaron,
> > > > 
> > > > this is the last patch to get rid of libamd_common dependency (and thus 
> > > > libdrm_amdgpu). I have only remote access to the machine atm, so it's 
> > > > compile tested only.
> > > > 
> > > > Jan
> > > > 
> > > >  configure.ac   |   5 +-
> > > >  src/gallium/drivers/r600/Automake.inc  |  10 +-
> > > >  src/gallium/drivers/r600/Makefile.am   |   2 +
> > > >  src/gallium/drivers/r600/evergreen_compute.c   | 197 
> > > > -
> > > >  .../drivers/r600/evergreen_compute_internal.h  |   2 +-
> > > >  src/gallium/drivers/radeon/r600_pipe_common.c  |  21 ---
> > > >  src/gallium/drivers/radeon/r600_pipe_common.h  |   5 -
> > > >  src/gallium/targets/pipe-loader/Makefile.am|  10 +-
> > > >  8 files changed, 200 insertions(+), 52 deletions(-)
> > > > 
> > > > diff --git a/configure.ac b/configure.ac
> > > > index 9433e3c..fc4a58f 100644
> > > > --- a/configure.ac
> > > > +++ b/configure.ac
> > > > @@ -2631,10 +2631,7 @@ AM_CONDITIONAL(HAVE_SWRAST_DRI, test 
> > > > x$HAVE_SWRAST_DRI = xyes)
> > > >  AM_CONDITIONAL(HAVE_RADEON_VULKAN, test "x$HAVE_RADEON_VULKAN" = xyes)
> > > >  AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes)
> > > > 
> > > > -# FIXME: r600g still depends and amd_common (ac_binary*) when building 
> > > > OpenCL
> > > > -AM_CONDITIONAL(HAVE_AMD_DRIVERS, test \( "x$HAVE_GALLIUM_R600" = xyes 
> > > > -a \
> > > > -  "x$enable_opencl" = xyes \) -o \
> > > > -  "x$HAVE_GALLIUM_RADEONSI" = xyes 
> > > > -o \
> > > > +AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_RADEONSI" = xyes 
> > > > -o \
> > > >"x$HAVE_RADEON_VULKAN" = xyes)
> > > > 
> > > >  AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes 
> > > > -o \
> > > > diff --git a/src/gallium/drivers/r600/Automake.inc 
> > > > b/src/gallium/drivers/r600/Automake.inc
> > > > index 642d527..bb9f6ec 100644
> > > > --- a/src/gallium/drivers/r600/Automake.inc
> > > > +++ b/src/gallium/drivers/r600/Automake.inc
> > > > @@ -5,18 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_R600
> > > >  TARGET_LIB_DEPS += \
> > > > $(top_builddir)/src/gallium/drivers/r600/libr600.la \
> > > > $(RADEON_LIBS) \
> > > > -   $(LIBDRM_LIBS)
> > > > +   $(LIBDRM_LIBS) \
> > > > +   $(LIBELF_LIBS)
> > > > 
> > > >  TARGET_RADEON_WINSYS = \
> > > > $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
> > > > 
> > > >  TARGET_RADEON_COMMON = \
> > > > $(top_builddir)/src/gallium/drivers/radeon/libradeon.la
> > > > -
> > > > -# TODO: drop this dependency. libamd_common requires libdrm_amdgpu.
> > > > -if HAVE_AMD_DRIVERS
> > > > -TARGET_RADEON_COMMON += \
> > > > -   $(top_builddir)/src/amd/common/libamd_common.la
> > > > -endif
> > > > -
> > > >  endif
> > > > diff --git a/src/gallium/drivers/r600/Makefile.am 
> > > > b/src/gallium/drivers/r600/Makefile.am
> > > > index 44fd51d..fbfb6e6 100644
> > > > --- a/src/gallium/drivers/r600/Makefile.am
> > > > +++ b/src/gallium/drivers/r600/Makefile.am
> > > > @@ -9,11 +9,13 @@ BUILT_SOURCES = $(R600_GENERATED_FILES)
> > > >  AM_CFLAGS = \
> > > > $(GALLIUM_DRIVER_CFLAGS) \
> > > > $(RADEON_CFLAGS) \
> > > > +   $(LIBELF_CFLAGS) \
> > > > -I$(top_srcdir)/src/amd/common
> > > > 
> > > >  AM_CXXFLAGS = \
> > > > $(GALLIUM_DRIVER_CXXFLAGS) \
> > > > $(RADEON_CFLAGS) \
> > > > +   $(LIBELF_CFLAGS) \
> > > > -I$(top_srcdir)/src/amd/common
> > > > 
> > > >  noinst_LTLIBRARIES = libr600.la
> > > > diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> > > > b/src/gallium/drivers/r600/evergreen_compute.c
> > > > index d30024d..69a6d8b 100644
> > > > --- a/src/gallium/drivers/r600/evergreen_compute.c
> > > > +++ b/src/gallium/drivers/r600/evergreen_compute.c
> > > > @@ -24,9 +24,10 @@
> > > >   *  Adam Rak 
> > > >   */
> > > > 
> > > > +#include 
> > > > +#include 
> > > >  #include 
> > > >  #include 
> > > > -#include "ac_binary.h"
> > > >  #include "pipe/p_defines.h"
> > > >  #include "pipe/p_state.h"
> > > >  #include "pipe/p_context.h"
> > > > @@ -179,6 +180,192 @@ static void 
> > > > 

Re: [Mesa-dev] [PATCH 3/2] r600g, compute: provide local copy of ac_binary.{h, c}

2017-06-09 Thread Aaron Watry
On Fri, Jun 9, 2017 at 12:36 PM, Jan Vesely  wrote:
> On Fri, 2017-06-09 at 10:12 -0500, Aaron Watry wrote:
>> On Fri, Jun 9, 2017 at 8:20 AM, Jan Vesely  wrote:
>> > This is a verbatim copy of the code. The functions can be cleaned up since
>> > r600 does not use all the stuff that gcn does.
>> > The symbol names have been changed since we still use ac_binary.h header
>> > (for struct definition)
>> >
>> > Signed-off-by: Jan Vesely 
>> > ---
>> > Emil, Aaron,
>> >
>> > this is the last patch to get rid of libamd_common dependency (and thus 
>> > libdrm_amdgpu). I have only remote access to the machine atm, so it's 
>> > compile tested only.
>> >
>> > Jan
>> >
>> >  configure.ac   |   5 +-
>> >  src/gallium/drivers/r600/Automake.inc  |  10 +-
>> >  src/gallium/drivers/r600/Makefile.am   |   2 +
>> >  src/gallium/drivers/r600/evergreen_compute.c   | 197 
>> > -
>> >  .../drivers/r600/evergreen_compute_internal.h  |   2 +-
>> >  src/gallium/drivers/radeon/r600_pipe_common.c  |  21 ---
>> >  src/gallium/drivers/radeon/r600_pipe_common.h  |   5 -
>> >  src/gallium/targets/pipe-loader/Makefile.am|  10 +-
>> >  8 files changed, 200 insertions(+), 52 deletions(-)
>> >
>> > diff --git a/configure.ac b/configure.ac
>> > index 9433e3c..fc4a58f 100644
>> > --- a/configure.ac
>> > +++ b/configure.ac
>> > @@ -2631,10 +2631,7 @@ AM_CONDITIONAL(HAVE_SWRAST_DRI, test 
>> > x$HAVE_SWRAST_DRI = xyes)
>> >  AM_CONDITIONAL(HAVE_RADEON_VULKAN, test "x$HAVE_RADEON_VULKAN" = xyes)
>> >  AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes)
>> >
>> > -# FIXME: r600g still depends and amd_common (ac_binary*) when building 
>> > OpenCL
>> > -AM_CONDITIONAL(HAVE_AMD_DRIVERS, test \( "x$HAVE_GALLIUM_R600" = xyes -a \
>> > -  "x$enable_opencl" = xyes \) -o \
>> > -  "x$HAVE_GALLIUM_RADEONSI" = xyes -o 
>> > \
>> > +AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_RADEONSI" = xyes -o 
>> > \
>> >"x$HAVE_RADEON_VULKAN" = xyes)
>> >
>> >  AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
>> > diff --git a/src/gallium/drivers/r600/Automake.inc 
>> > b/src/gallium/drivers/r600/Automake.inc
>> > index 642d527..bb9f6ec 100644
>> > --- a/src/gallium/drivers/r600/Automake.inc
>> > +++ b/src/gallium/drivers/r600/Automake.inc
>> > @@ -5,18 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_R600
>> >  TARGET_LIB_DEPS += \
>> > $(top_builddir)/src/gallium/drivers/r600/libr600.la \
>> > $(RADEON_LIBS) \
>> > -   $(LIBDRM_LIBS)
>> > +   $(LIBDRM_LIBS) \
>> > +   $(LIBELF_LIBS)
>> >
>> >  TARGET_RADEON_WINSYS = \
>> > $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
>> >
>> >  TARGET_RADEON_COMMON = \
>> > $(top_builddir)/src/gallium/drivers/radeon/libradeon.la
>> > -
>> > -# TODO: drop this dependency. libamd_common requires libdrm_amdgpu.
>> > -if HAVE_AMD_DRIVERS
>> > -TARGET_RADEON_COMMON += \
>> > -   $(top_builddir)/src/amd/common/libamd_common.la
>> > -endif
>> > -
>> >  endif
>> > diff --git a/src/gallium/drivers/r600/Makefile.am 
>> > b/src/gallium/drivers/r600/Makefile.am
>> > index 44fd51d..fbfb6e6 100644
>> > --- a/src/gallium/drivers/r600/Makefile.am
>> > +++ b/src/gallium/drivers/r600/Makefile.am
>> > @@ -9,11 +9,13 @@ BUILT_SOURCES = $(R600_GENERATED_FILES)
>> >  AM_CFLAGS = \
>> > $(GALLIUM_DRIVER_CFLAGS) \
>> > $(RADEON_CFLAGS) \
>> > +   $(LIBELF_CFLAGS) \
>> > -I$(top_srcdir)/src/amd/common
>> >
>> >  AM_CXXFLAGS = \
>> > $(GALLIUM_DRIVER_CXXFLAGS) \
>> > $(RADEON_CFLAGS) \
>> > +   $(LIBELF_CFLAGS) \
>> > -I$(top_srcdir)/src/amd/common
>> >
>> >  noinst_LTLIBRARIES = libr600.la
>> > diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
>> > b/src/gallium/drivers/r600/evergreen_compute.c
>> > index d30024d..69a6d8b 100644
>> > --- a/src/gallium/drivers/r600/evergreen_compute.c
>> > +++ b/src/gallium/drivers/r600/evergreen_compute.c
>> > @@ -24,9 +24,10 @@
>> >   *  Adam Rak 
>> >   */
>> >
>> > +#include 
>> > +#include 
>> >  #include 
>> >  #include 
>> > -#include "ac_binary.h"
>> >  #include "pipe/p_defines.h"
>> >  #include "pipe/p_state.h"
>> >  #include "pipe/p_context.h"
>> > @@ -179,6 +180,192 @@ static void evergreen_cs_set_constant_buffer(struct 
>> > r600_context *rctx,
>> >  #define R_028850_SQ_PGM_RESOURCES_PS 0x028850
>> >
>> >  #ifdef HAVE_OPENCL
>> > +/*
>> > + * shader binary helpers.
>> > + */
>> > +static void r600_shader_binary_init(struct ac_shader_binary *b)
>> > +{
>> > +   memset(b, 0, sizeof(*b));
>> > +}
>> > +
>> > +static void r600_shader_binary_clean(struct ac_shader_binary *b)
>> > +{
>> > +   if (!b)
>> > +  

Re: [Mesa-dev] [PATCH] nir: make various getters take const pointers

2017-06-09 Thread Connor Abbott
Reviewed-by: Connor Abbott 

On Tue, Jun 6, 2017 at 4:25 PM, Grazvydas Ignotas  wrote:
> This will allow to constify other things.
>
> Signed-off-by: Grazvydas Ignotas 
> ---
>  src/compiler/nir/nir.h  | 25 +
>  src/compiler/nir/nir_lower_io.c |  2 +-
>  2 files changed, 14 insertions(+), 13 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 3b827bf..ab7ba14 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -436,19 +436,19 @@ nir_instr_prev(nir_instr *instr)
> else
>return exec_node_data(nir_instr, prev, node);
>  }
>
>  static inline bool
> -nir_instr_is_first(nir_instr *instr)
> +nir_instr_is_first(const nir_instr *instr)
>  {
> -   return exec_node_is_head_sentinel(exec_node_get_prev(>node));
> +   return exec_node_is_head_sentinel(exec_node_get_prev_const(>node));
>  }
>
>  static inline bool
> -nir_instr_is_last(nir_instr *instr)
> +nir_instr_is_last(const nir_instr *instr)
>  {
> -   return exec_node_is_tail_sentinel(exec_node_get_next(>node));
> +   return exec_node_is_tail_sentinel(exec_node_get_next_const(>node));
>  }
>
>  typedef struct nir_ssa_def {
> /** for debugging only, can be NULL */
> const char* name;
> @@ -802,11 +802,12 @@ void nir_alu_src_copy(nir_alu_src *dest, const 
> nir_alu_src *src,
>  void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
> nir_alu_instr *instr);
>
>  /* is this source channel used? */
>  static inline bool
> -nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned 
> channel)
> +nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
> +   unsigned channel)
>  {
> if (nir_op_infos[instr->op].input_sizes[src] > 0)
>return channel < nir_op_infos[instr->op].input_sizes[src];
>
> return (instr->dest.write_mask >> channel) & 1;
> @@ -1085,11 +1086,11 @@ typedef struct {
>  extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
>
>
>  #define INTRINSIC_IDX_ACCESSORS(name, flag, type)
>  \
>  static inline type   
>  \
> -nir_intrinsic_##name(nir_intrinsic_instr *instr) 
>  \
> +nir_intrinsic_##name(const nir_intrinsic_instr *instr)   
>  \
>  {
>  \
> const nir_intrinsic_info *info = _intrinsic_infos[instr->intrinsic];  
>  \
> assert(info->index_map[NIR_INTRINSIC_##flag] > 0);
>  \
> return instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; 
>  \
>  }
>  \
> @@ -1219,11 +1220,11 @@ typedef struct {
>  */
> nir_deref_var *sampler;
>  } nir_tex_instr;
>
>  static inline unsigned
> -nir_tex_instr_dest_size(nir_tex_instr *instr)
> +nir_tex_instr_dest_size(const nir_tex_instr *instr)
>  {
> switch (instr->op) {
> case nir_texop_txs: {
>unsigned ret;
>switch (instr->sampler_dim) {
> @@ -1268,11 +1269,11 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
>
>  /* Returns true if this texture operation queries something about the texture
>   * rather than actually sampling it.
>   */
>  static inline bool
> -nir_tex_instr_is_query(nir_tex_instr *instr)
> +nir_tex_instr_is_query(const nir_tex_instr *instr)
>  {
> switch (instr->op) {
> case nir_texop_txs:
> case nir_texop_lod:
> case nir_texop_texture_samples:
> @@ -1291,11 +1292,11 @@ nir_tex_instr_is_query(nir_tex_instr *instr)
>unreachable("Invalid texture opcode");
> }
>  }
>
>  static inline nir_alu_type
> -nir_tex_instr_src_type(nir_tex_instr *instr, unsigned src)
> +nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
>  {
> switch (instr->src[src].src_type) {
> case nir_tex_src_coord:
>switch (instr->op) {
>case nir_texop_txf:
> @@ -1335,11 +1336,11 @@ nir_tex_instr_src_type(nir_tex_instr *instr, unsigned 
> src)
>unreachable("Invalid texture source type");
> }
>  }
>
>  static inline unsigned
> -nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
> +nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
>  {
> if (instr->src[src].src_type == nir_tex_src_coord)
>return instr->coord_components;
>
> /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs */
> @@ -1357,11 +1358,11 @@ nir_tex_instr_src_size(nir_tex_instr *instr, unsigned 
> src)
>
> return 1;
>  }
>
>  static inline int
> -nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
> +nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
>  {
> for (unsigned i = 0; i < instr->num_srcs; i++)
>if (instr->src[i].src_type == type)
>   

Re: [Mesa-dev] [PATCH 3/2] r600g, compute: provide local copy of ac_binary.{h, c}

2017-06-09 Thread Jan Vesely
On Fri, 2017-06-09 at 10:12 -0500, Aaron Watry wrote:
> On Fri, Jun 9, 2017 at 8:20 AM, Jan Vesely  wrote:
> > This is a verbatim copy of the code. The functions can be cleaned up since
> > r600 does not use all the stuff that gcn does.
> > The symbol names have been changed since we still use ac_binary.h header
> > (for struct definition)
> > 
> > Signed-off-by: Jan Vesely 
> > ---
> > Emil, Aaron,
> > 
> > this is the last patch to get rid of libamd_common dependency (and thus 
> > libdrm_amdgpu). I have only remote access to the machine atm, so it's 
> > compile tested only.
> > 
> > Jan
> > 
> >  configure.ac   |   5 +-
> >  src/gallium/drivers/r600/Automake.inc  |  10 +-
> >  src/gallium/drivers/r600/Makefile.am   |   2 +
> >  src/gallium/drivers/r600/evergreen_compute.c   | 197 
> > -
> >  .../drivers/r600/evergreen_compute_internal.h  |   2 +-
> >  src/gallium/drivers/radeon/r600_pipe_common.c  |  21 ---
> >  src/gallium/drivers/radeon/r600_pipe_common.h  |   5 -
> >  src/gallium/targets/pipe-loader/Makefile.am|  10 +-
> >  8 files changed, 200 insertions(+), 52 deletions(-)
> > 
> > diff --git a/configure.ac b/configure.ac
> > index 9433e3c..fc4a58f 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -2631,10 +2631,7 @@ AM_CONDITIONAL(HAVE_SWRAST_DRI, test 
> > x$HAVE_SWRAST_DRI = xyes)
> >  AM_CONDITIONAL(HAVE_RADEON_VULKAN, test "x$HAVE_RADEON_VULKAN" = xyes)
> >  AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes)
> > 
> > -# FIXME: r600g still depends and amd_common (ac_binary*) when building 
> > OpenCL
> > -AM_CONDITIONAL(HAVE_AMD_DRIVERS, test \( "x$HAVE_GALLIUM_R600" = xyes -a \
> > -  "x$enable_opencl" = xyes \) -o \
> > -  "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
> > +AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
> >"x$HAVE_RADEON_VULKAN" = xyes)
> > 
> >  AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
> > diff --git a/src/gallium/drivers/r600/Automake.inc 
> > b/src/gallium/drivers/r600/Automake.inc
> > index 642d527..bb9f6ec 100644
> > --- a/src/gallium/drivers/r600/Automake.inc
> > +++ b/src/gallium/drivers/r600/Automake.inc
> > @@ -5,18 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_R600
> >  TARGET_LIB_DEPS += \
> > $(top_builddir)/src/gallium/drivers/r600/libr600.la \
> > $(RADEON_LIBS) \
> > -   $(LIBDRM_LIBS)
> > +   $(LIBDRM_LIBS) \
> > +   $(LIBELF_LIBS)
> > 
> >  TARGET_RADEON_WINSYS = \
> > $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
> > 
> >  TARGET_RADEON_COMMON = \
> > $(top_builddir)/src/gallium/drivers/radeon/libradeon.la
> > -
> > -# TODO: drop this dependency. libamd_common requires libdrm_amdgpu.
> > -if HAVE_AMD_DRIVERS
> > -TARGET_RADEON_COMMON += \
> > -   $(top_builddir)/src/amd/common/libamd_common.la
> > -endif
> > -
> >  endif
> > diff --git a/src/gallium/drivers/r600/Makefile.am 
> > b/src/gallium/drivers/r600/Makefile.am
> > index 44fd51d..fbfb6e6 100644
> > --- a/src/gallium/drivers/r600/Makefile.am
> > +++ b/src/gallium/drivers/r600/Makefile.am
> > @@ -9,11 +9,13 @@ BUILT_SOURCES = $(R600_GENERATED_FILES)
> >  AM_CFLAGS = \
> > $(GALLIUM_DRIVER_CFLAGS) \
> > $(RADEON_CFLAGS) \
> > +   $(LIBELF_CFLAGS) \
> > -I$(top_srcdir)/src/amd/common
> > 
> >  AM_CXXFLAGS = \
> > $(GALLIUM_DRIVER_CXXFLAGS) \
> > $(RADEON_CFLAGS) \
> > +   $(LIBELF_CFLAGS) \
> > -I$(top_srcdir)/src/amd/common
> > 
> >  noinst_LTLIBRARIES = libr600.la
> > diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> > b/src/gallium/drivers/r600/evergreen_compute.c
> > index d30024d..69a6d8b 100644
> > --- a/src/gallium/drivers/r600/evergreen_compute.c
> > +++ b/src/gallium/drivers/r600/evergreen_compute.c
> > @@ -24,9 +24,10 @@
> >   *  Adam Rak 
> >   */
> > 
> > +#include 
> > +#include 
> >  #include 
> >  #include 
> > -#include "ac_binary.h"
> >  #include "pipe/p_defines.h"
> >  #include "pipe/p_state.h"
> >  #include "pipe/p_context.h"
> > @@ -179,6 +180,192 @@ static void evergreen_cs_set_constant_buffer(struct 
> > r600_context *rctx,
> >  #define R_028850_SQ_PGM_RESOURCES_PS 0x028850
> > 
> >  #ifdef HAVE_OPENCL
> > +/*
> > + * shader binary helpers.
> > + */
> > +static void r600_shader_binary_init(struct ac_shader_binary *b)
> > +{
> > +   memset(b, 0, sizeof(*b));
> > +}
> > +
> > +static void r600_shader_binary_clean(struct ac_shader_binary *b)
> > +{
> > +   if (!b)
> > +   return;
> > +   FREE(b->code);
> > +   FREE(b->config);
> > +   FREE(b->rodata);
> > +   FREE(b->global_symbol_offsets);
> > +   FREE(b->relocs);
> > +   

Re: [Mesa-dev] [PATCH] nir: make various getters take const pointers

2017-06-09 Thread Eric Engestrom
On Friday, 2017-06-09 20:23:04 +0300, Grazvydas Ignotas wrote:
> Ping. Boring patch, should be easy to review or NAK.

Don't know anything about NIR, so no clue if it's a good idea, but
I personally like `const` (I think it should've been the default :P)
and this patch looks good to me:
Reviewed-by: Eric Engestrom 

> 
> On Wed, Jun 7, 2017 at 2:25 AM, Grazvydas Ignotas  wrote:
> > This will allow constifying other things.
> >
> > Signed-off-by: Grazvydas Ignotas 
> > ---
> >  src/compiler/nir/nir.h  | 25 +
> >  src/compiler/nir/nir_lower_io.c |  2 +-
> >  2 files changed, 14 insertions(+), 13 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 3b827bf..ab7ba14 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -436,19 +436,19 @@ nir_instr_prev(nir_instr *instr)
> > else
> >return exec_node_data(nir_instr, prev, node);
> >  }
> >
> >  static inline bool
> > -nir_instr_is_first(nir_instr *instr)
> > +nir_instr_is_first(const nir_instr *instr)
> >  {
> > -   return exec_node_is_head_sentinel(exec_node_get_prev(>node));
> > +   return 
> > exec_node_is_head_sentinel(exec_node_get_prev_const(>node));
> >  }
> >
> >  static inline bool
> > -nir_instr_is_last(nir_instr *instr)
> > +nir_instr_is_last(const nir_instr *instr)
> >  {
> > -   return exec_node_is_tail_sentinel(exec_node_get_next(>node));
> > +   return 
> > exec_node_is_tail_sentinel(exec_node_get_next_const(>node));
> >  }
> >
> >  typedef struct nir_ssa_def {
> > /** for debugging only, can be NULL */
> > const char* name;
> > @@ -802,11 +802,12 @@ void nir_alu_src_copy(nir_alu_src *dest, const 
> > nir_alu_src *src,
> >  void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
> > nir_alu_instr *instr);
> >
> >  /* is this source channel used? */
> >  static inline bool
> > -nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned 
> > channel)
> > +nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
> > +   unsigned channel)
> >  {
> > if (nir_op_infos[instr->op].input_sizes[src] > 0)
> >return channel < nir_op_infos[instr->op].input_sizes[src];
> >
> > return (instr->dest.write_mask >> channel) & 1;
> > @@ -1085,11 +1086,11 @@ typedef struct {
> >  extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
> >
> >
> >  #define INTRINSIC_IDX_ACCESSORS(name, flag, type)  
> >\
> >  static inline type 
> >\
> > -nir_intrinsic_##name(nir_intrinsic_instr *instr)   
> >\
> > +nir_intrinsic_##name(const nir_intrinsic_instr *instr) 
> >\
> >  {  
> >\
> > const nir_intrinsic_info *info = 
> > _intrinsic_infos[instr->intrinsic];   \
> > assert(info->index_map[NIR_INTRINSIC_##flag] > 0);  
> >\
> > return instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1];   
> >\
> >  }  
> >\
> > @@ -1219,11 +1220,11 @@ typedef struct {
> >  */
> > nir_deref_var *sampler;
> >  } nir_tex_instr;
> >
> >  static inline unsigned
> > -nir_tex_instr_dest_size(nir_tex_instr *instr)
> > +nir_tex_instr_dest_size(const nir_tex_instr *instr)
> >  {
> > switch (instr->op) {
> > case nir_texop_txs: {
> >unsigned ret;
> >switch (instr->sampler_dim) {
> > @@ -1268,11 +1269,11 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
> >
> >  /* Returns true if this texture operation queries something about the 
> > texture
> >   * rather than actually sampling it.
> >   */
> >  static inline bool
> > -nir_tex_instr_is_query(nir_tex_instr *instr)
> > +nir_tex_instr_is_query(const nir_tex_instr *instr)
> >  {
> > switch (instr->op) {
> > case nir_texop_txs:
> > case nir_texop_lod:
> > case nir_texop_texture_samples:
> > @@ -1291,11 +1292,11 @@ nir_tex_instr_is_query(nir_tex_instr *instr)
> >unreachable("Invalid texture opcode");
> > }
> >  }
> >
> >  static inline nir_alu_type
> > -nir_tex_instr_src_type(nir_tex_instr *instr, unsigned src)
> > +nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
> >  {
> > switch (instr->src[src].src_type) {
> > case nir_tex_src_coord:
> >switch (instr->op) {
> >case nir_texop_txf:
> > @@ -1335,11 +1336,11 @@ nir_tex_instr_src_type(nir_tex_instr *instr, 
> > unsigned src)
> >unreachable("Invalid texture source type");
> > }
> >  }
> >
> >  static inline unsigned
> > -nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
> > +nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
> >  {
> > if 

[Mesa-dev] [PATCH v2 3/3] r600g, compute: provide local copy of functions from ac_binary.c

2017-06-09 Thread Jan Vesely
This is a verbatim copy of the code. The functions can be cleaned up since
r600 does not use all the stuff that gcn does.
The symbol names have been changed since we still use ac_binary.h header
(for struct definition)

v2: Add ifdef guard around r600_binary_clean call (Aaron)
Remove stray comment

Signed-off-by: Jan Vesely 
Tested-By: Aaron Watry 
---
 configure.ac   |   5 +-
 src/gallium/drivers/r600/Automake.inc  |  10 +-
 src/gallium/drivers/r600/Makefile.am   |   2 +
 src/gallium/drivers/r600/evergreen_compute.c   | 197 -
 .../drivers/r600/evergreen_compute_internal.h  |   2 +-
 src/gallium/drivers/radeon/r600_pipe_common.c  |  21 ---
 src/gallium/drivers/radeon/r600_pipe_common.h  |   5 -
 src/gallium/targets/pipe-loader/Makefile.am|  10 +-
 8 files changed, 200 insertions(+), 52 deletions(-)

diff --git a/configure.ac b/configure.ac
index 9433e3c..fc4a58f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2631,10 +2631,7 @@ AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = 
xyes)
 AM_CONDITIONAL(HAVE_RADEON_VULKAN, test "x$HAVE_RADEON_VULKAN" = xyes)
 AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes)
 
-# FIXME: r600g still depends and amd_common (ac_binary*) when building OpenCL
-AM_CONDITIONAL(HAVE_AMD_DRIVERS, test \( "x$HAVE_GALLIUM_R600" = xyes -a \
-  "x$enable_opencl" = xyes \) -o \
-  "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
+AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
   "x$HAVE_RADEON_VULKAN" = xyes)
 
 AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
diff --git a/src/gallium/drivers/r600/Automake.inc 
b/src/gallium/drivers/r600/Automake.inc
index 642d527..bb9f6ec 100644
--- a/src/gallium/drivers/r600/Automake.inc
+++ b/src/gallium/drivers/r600/Automake.inc
@@ -5,18 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_R600
 TARGET_LIB_DEPS += \
$(top_builddir)/src/gallium/drivers/r600/libr600.la \
$(RADEON_LIBS) \
-   $(LIBDRM_LIBS)
+   $(LIBDRM_LIBS) \
+   $(LIBELF_LIBS)
 
 TARGET_RADEON_WINSYS = \
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
 
 TARGET_RADEON_COMMON = \
$(top_builddir)/src/gallium/drivers/radeon/libradeon.la
-
-# TODO: drop this dependency. libamd_common requires libdrm_amdgpu.
-if HAVE_AMD_DRIVERS
-TARGET_RADEON_COMMON += \
-   $(top_builddir)/src/amd/common/libamd_common.la
-endif
-
 endif
diff --git a/src/gallium/drivers/r600/Makefile.am 
b/src/gallium/drivers/r600/Makefile.am
index 44fd51d..fbfb6e6 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -9,11 +9,13 @@ BUILT_SOURCES = $(R600_GENERATED_FILES)
 AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(RADEON_CFLAGS) \
+   $(LIBELF_CFLAGS) \
-I$(top_srcdir)/src/amd/common
 
 AM_CXXFLAGS = \
$(GALLIUM_DRIVER_CXXFLAGS) \
$(RADEON_CFLAGS) \
+   $(LIBELF_CFLAGS) \
-I$(top_srcdir)/src/amd/common
 
 noinst_LTLIBRARIES = libr600.la
diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index d30024d..1f1e083 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -24,9 +24,10 @@
  *  Adam Rak 
  */
 
+#include 
+#include 
 #include 
 #include 
-#include "ac_binary.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
@@ -179,6 +180,190 @@ static void evergreen_cs_set_constant_buffer(struct 
r600_context *rctx,
 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850
 
 #ifdef HAVE_OPENCL
+/*
+ * shader binary helpers.
+ */
+static void r600_shader_binary_init(struct ac_shader_binary *b)
+{
+   memset(b, 0, sizeof(*b));
+}
+
+static void r600_shader_binary_clean(struct ac_shader_binary *b)
+{
+   if (!b)
+   return;
+   FREE(b->code);
+   FREE(b->config);
+   FREE(b->rodata);
+   FREE(b->global_symbol_offsets);
+   FREE(b->relocs);
+   FREE(b->disasm_string);
+   FREE(b->llvm_ir_string);
+}
+
+static void parse_symbol_table(Elf_Data *symbol_table_data,
+   const GElf_Shdr *symbol_table_header,
+   struct ac_shader_binary *binary)
+{
+   GElf_Sym symbol;
+   unsigned i = 0;
+   unsigned symbol_count =
+   symbol_table_header->sh_size / symbol_table_header->sh_entsize;
+
+   /* We are over allocating this list, because symbol_count gives the
+* total number of symbols, and we will only be filling the list
+* with offsets of global symbols.  The memory savings from
+* allocating the correct size of this list will be small, 

[Mesa-dev] [Bug 101334] Any vulkan app seems to freeze the system

2017-06-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101334

--- Comment #3 from Eric Engestrom  ---
(In reply to John from comment #2)
> Would anything else help?

It would be really helpful if you could bisect the issue.
This means picking an app (game) that was working and doesn't work anymore, and
running `git bisect` using this app to determine if each commit has the issue.

This page can help you if you don't know how the commands work:
https://git-scm.com/docs/git-bisect

Note that I barely know anything about radv, so unless there's something fairly
obvious in the bad commit I won't be able to help past this.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: make various getters take const pointers

2017-06-09 Thread Grazvydas Ignotas
Ping. Boring patch, should be easy to review or NAK.

On Wed, Jun 7, 2017 at 2:25 AM, Grazvydas Ignotas  wrote:
> This will allow constifying other things.
>
> Signed-off-by: Grazvydas Ignotas 
> ---
>  src/compiler/nir/nir.h  | 25 +
>  src/compiler/nir/nir_lower_io.c |  2 +-
>  2 files changed, 14 insertions(+), 13 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 3b827bf..ab7ba14 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -436,19 +436,19 @@ nir_instr_prev(nir_instr *instr)
> else
>return exec_node_data(nir_instr, prev, node);
>  }
>
>  static inline bool
> -nir_instr_is_first(nir_instr *instr)
> +nir_instr_is_first(const nir_instr *instr)
>  {
> -   return exec_node_is_head_sentinel(exec_node_get_prev(>node));
> +   return exec_node_is_head_sentinel(exec_node_get_prev_const(>node));
>  }
>
>  static inline bool
> -nir_instr_is_last(nir_instr *instr)
> +nir_instr_is_last(const nir_instr *instr)
>  {
> -   return exec_node_is_tail_sentinel(exec_node_get_next(>node));
> +   return exec_node_is_tail_sentinel(exec_node_get_next_const(>node));
>  }
>
>  typedef struct nir_ssa_def {
> /** for debugging only, can be NULL */
> const char* name;
> @@ -802,11 +802,12 @@ void nir_alu_src_copy(nir_alu_src *dest, const 
> nir_alu_src *src,
>  void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
> nir_alu_instr *instr);
>
>  /* is this source channel used? */
>  static inline bool
> -nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned 
> channel)
> +nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
> +   unsigned channel)
>  {
> if (nir_op_infos[instr->op].input_sizes[src] > 0)
>return channel < nir_op_infos[instr->op].input_sizes[src];
>
> return (instr->dest.write_mask >> channel) & 1;
> @@ -1085,11 +1086,11 @@ typedef struct {
>  extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
>
>
>  #define INTRINSIC_IDX_ACCESSORS(name, flag, type)
>  \
>  static inline type   
>  \
> -nir_intrinsic_##name(nir_intrinsic_instr *instr) 
>  \
> +nir_intrinsic_##name(const nir_intrinsic_instr *instr)   
>  \
>  {
>  \
> const nir_intrinsic_info *info = _intrinsic_infos[instr->intrinsic];  
>  \
> assert(info->index_map[NIR_INTRINSIC_##flag] > 0);
>  \
> return instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; 
>  \
>  }
>  \
> @@ -1219,11 +1220,11 @@ typedef struct {
>  */
> nir_deref_var *sampler;
>  } nir_tex_instr;
>
>  static inline unsigned
> -nir_tex_instr_dest_size(nir_tex_instr *instr)
> +nir_tex_instr_dest_size(const nir_tex_instr *instr)
>  {
> switch (instr->op) {
> case nir_texop_txs: {
>unsigned ret;
>switch (instr->sampler_dim) {
> @@ -1268,11 +1269,11 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
>
>  /* Returns true if this texture operation queries something about the texture
>   * rather than actually sampling it.
>   */
>  static inline bool
> -nir_tex_instr_is_query(nir_tex_instr *instr)
> +nir_tex_instr_is_query(const nir_tex_instr *instr)
>  {
> switch (instr->op) {
> case nir_texop_txs:
> case nir_texop_lod:
> case nir_texop_texture_samples:
> @@ -1291,11 +1292,11 @@ nir_tex_instr_is_query(nir_tex_instr *instr)
>unreachable("Invalid texture opcode");
> }
>  }
>
>  static inline nir_alu_type
> -nir_tex_instr_src_type(nir_tex_instr *instr, unsigned src)
> +nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
>  {
> switch (instr->src[src].src_type) {
> case nir_tex_src_coord:
>switch (instr->op) {
>case nir_texop_txf:
> @@ -1335,11 +1336,11 @@ nir_tex_instr_src_type(nir_tex_instr *instr, unsigned 
> src)
>unreachable("Invalid texture source type");
> }
>  }
>
>  static inline unsigned
> -nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
> +nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
>  {
> if (instr->src[src].src_type == nir_tex_src_coord)
>return instr->coord_components;
>
> /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs */
> @@ -1357,11 +1358,11 @@ nir_tex_instr_src_size(nir_tex_instr *instr, unsigned 
> src)
>
> return 1;
>  }
>
>  static inline int
> -nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
> +nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
>  {
> for (unsigned i = 0; i < instr->num_srcs; i++)
>if (instr->src[i].src_type == type)
>   

[Mesa-dev] [PATCH 2/2] r600/eg: distribute egd_tables.py in the dist file

2017-06-09 Thread Juan A. Suarez Romero
Otherwise, `make distcheck` will fail.
---
 src/gallium/drivers/r600/Makefile.am | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/r600/Makefile.am 
b/src/gallium/drivers/r600/Makefile.am
index 44fd51d..2b1ffb5 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -35,6 +35,10 @@ AM_CFLAGS += \
-DHAVE_OPENCL
 endif
 
+CLEANFILES = \
+   egd_tables.h
+
 EXTRA_DIST = \
+   egd_tables.py \
sb/notes.markdown \
sb/sb_bc_fmt_def.inc
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: include gen4_blorp_exec.h into EXTRA_DIST

2017-06-09 Thread Juan A. Suarez Romero
Otherwise, `make distcheck` will fail.
---
 src/mesa/drivers/dri/i965/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/Makefile.am 
b/src/mesa/drivers/dri/i965/Makefile.am
index 762aefc..e2d5992 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -98,6 +98,7 @@ BUILT_SOURCES = $(i965_oa_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES)
 
 EXTRA_DIST = \
+   gen4_blorp_exec.h \
brw_oa_hsw.xml \
brw_oa.py
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] RFC: radeon/compute: Limit allocations for VRAM-based chips to 3/4 VRAM

2017-06-09 Thread Aaron Watry
On Wed, Jun 7, 2017 at 11:12 PM, Aaron Watry  wrote:
> On Wed, Jun 7, 2017 at 9:15 PM, Michel Dänzer  wrote:
>> On 08/06/17 03:42 AM, Marek Olšák wrote:
>>> On Wed, Jun 7, 2017 at 4:10 PM, Aaron Watry  wrote:
>>>> On Mon, Jun 5, 2017 at 3:07 PM, Marek Olšák  wrote:
>>>>>
>>>>> Can you make the change in radeon_drm_winsys.c instead?
>>>>
>>>> Something like the following?
>>>>
 diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
 b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
 index a485615ae4..44948f49ef 100644
 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
 +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
 @@ -365,6 +365,8 @@ static bool do_winsys_init(struct radeon_drm_winsys 
 *ws)
  /* Radeon allocates all buffers as contigous, which makes large 
 allocations
   * unlikely to succeed. */
  ws->info.max_alloc_size = MAX2(ws->info.vram_size,
 ws->info.gart_size) * 0.7;
 +if (ws->info.has_dedicated_vram)
 +ws->info.max_alloc_size = MIN2(ws->info.vram_size * 0.7,
 ws->info.max_alloc_size);
  if (ws->info.drm_minor < 40)
  ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 
 256*1024*1024);
>>>
>>> Yes, feel free to push that.
>>
>> That also affects PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE, is that intended?
>
> Not necessarily.
>
> Part of the reason that I had originally put this in
> r600_pipe_common.c under the compute params was that I didn't feel
> comfortable changing this for all workload types. There's evidence
> that implies that the closed-source AMD CL runtime limits global
> allocations to either 256MB or 1/4 VRAM (on a 1GB card), so 70% of the
> max of GART/VRAM seems a bit high for us to report. I'll probably
> check around a bit and see what the prevailing limits seem to be and
> if lowering the absolute max might make sense here (for compute loads
> only), as a failure to allocate the requested amount of memory seems
> to result in system hangs shortly thereafter, and I'd like to get the
> frequency of those occurrences down a bit.

At least in Windows 10 using the AMD binary CL runtime, it reports
global memory size of 2GB and max allocation of 1GB for the 1GB card
that I've got.  Whether that's being calculated as max allocation =
VRAM-size, or 50% of global memory size is an unknown. I'm not sure if
you can easily adjust the gart size in windows. So my original theory
of 1/4 VRAM seems to be limited to other cards or older drivers/OSes.

Given that Marek/Nicolai want to stick this in radeon_drm_winsys.c,
I'm ok with putting it there.  I think it still makes sense to limit
the max allocation to a percentage of VRAM when the card has its own
memory available for the reasons already mentioned by Nicolai. Whether
70% is a good number is another question, but one thing at a time.

Any objections, Michel, or were you raising the point that it
affected the texture allocation sizes just to make sure we were aware?

--Aaron

>
> --Aaron
>
>
>
>> --
>> Earthling Michel Dänzer   |   http://www.amd.com
>> Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] egl/dri2: add image extension to swrast_core_extensions

2017-06-09 Thread Gurchetan Singh
Actually, these are the only patches that are required.  We're trying to
run the Android Studio emulator using the host's GLES implementation.  The
emulator uses the image extension in that case:

https://android.googlesource.com/platform/sdk/+/emu-2.4-release/emulator/opengl/host/libs/libOpenglRender/FrameBuffer.cpp
https://android.googlesource.com/platform/sdk/+/emu-2.4-release/emulator/opengl/host/libs/libOpenglRender/ColorBuffer.cpp

It does only use a subset of the extension, but nothing hardware specific.


On Fri, Jun 9, 2017 at 4:17 AM, Emil Velikov 
wrote:

> Hi Gurchetan,
>
> On 9 June 2017 at 01:28, gurchetansi...@chromium.org
>  wrote:
> > From: Gurchetan Singh 
> >
> > Otherwise, this extension is not visible to the EGL user
> > ---
> >  src/egl/drivers/dri2/egl_dri2.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_
> dri2.c
> > index 7175e827c9..9e845e99e3 100644
> > --- a/src/egl/drivers/dri2/egl_dri2.c
> > +++ b/src/egl/drivers/dri2/egl_dri2.c
> > @@ -429,6 +429,7 @@ static const struct dri2_extension_match
> swrast_driver_extensions[] = {
> >
> >  static const struct dri2_extension_match swrast_core_extensions[] = {
> > { __DRI_TEX_BUFFER, 2, offsetof(struct dri2_egl_display, tex_buffer)
> },
> > +   { __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) },
> IIRC the current codebase will not use it, even if we expose it. Correct?
> Wild guess here is that you guys have some extra patches around, like
> say using vgem? Is there a public repo with the lot?
>
> On the st/dri side (earlier patches) - one should be able to build
> st/dri without any hardware specific knowledge and/or files.
> Currently that's done by isolating all the DRM specifics in dri2.c.
> Not sure if breaking that (an architectural split, imho) is a good idea.
>
> -Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 24.5/24] i965/cnl: Add a preliminary device for Cannonlake

2017-06-09 Thread Anuj Phogat
On Thu, Jun 8, 2017 at 5:19 PM, Jason Ekstrand  wrote:
> I sent out a Vulkan patch (and CCd you) that needs to land before this one
> so that we don't accidentally start advertising Vulkan support once the PCI
> IDs land.
>
I'll land the vulkan patch before this one. Thanks.
> This seems to match the docs.
>
> Reviewed-by: Jason Ekstrand 
>
> On Fri, Jun 2, 2017 at 6:21 PM, Anuj Phogat  wrote:
>>
>> From: Ben Widawsky 
>>
>> v2 (Anuj):
>> Rebased on master and updated pci ids
>> Remove redundant initialization of max_wm_threads to 64 * 12.
>> For gen9+ max_wm_threads are initialized in gen_get_device_info().
>>
>> v3 (Anuj):
>> Move the patch to end of series.
>> Remove unused gt1, gt2, gt3 functions.
>> Remove l3_banks variable. Variable is now available on master.
>>
>> Signed-off-by: Anuj Phogat 
>> Signed-off-by: Ben Widawsky 
>> Cc: Jason Ekstrand 
>> ---
>>  include/pci_ids/i965_pci_ids.h | 12 ++
>>  src/intel/common/gen_device_info.c | 46
>> ++
>>  2 files changed, 58 insertions(+)
>>
>> diff --git a/include/pci_ids/i965_pci_ids.h
>> b/include/pci_ids/i965_pci_ids.h
>> index 17504f5..b296359 100644
>> --- a/include/pci_ids/i965_pci_ids.h
>> +++ b/include/pci_ids/i965_pci_ids.h
>> @@ -165,3 +165,15 @@ CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics
>> 650 (Kaby Lake GT3)")
>>  CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
>>  CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)")
>>  CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
>> +CHIPSET(0x5A49, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
>> +CHIPSET(0x5A4A, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
>> +CHIPSET(0x5A41, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
>> +CHIPSET(0x5A42, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
>> +CHIPSET(0x5A44, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
>> +CHIPSET(0x5A59, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
>> +CHIPSET(0x5A5A, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
>> +CHIPSET(0x5A5C, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
>> +CHIPSET(0x5A50, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
>> +CHIPSET(0x5A51, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
>> +CHIPSET(0x5A52, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
>> +CHIPSET(0x5A54, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
>> diff --git a/src/intel/common/gen_device_info.c
>> b/src/intel/common/gen_device_info.c
>> index 47aed9d..f62fccb 100644
>> --- a/src/intel/common/gen_device_info.c
>> +++ b/src/intel/common/gen_device_info.c
>> @@ -555,6 +555,52 @@ static const struct gen_device_info
>> gen_device_info_glk_2x6 = {
>> GEN9_LP_FEATURES_2X6
>>  };
>>
>> +#define GEN10_HW_INFO   \
>> +   .gen = 10,   \
>> +   .max_vs_threads = 728,   \
>> +   .max_gs_threads = 432,   \
>> +   .max_tcs_threads = 432,  \
>> +   .max_tes_threads = 624,  \
>> +   .max_cs_threads = 56,\
>> +   .urb = { \
>> +  .size = 256,  \
>> +  .min_entries = {  \
>> + [MESA_SHADER_VERTEX]= 64,  \
>> + [MESA_SHADER_TESS_EVAL] = 34,  \
>> +  },\
>> +  .max_entries = {  \
>> +  [MESA_SHADER_VERTEX]   = 3936,\
>> +  [MESA_SHADER_TESS_CTRL]= 896, \
>> +  [MESA_SHADER_TESS_EVAL]= 2064,\
>> +  [MESA_SHADER_GEOMETRY] = 832, \
>> +  },\
>> +   }
>> +
>> +#define GEN10_FEATURES(_gt, _slices, _l3)   \
>> +   GEN8_FEATURES,   \
>> +   GEN10_HW_INFO,   \
>> +   .gt = _gt, .num_slices = _slices, .l3_banks = _l3
>> +
>> +static const struct gen_device_info gen_device_info_cnl_2x8 = {
>> +   /* GT0.5 */
>> +   GEN10_FEATURES(1, 1, 2)
>> +};
>> +
>> +static const struct gen_device_info gen_device_info_cnl_3x8 = {
>> +   /* GT1 */
>> +   GEN10_FEATURES(1, 1, 3)
>> +};
>> +
>> +static const struct gen_device_info gen_device_info_cnl_4x8 = {
>> +   /* GT 1.5 */
>> +   GEN10_FEATURES(1, 2, 6)
>> +};
>> +
>> +static const struct gen_device_info gen_device_info_cnl_5x8 = {
>> +   /* GT2 */
>> +   GEN10_FEATURES(2, 2, 6)
>> +};
>> +
>>  bool
>>  gen_get_device_info(int devid, struct gen_device_info *devinfo)
>>  {
>> --
>> 2.9.3
>>
>
___
mesa-dev mailing list

Re: [Mesa-dev] [PATCH v5] egl/android: support for EGL_KHR_partial_update

2017-06-09 Thread Eric Engestrom
On Friday, 2017-06-09 20:13:34 +0530, Harish Krupo wrote:
> This patch adds support for the EGL_KHR_partial_update extension for
> android platform. It passes 36/37 tests in dEQP for EGL_KHR_partial_update.
> 1 test not supported.
> 
> v2: add fallback for eglSetDamageRegionKHR (Tapani)
> 
> v3: The native_window_set_surface_damage call is available only from
> Android version 6.0. Reintroduce the ANDROID_VERSION guard and
> advertise extension only if version is >= 6.0. (Emil Velikov)
> 
> v4: use newly introduced ANDROID_API_LEVEL guard rather than
> ANDROID_VERSION guard to advertise the extension.The extension
> is advertised only if ANDROID_API_LEVEL >= 23 (Android 6.0 or
> greater). Add fallback function for platforms other than Android.
> Fix possible math overflow. (Emil Velikov)
> Return immediately when n_rects is 0. Place function's entrypoint
> in alphabetical order. (Eric Engestrom)
> 
> v5: Replace unnecessary calloc with malloc (Eric)
> Check for BAD_ALLOC error (Emil)
> Check for error in native_window_set_damage_region. (Emil, Tapani,
> Eric).
> 
> Signed-off-by: Harish Krupo 
> Reviewed-by: Emil Velikov 
> Reviewed-by: Eric Engestrom 
> Reviewed-by: Tapani Pälli 

Looks good to me.
Do you want me to push this for you?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/2] r600g, compute: provide local copy of ac_binary.{h, c}

2017-06-09 Thread Aaron Watry
On Fri, Jun 9, 2017 at 8:20 AM, Jan Vesely  wrote:
> This is a verbatim copy of the code. The functions can be cleaned up since
> r600 does not use all the stuff that gcn does.
> The symbol names have been changed since we still use ac_binary.h header
> (for struct definition)
>
> Signed-off-by: Jan Vesely 
> ---
> Emil, Aaron,
>
> this is the last patch to get rid of libamd_common dependency (and thus 
> libdrm_amdgpu). I have only remote access to the machine atm, so it's compile 
> tested only.
>
> Jan
>
>  configure.ac   |   5 +-
>  src/gallium/drivers/r600/Automake.inc  |  10 +-
>  src/gallium/drivers/r600/Makefile.am   |   2 +
>  src/gallium/drivers/r600/evergreen_compute.c   | 197 
> -
>  .../drivers/r600/evergreen_compute_internal.h  |   2 +-
>  src/gallium/drivers/radeon/r600_pipe_common.c  |  21 ---
>  src/gallium/drivers/radeon/r600_pipe_common.h  |   5 -
>  src/gallium/targets/pipe-loader/Makefile.am|  10 +-
>  8 files changed, 200 insertions(+), 52 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 9433e3c..fc4a58f 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2631,10 +2631,7 @@ AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI 
> = xyes)
>  AM_CONDITIONAL(HAVE_RADEON_VULKAN, test "x$HAVE_RADEON_VULKAN" = xyes)
>  AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes)
>
> -# FIXME: r600g still depends and amd_common (ac_binary*) when building OpenCL
> -AM_CONDITIONAL(HAVE_AMD_DRIVERS, test \( "x$HAVE_GALLIUM_R600" = xyes -a \
> -  "x$enable_opencl" = xyes \) -o \
> -  "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
> +AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
>"x$HAVE_RADEON_VULKAN" = xyes)
>
>  AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
> diff --git a/src/gallium/drivers/r600/Automake.inc 
> b/src/gallium/drivers/r600/Automake.inc
> index 642d527..bb9f6ec 100644
> --- a/src/gallium/drivers/r600/Automake.inc
> +++ b/src/gallium/drivers/r600/Automake.inc
> @@ -5,18 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_R600
>  TARGET_LIB_DEPS += \
> $(top_builddir)/src/gallium/drivers/r600/libr600.la \
> $(RADEON_LIBS) \
> -   $(LIBDRM_LIBS)
> +   $(LIBDRM_LIBS) \
> +   $(LIBELF_LIBS)
>
>  TARGET_RADEON_WINSYS = \
> $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
>
>  TARGET_RADEON_COMMON = \
> $(top_builddir)/src/gallium/drivers/radeon/libradeon.la
> -
> -# TODO: drop this dependency. libamd_common requires libdrm_amdgpu.
> -if HAVE_AMD_DRIVERS
> -TARGET_RADEON_COMMON += \
> -   $(top_builddir)/src/amd/common/libamd_common.la
> -endif
> -
>  endif
> diff --git a/src/gallium/drivers/r600/Makefile.am 
> b/src/gallium/drivers/r600/Makefile.am
> index 44fd51d..fbfb6e6 100644
> --- a/src/gallium/drivers/r600/Makefile.am
> +++ b/src/gallium/drivers/r600/Makefile.am
> @@ -9,11 +9,13 @@ BUILT_SOURCES = $(R600_GENERATED_FILES)
>  AM_CFLAGS = \
> $(GALLIUM_DRIVER_CFLAGS) \
> $(RADEON_CFLAGS) \
> +   $(LIBELF_CFLAGS) \
> -I$(top_srcdir)/src/amd/common
>
>  AM_CXXFLAGS = \
> $(GALLIUM_DRIVER_CXXFLAGS) \
> $(RADEON_CFLAGS) \
> +   $(LIBELF_CFLAGS) \
> -I$(top_srcdir)/src/amd/common
>
>  noinst_LTLIBRARIES = libr600.la
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> b/src/gallium/drivers/r600/evergreen_compute.c
> index d30024d..69a6d8b 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -24,9 +24,10 @@
>   *  Adam Rak 
>   */
>
> +#include 
> +#include 
>  #include 
>  #include 
> -#include "ac_binary.h"
>  #include "pipe/p_defines.h"
>  #include "pipe/p_state.h"
>  #include "pipe/p_context.h"
> @@ -179,6 +180,192 @@ static void evergreen_cs_set_constant_buffer(struct 
> r600_context *rctx,
>  #define R_028850_SQ_PGM_RESOURCES_PS 0x028850
>
>  #ifdef HAVE_OPENCL
> +/*
> + * shader binary helpers.
> + */
> +static void r600_shader_binary_init(struct ac_shader_binary *b)
> +{
> +   memset(b, 0, sizeof(*b));
> +}
> +
> +static void r600_shader_binary_clean(struct ac_shader_binary *b)
> +{
> +   if (!b)
> +   return;
> +   FREE(b->code);
> +   FREE(b->config);
> +   FREE(b->rodata);
> +   FREE(b->global_symbol_offsets);
> +   FREE(b->relocs);
> +   FREE(b->disasm_string);
> +   FREE(b->llvm_ir_string);
> +}
> +
> +static void parse_symbol_table(Elf_Data *symbol_table_data,
> +   const GElf_Shdr *symbol_table_header,
> +   struct ac_shader_binary *binary)
> +{
> +   GElf_Sym symbol;
> +   unsigned i = 0;
> +   

[Mesa-dev] [Bug 101338] Mesa software rendering draws incompletely on Raspberry Pi

2017-06-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101338

--- Comment #3 from Brian Paul  ---
I don't have a Raspberry Pi to test with.  I installed geomview/savi on my
Intel deskside system and tested with both NVIDIA's driver and llvmpipe.  With
both I see a shaded blue sphere with an orbit ring and red/green/blue axes.  No
blue and yellow texture mapping.  No missing triangles.

My guess is an LLVM code generation issue.  I don't think llvmpipe has been
tested much with ARM CPUs.  Maybe someone else knows more about that.

I guess one alternative would be the 'softpipe' driver (export
GALLIUM_DRIVER=softpipe) but it's very slow and probably won't be practical.

Sorry I can't be of more help.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: don't emit DB_STENCIL_CONTROL if it has no effect

2017-06-09 Thread Marek Olšák
On Fri, Jun 9, 2017 at 4:31 PM, Samuel Pitoiset
 wrote:
>
>
> On 06/09/2017 04:26 PM, Marek Olšák wrote:
>>
>> On Fri, Jun 9, 2017 at 4:00 PM, Samuel Pitoiset
>>  wrote:
>>>
>>>
>>>
>>> On 06/09/2017 03:49 PM, Marek Olšák wrote:


 From: Marek Olšák 

 ---
src/gallium/drivers/radeonsi/si_state.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/src/gallium/drivers/radeonsi/si_state.c
 b/src/gallium/drivers/radeonsi/si_state.c
 index 53f66ac..a8255f2 100644
 --- a/src/gallium/drivers/radeonsi/si_state.c
 +++ b/src/gallium/drivers/radeonsi/si_state.c
 @@ -1078,21 +1078,22 @@ static void *si_create_dsa_state(struct
 pipe_context *ctx,
  if (state->alpha.enabled) {
  dsa->alpha_func = state->alpha.func;
  si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0
 +
 SI_SGPR_ALPHA_REF * 4,
 fui(state->alpha.ref_value));
  } else {
  dsa->alpha_func = PIPE_FUNC_ALWAYS;
  }
  si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL,
 db_depth_control);
 -   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL,
 db_stencil_control);
 +   if (state->stencil[0].enabled)
 +   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL,
 db_stencil_control);
>>>
>>>
>>>
>>> How if stencil is enabled, then disabled? Doesn't this reg has to be set
>>> to
>>> 0?
>>
>>
>> DB_DEPTH_CONTROL enables and disables stencil.
>
>
> Right.
>
> Can't we move this in the same if above?

Well, I'd like to keep register writes grouped together at the end, but it
mostly doesn't matter.

Marek

>
> Except this nitpick, series is:
>
> Reviewed-by: Samuel Pitoiset 
>
>>
>> Marek
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] configure.ac: remove explicit -lpthread link

2017-06-09 Thread Emil Velikov
On 9 June 2017 at 14:16, Rowley, Timothy O  wrote:
> With this patch series applied, the build fails for me on ubuntu 16.04.
>
Thanks Tim - I can see what's going wrong. I'll double-check things
and send v2 in a bit.

Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] Reduce zlib requirement from 1.2.8 to 1.2.3.

2017-06-09 Thread Chuck Atkins
Hi Emil,

Did you test the upstream versions or the distribution ones which tend
> to be patched?
>

Both.  I built 17.1.1 against the system supplied zlib-devel packages for
1.2.3 in EL6 and 1.2.7 on EL7.  I then swapped out the zlib version at
runtime via LD_LIBRARY_PATH with ones built from the release tarballs from
zlib.net



> What tests did you use?
>

I ran the piglit shader profile with --quick added to the tests since I
figured that would exercise the shader cache, which would in turn use zlib.



- Chuck
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Don't advertise support on anything above gen9

2017-06-09 Thread Anuj Phogat
On Thu, Jun 8, 2017 at 5:10 PM, Jason Ekstrand  wrote:
> This will prevent the driver from even trying to work on Cannon Lake
> until we get actual support added.
>
> Cc: Anuj Phogat 
> ---
>  src/intel/vulkan/anv_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 72a96b7..8e8c502 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -293,7 +293,7 @@ anv_physical_device_init(struct anv_physical_device 
> *device,
>fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
> } else if (device->info.gen == 7 && device->info.is_baytrail) {
>fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
> -   } else if (device->info.gen >= 8) {
> +   } else if (device->info.gen >= 8 && device->info.gen <= 9) {
>/* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully
> * supported as anything */
> } else {
> --
> 2.5.0.400.gff86faf
>
Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101326] gallium/wgl: Allow context creation without prior SetPixelFormat()

2017-06-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101326

Brian Paul  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #4 from Brian Paul  ---
Patch 0ef39e588f92236f9e2fb1909a314c7eb70db8c2 pushed.  Thanks, Frank.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: fixed modulo zero crashes in tgsi interpreter

2017-06-09 Thread Brian Paul
Let's not impose behavior that would burden drivers (for example: "idiv 
by zero must result in ~0u").  Just updating the docs to say the result 
of div/mod by zero is undefined (unless we know something specific is 
needed) would be fine.  As it is now, some div/mod operations are 
documented to return 0 or ~0 but others say nothing and leave the 
reader wondering what's expected.


-Brian

On 06/09/2017 03:22 AM, Marius Gräfe wrote:

I can fix the remaining integer div/mod opcodes, no problem. I think a
consistent error value would be beneficial, would opt for ~0u for 32-bit
values and ~0ull for 64 bit values to keep it consistent with the only
existing requirement (that is d3d10). Should I just submit another patch
based on my original one with the fixed values? I am unsure about the
correct procedure, still new to this whole mailing list procedure.
As far as src/gallium/docs/source/tgsi.rst is concerned, the docs don't
seem to mention any error value anyway.

Marius


Am 08.06.2017 um 22:28 schrieb Roland Scheidegger:

The behavior is probably undefined for most of the opcodes (signed 32bit
div, all 64bit div/mod), the docs don't state anything. But in general,
this is all undefined in all apis (opencl, glsl, spir-v), with the only
exception being d3d10 - which only has udiv and umod, hence these
stating in the gallium docs the required result (~0u).
So, I suppose we could let the docs say it's actually undefined, right
now gallivm will actually return 0 for idiv but all-ones for idiv64 and
so on, it's not really consistent...

Roland


Am 08.06.2017 um 21:51 schrieb Brian Paul:

Marius,

As long as you're working on this, would you review
src/gallium/docs/source/tgsi.rst to check if all the div/mod
instructions document div/mod by zero behavior?  Thanks.

-Brian


On 06/08/2017 11:10 AM, Roland Scheidegger wrote:

I don't really know if it makes sense to have different "error values"
for signed vs. unsigned modulo 0 - maybe the "all bits set" approach
would do too (the gallivm code does this, because it is actually
easier). But since it's undefined in any case pretty much everywhere, I
suppose any value will do (only the 32bit umod really has a requirement
for all bits set due to d3d10 requirements, but d3d has neither signed
nor 64bit versions of it).
And I don't know why it would only crash in geometry shaders...

Reviewed-by: Roland Scheidegger 


Am 08.06.2017 um 18:28 schrieb Marius Gräfe:

softpipe throws integer division by zero exceptions on windows
when using % with integers in a geometry shader.
---
   src/gallium/auxiliary/tgsi/tgsi_exec.c | 24

   1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c41954c..abd2d16 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -866,20 +866,20 @@ static void
   micro_u64mod(union tgsi_double_channel *dst,
const union tgsi_double_channel *src)
   {
-   dst->u64[0] = src[0].u64[0] % src[1].u64[0];
-   dst->u64[1] = src[0].u64[1] % src[1].u64[1];
-   dst->u64[2] = src[0].u64[2] % src[1].u64[2];
-   dst->u64[3] = src[0].u64[3] % src[1].u64[3];
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] :
UINT64_MAX;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] :
UINT64_MAX;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] :
UINT64_MAX;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] :
UINT64_MAX;
   }

   static void
   micro_i64mod(union tgsi_double_channel *dst,
const union tgsi_double_channel *src)
   {
-   dst->i64[0] = src[0].i64[0] % src[1].i64[0];
-   dst->i64[1] = src[0].i64[1] % src[1].i64[1];
-   dst->i64[2] = src[0].i64[2] % src[1].i64[2];
-   dst->i64[3] = src[0].i64[3] % src[1].i64[3];
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] :
INT64_MAX;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] :
INT64_MAX;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] :
INT64_MAX;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] :
INT64_MAX;
   }

   static void
@@ -4653,10 +4653,10 @@ micro_mod(union tgsi_exec_channel *dst,
 const union tgsi_exec_channel *src0,
 const union tgsi_exec_channel *src1)
   {
-   dst->i[0] = src0->i[0] % src1->i[0];
-   dst->i[1] = src0->i[1] % src1->i[1];
-   dst->i[2] = src0->i[2] % src1->i[2];
-   dst->i[3] = src0->i[3] % src1->i[3];
+   dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : INT_MAX;
+   dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : INT_MAX;
+   dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : INT_MAX;
+   dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : INT_MAX;
   }

   static void


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH v5] egl/android: support for EGL_KHR_partial_update

2017-06-09 Thread Harish Krupo
This patch adds support for the EGL_KHR_partial_update extension for
android platform. It passes 36/37 tests in dEQP for EGL_KHR_partial_update.
1 test not supported.

v2: add fallback for eglSetDamageRegionKHR (Tapani)

v3: The native_window_set_surface_damage call is available only from
Android version 6.0. Reintroduce the ANDROID_VERSION guard and
advertise extension only if version is >= 6.0. (Emil Velikov)

v4: use newly introduced ANDROID_API_LEVEL guard rather than
ANDROID_VERSION guard to advertise the extension. The extension
is advertised only if ANDROID_API_LEVEL >= 23 (Android 6.0 or
greater). Add fallback function for platforms other than Android.
Fix possible math overflow. (Emil Velikov)
Return immediately when n_rects is 0. Place function's entrypoint
in alphabetical order. (Eric Engestrom)

v5: Replace unnecessary calloc with malloc (Eric)
Check for BAD_ALLOC error (Emil)
Check for error in native_window_set_damage_region. (Emil, Tapani,
Eric).

Signed-off-by: Harish Krupo 
Reviewed-by: Emil Velikov 
Reviewed-by: Eric Engestrom 
Reviewed-by: Tapani Pälli 
---
 src/egl/drivers/dri2/egl_dri2.c |  9 
 src/egl/drivers/dri2/egl_dri2.h |  4 ++
 src/egl/drivers/dri2/egl_dri2_fallbacks.h   |  8 +++
 src/egl/drivers/dri2/platform_android.c | 47 
 src/egl/drivers/dri2/platform_drm.c |  1 +
 src/egl/drivers/dri2/platform_surfaceless.c |  1 +
 src/egl/drivers/dri2/platform_wayland.c |  1 +
 src/egl/drivers/dri2/platform_x11.c |  2 +
 src/egl/drivers/dri2/platform_x11_dri3.c|  1 +
 src/egl/main/eglapi.c   | 83 +
 src/egl/main/eglapi.h   |  2 +
 src/egl/main/egldisplay.h   |  1 +
 src/egl/main/eglentrypoint.h|  1 +
 src/egl/main/eglfallbacks.c |  1 +
 src/egl/main/eglsurface.c   |  9 
 src/egl/main/eglsurface.h   | 12 +
 16 files changed, 183 insertions(+)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index d31a0bf8e0..a1d72166df 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -1512,6 +1512,14 @@ dri2_swap_buffers_region(_EGLDriver *drv, _EGLDisplay 
*dpy, _EGLSurface *surf,
 }
 
 static EGLBoolean
+dri2_set_damage_region(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf,
+   EGLint *rects, EGLint n_rects)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+   return dri2_dpy->vtbl->set_damage_region(drv, dpy, surf, rects, n_rects);
+}
+
+static EGLBoolean
 dri2_post_sub_buffer(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf,
  EGLint x, EGLint y, EGLint width, EGLint height)
 {
@@ -3141,6 +3149,7 @@ _eglBuiltInDriverDRI2(const char *args)
dri2_drv->base.API.SwapBuffers = dri2_swap_buffers;
dri2_drv->base.API.SwapBuffersWithDamageEXT = dri2_swap_buffers_with_damage;
dri2_drv->base.API.SwapBuffersRegionNOK = dri2_swap_buffers_region;
+   dri2_drv->base.API.SetDamageRegion = dri2_set_damage_region;
dri2_drv->base.API.PostSubBufferNV = dri2_post_sub_buffer;
dri2_drv->base.API.CopyBuffers = dri2_copy_buffers,
dri2_drv->base.API.QueryBufferAge = dri2_query_buffer_age;
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 449016093a..ba7a7be57b 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -118,6 +118,10 @@ struct dri2_egl_display_vtbl {
   _EGLSurface *surface,
   const EGLint *rects, EGLint n_rects);
 
+   EGLBoolean (*set_damage_region)(_EGLDriver *drv, _EGLDisplay *dpy,
+   _EGLSurface *surface,
+   const EGLint *rects, EGLint n_rects);
+
EGLBoolean (*swap_buffers_region)(_EGLDriver *drv, _EGLDisplay *dpy,
  _EGLSurface *surf, EGLint numRects,
  const EGLint *rects);
diff --git a/src/egl/drivers/dri2/egl_dri2_fallbacks.h 
b/src/egl/drivers/dri2/egl_dri2_fallbacks.h
index 67a9c5034a..d8363c9bdd 100644
--- a/src/egl/drivers/dri2/egl_dri2_fallbacks.h
+++ b/src/egl/drivers/dri2/egl_dri2_fallbacks.h
@@ -95,6 +95,14 @@ dri2_fallback_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy,
return EGL_FALSE;
 }
 
+static inline EGLBoolean
+dri2_fallback_set_damage_region(_EGLDriver *drv, _EGLDisplay *dpy,
+_EGLSurface *surf,
+const EGLint *rects, EGLint n_rects)
+{
+   return EGL_FALSE;
+}
+
 static inline EGLint
 dri2_fallback_query_buffer_age(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLSurface *surf)
diff --git 

Re: [Mesa-dev] [PATCH 3/3] radeonsi: don't emit DB_STENCIL_CONTROL if it has no effect

2017-06-09 Thread Samuel Pitoiset



On 06/09/2017 04:26 PM, Marek Olšák wrote:

On Fri, Jun 9, 2017 at 4:00 PM, Samuel Pitoiset
 wrote:



On 06/09/2017 03:49 PM, Marek Olšák wrote:


From: Marek Olšák 

---
   src/gallium/drivers/radeonsi/si_state.c | 3 ++-
   1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c
b/src/gallium/drivers/radeonsi/si_state.c
index 53f66ac..a8255f2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1078,21 +1078,22 @@ static void *si_create_dsa_state(struct
pipe_context *ctx,
 if (state->alpha.enabled) {
 dsa->alpha_func = state->alpha.func;
 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
SI_SGPR_ALPHA_REF * 4,
fui(state->alpha.ref_value));
 } else {
 dsa->alpha_func = PIPE_FUNC_ALWAYS;
 }
 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
-   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL,
db_stencil_control);
+   if (state->stencil[0].enabled)
+   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL,
db_stencil_control);



How if stencil is enabled, then disabled? Doesn't this reg has to be set to
0?


DB_DEPTH_CONTROL enables and disables stencil.


Right.

Can't we move this in the same if above?

Except this nitpick, series is:

Reviewed-by: Samuel Pitoiset 



Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] tgsi: clarify TGSI_SEMANTIC_SAMPLEMASK documentation

2017-06-09 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Jun 9, 2017 at 3:56 PM, Brian Paul  wrote:
> ---
>  src/gallium/docs/source/tgsi.rst | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/docs/source/tgsi.rst 
> b/src/gallium/docs/source/tgsi.rst
> index eceaa6d..7fb963f 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -3298,8 +3298,9 @@ TGSI_SEMANTIC_SAMPLEMASK
>  
>
>  For fragment shaders, this semantic label indicates that an output contains
> -the sample mask used to disable further sample processing
> -(i.e. gl_SampleMask). Only the X value is used, up to 32x MS.
> +the sample mask used to disable further sample processing.  The output's
> +type is uint[4] but only the X component is used (i.e. gl_SampleMask[0]).
> +Each bit corresponds to one sample position (up to 32x MSAA is supported).
>
>  TGSI_SEMANTIC_INVOCATIONID
>  ""
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: don't emit DB_STENCIL_CONTROL if it has no effect

2017-06-09 Thread Marek Olšák
On Fri, Jun 9, 2017 at 4:00 PM, Samuel Pitoiset
 wrote:
>
>
> On 06/09/2017 03:49 PM, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>   src/gallium/drivers/radeonsi/si_state.c | 3 ++-
>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c
>> b/src/gallium/drivers/radeonsi/si_state.c
>> index 53f66ac..a8255f2 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -1078,21 +1078,22 @@ static void *si_create_dsa_state(struct
>> pipe_context *ctx,
>> if (state->alpha.enabled) {
>> dsa->alpha_func = state->alpha.func;
>> si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
>>SI_SGPR_ALPHA_REF * 4,
>> fui(state->alpha.ref_value));
>> } else {
>> dsa->alpha_func = PIPE_FUNC_ALWAYS;
>> }
>> si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
>> -   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL,
>> db_stencil_control);
>> +   if (state->stencil[0].enabled)
>> +   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL,
>> db_stencil_control);
>
>
> How if stencil is enabled, then disabled? Doesn't this reg has to be set to
> 0?

DB_DEPTH_CONTROL enables and disables stencil.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 57/63] radeonsi: track use of bindless samplers/images from tgsi_shader_info

2017-06-09 Thread Marek Olšák
On Fri, Jun 9, 2017 at 3:35 PM, Samuel Pitoiset
 wrote:
> This adds some new helper functions to know if the current draw
> call (or dispatch compute) is using bindless samplers/images,
> based on TGSI analysis.
>
> v3: - add si_context::uses_bindless_{samplers,images}
> - add si_bind_shader_common() to limit code duplication
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeonsi/si_compute.c   |  2 ++
>  src/gallium/drivers/radeonsi/si_compute.h   | 14 +++
>  src/gallium/drivers/radeonsi/si_pipe.h  | 16 +
>  src/gallium/drivers/radeonsi/si_shader.h| 12 ++
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 31 
> +
>  5 files changed, 70 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 0338b8a123..79b107e96f 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -108,6 +108,8 @@ static void si_create_compute_state_async(void *job, int 
> thread_index)
> program->shader.is_monolithic = true;
> program->uses_grid_size = sel.info.uses_grid_size;
> program->uses_block_size = sel.info.uses_block_size;
> +   program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
> +   program->uses_bindless_images = sel.info.uses_bindless_images;
>
> if (si_shader_create(program->screen, tm, >shader, debug)) {
> program->shader.compilation_failed = true;
> diff --git a/src/gallium/drivers/radeonsi/si_compute.h 
> b/src/gallium/drivers/radeonsi/si_compute.h
> index 764d708c4f..3cf1538267 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.h
> +++ b/src/gallium/drivers/radeonsi/si_compute.h
> @@ -49,6 +49,20 @@ struct si_compute {
> unsigned variable_group_size : 1;
> unsigned uses_grid_size:1;
> unsigned uses_block_size:1;
> +   unsigned uses_bindless_samplers:1;
> +   unsigned uses_bindless_images:1;
>  };
>
> +static inline bool
> +si_compute_uses_bindless_samplers(struct si_context *sctx)
> +{
> +   return sctx->cs_shader_state.program->uses_bindless_samplers;
> +}
> +
> +static inline bool
> +si_compute_uses_bindless_images(struct si_context *sctx)
> +{
> +   return sctx->cs_shader_state.program->uses_bindless_images;
> +}

Can you inline these functions?

> +
>  #endif /* SI_COMPUTE_H */
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
> b/src/gallium/drivers/radeonsi/si_pipe.h
> index 252cec3b91..b87fa856cd 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -428,6 +428,10 @@ struct si_context {
> /* Resident bindless handles */
> struct util_dynarrayresident_tex_handles;
> struct util_dynarrayresident_img_handles;
> +
> +   /* Bindless state */
> +   booluses_bindless_samplers;
> +   booluses_bindless_images;
>  };
>
>  /* cik_sdma.c */
> @@ -548,6 +552,18 @@ static inline struct tgsi_shader_info 
> *si_get_vs_info(struct si_context *sctx)
> return vs->cso ? >cso->info : NULL;
>  }
>
> +static inline bool
> +si_graphics_uses_bindless_samplers(struct si_context *sctx)
> +{
> +   return sctx->uses_bindless_samplers;
> +}
> +
> +static inline bool
> +si_graphics_uses_bindless_images(struct si_context *sctx)
> +{
> +   return sctx->uses_bindless_images;
> +}

Can you inline these functions?

> +
>  static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
>  {
> if (sctx->gs_shader.cso)
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
> b/src/gallium/drivers/radeonsi/si_shader.h
> index 7c04b7e253..4ebb745cb6 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -629,4 +629,16 @@ si_get_main_shader_part(struct si_shader_selector *sel,
> return >main_shader_part;
>  }
>
> +static inline bool
> +si_shader_uses_bindless_samplers(struct si_shader_selector *selector)
> +{
> +   return selector ? selector->info.uses_bindless_samplers : false;
> +}
> +
> +static inline bool
> +si_shader_uses_bindless_images(struct si_shader_selector *selector)
> +{
> +   return selector ? selector->info.uses_bindless_images : false;
> +}
> +
>  #endif
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
> b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 677a6de88c..776dd0b67a 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -2200,6 +2200,23 @@ static void si_update_clip_regs(struct si_context 
> *sctx,
> si_mark_atom_dirty(sctx, >clip_regs);
>  }
>
> +static void si_bind_shader_common(struct si_context *sctx)

si_update_common_shader_state

Thanks,
Marek

[Mesa-dev] [PATCH 2/3] i965/miptree: Separate src and dst slice specifiers in slice copy

2017-06-09 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 60 ---
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index f8fdde7..a4b2aeb 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1205,26 +1205,25 @@ intel_miptree_get_tile_offsets(const struct 
intel_mipmap_tree *mt,
 
 static void
 intel_miptree_copy_slice_sw(struct brw_context *brw,
-struct intel_mipmap_tree *dst_mt,
 struct intel_mipmap_tree *src_mt,
-int level,
-int slice,
-int width,
-int height)
+unsigned src_level, unsigned src_layer,
+struct intel_mipmap_tree *dst_mt,
+unsigned dst_level, unsigned dst_layer,
+unsigned width, unsigned height)
 {
void *src, *dst;
ptrdiff_t src_stride, dst_stride;
int cpp = dst_mt->cpp;
 
intel_miptree_map(brw, src_mt,
- level, slice,
+ src_level, src_layer,
  0, 0,
  width, height,
  GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
  , _stride);
 
intel_miptree_map(brw, dst_mt,
- level, slice,
+ dst_level, dst_layer,
  0, 0,
  width, height,
  GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
@@ -1250,8 +1249,8 @@ intel_miptree_copy_slice_sw(struct brw_context *brw,
   }
}
 
-   intel_miptree_unmap(brw, dst_mt, level, slice);
-   intel_miptree_unmap(brw, src_mt, level, slice);
+   intel_miptree_unmap(brw, dst_mt, dst_level, dst_layer);
+   intel_miptree_unmap(brw, src_mt, src_level, src_layer);
 
/* Don't forget to copy the stencil data over, too.  We could have skipped
 * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
@@ -1260,23 +1259,28 @@ intel_miptree_copy_slice_sw(struct brw_context *brw,
 */
if (dst_mt->stencil_mt) {
   assert(src_mt->stencil_mt);
-  intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
-  level, slice, width, height);
+  intel_miptree_copy_slice_sw(brw,
+  src_mt->stencil_mt, src_level, src_layer,
+  dst_mt->stencil_mt, dst_level, dst_layer,
+  width, height);
}
 }
 
 static void
 intel_miptree_copy_slice(struct brw_context *brw,
-struct intel_mipmap_tree *dst_mt,
-struct intel_mipmap_tree *src_mt,
-unsigned level, unsigned slice)
+ struct intel_mipmap_tree *src_mt,
+ unsigned src_level, unsigned src_layer,
+ struct intel_mipmap_tree *dst_mt,
+ unsigned dst_level, unsigned dst_layer)
 
 {
+   uint32_t width = minify(src_mt->physical_width0,
+   src_level - src_mt->first_level);
+   uint32_t height = minify(src_mt->physical_height0,
+src_level - src_mt->first_level);
mesa_format format = src_mt->format;
-   uint32_t width = minify(src_mt->physical_width0, level - 
src_mt->first_level);
-   uint32_t height = minify(src_mt->physical_height0, level - 
src_mt->first_level);
 
-   assert(slice < src_mt->level[level].depth);
+   assert(src_layer < src_mt->level[src_level].depth);
assert(src_mt->format == dst_mt->format);
 
if (dst_mt->compressed) {
@@ -1292,15 +1296,17 @@ intel_miptree_copy_slice(struct brw_context *brw,
 */
if (src_mt->stencil_mt) {
   intel_miptree_copy_slice_sw(brw,
-  dst_mt, src_mt,
-  level, slice,
+  src_mt, src_level, src_layer,
+  dst_mt, dst_level, dst_layer,
   width, height);
   return;
}
 
uint32_t dst_x, dst_y, src_x, src_y;
-   intel_miptree_get_image_offset(dst_mt, level, slice, _x, _y);
-   intel_miptree_get_image_offset(src_mt, level, slice, _x, _y);
+   intel_miptree_get_image_offset(dst_mt, dst_level, dst_layer,
+  _x, _y);
+   intel_miptree_get_image_offset(src_mt, src_level, src_layer,
+  _x, _y);
 
DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
_mesa_get_format_name(src_mt->format),
@@ -1310,13 +1316,15 @@ intel_miptree_copy_slice(struct brw_context *brw,
width, height);
 
if 

[Mesa-dev] [PATCH 3/3] i965/gen4: Add support for single layer in alignment workaround

2017-06-09 Thread Topi Pohjolainen
On gen < 6 there are no level or layer specifiers available
for render and depth targets. In order to support rendering to a
specific level/layer, the driver needs to manually offset the surface
to the desired slice.
There are, however, alignment restrictions to respect as well, and
in some cases the only option is to use a temporary single-slice
surface, which the driver copies to the full miptree after rendering.

The current alignment workaround introduces new texture images which
are added to the parent texture object. Texture validation later
on copies the additional levels back to the surface that contains
the full mipmap.
This only works for non-arrayed surfaces, and the driver currently
creates new arrayed images in vain - individual layers within the
newly created images are still unaligned, the same as before.

This patch drops this mechanism and instead attaches a single
temporary slice to the render buffer. This gets immediately
copied back to the mipmapped and/or arrayed surface just after
the render is done.

Sitting on top of earlier series cleaning up the depth buffer
state, this patch additionally fixes the following piglit tests:

ext_texture_array.copyteximage 2d_array.g45m64
ext_texture_array.copyteximage 1d_array.g45m64
arb_framebuffer_object.fbo-blit-stretch.g33m64
ext_framebuffer_object.fbo-cubemap.g965m64
arb_framebuffer_object.fbo-generatemipmap-cubemap.g965m64
arb_texture_cube_map.copyteximage cube.g965m64
ext_texture_array.copyteximage 1d_array.g965m64
ext_texture_array.copyteximage 2d_array.g965m64
ext_texture_array.fbo-array.g965m64
ext_texture_array.gen-mipmap.g965m64
ext_texture_array.fbo-generatemipmap-array.g965m64
arb_pixel_buffer_object.texsubimage array pbo.g965m64
ext_texture_array.copyteximage 2d_array.ilkm64
ext_texture_array.copyteximage 1d_array.ilkm64
arb_texture_cube_map.copyteximage cube.ilkm64

CC: Kenneth Graunke 
CC: Jason Ekstrand 
CC: Ian Romanick 
Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/brw_draw.c | 51 
 src/mesa/drivers/dri/i965/brw_misc_state.c   |  4 +-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  3 +-
 src/mesa/drivers/dri/i965/intel_fbo.c| 19 +
 src/mesa/drivers/dri/i965/intel_fbo.h| 24 +++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c|  2 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h|  7 
 7 files changed, 99 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 611cb86..cb441c3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -396,6 +396,56 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context 
*brw)
 }
 
 static void
+intel_renderbuffer_move_temp_back(struct brw_context *brw,
+  struct intel_renderbuffer *irb)
+{
+   if (irb->align_wa_mt == NULL)
+  return;
+
+   brw_render_cache_set_check_flush(brw, irb->align_wa_mt->bo);
+
+   intel_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0,
+irb->mt,
+irb->Base.Base.TexImage->Level, irb->mt_layer);
+
+   intel_miptree_reference(>align_wa_mt, NULL);
+
+   /* Finally restore the x,y to correspond to full miptree. */
+   intel_renderbuffer_set_draw_offset(irb);
+
+   /* Make sure render surface state gets re-emitted with updated miptree. */
+   brw->NewGLState |= _NEW_BUFFERS;
+}
+
+static void
+brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw)
+{
+   struct gl_context *ctx = >ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+   struct intel_renderbuffer *depth_irb =
+  intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   struct intel_renderbuffer *stencil_irb =
+  intel_get_renderbuffer(fb, BUFFER_STENCIL);
+
+   if (depth_irb && depth_irb->align_wa_mt)
+  intel_renderbuffer_move_temp_back(brw, depth_irb);
+
+   if (stencil_irb && stencil_irb->align_wa_mt)
+  intel_renderbuffer_move_temp_back(brw, stencil_irb);
+
+   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
+  struct intel_renderbuffer *irb =
+ intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+  if (!irb || irb->align_wa_mt == NULL)
+ continue;
+
+  intel_renderbuffer_move_temp_back(brw, irb);
+   }
+}
+
+static void
 brw_predraw_set_aux_buffers(struct brw_context *brw)
 {
if (brw->gen < 9)
@@ -626,6 +676,7 @@ retry:
   intel_batchbuffer_flush(brw);
 
brw_program_cache_check_size(brw);
+   brw_postdraw_reconcile_align_wa_slices(brw);
brw_postdraw_set_buffers_need_resolve(brw);
 
return;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index fe021b0..0c25261 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ 

[Mesa-dev] [PATCH 1/3] i965/miptree: Clarify face/level/layer in slice copy

2017-06-09 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 39 ++-
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 4b58268..f8fdde7 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1269,22 +1269,14 @@ static void
 intel_miptree_copy_slice(struct brw_context *brw,
 struct intel_mipmap_tree *dst_mt,
 struct intel_mipmap_tree *src_mt,
-int level,
-int face,
-int depth)
+unsigned level, unsigned slice)
 
 {
mesa_format format = src_mt->format;
uint32_t width = minify(src_mt->physical_width0, level - 
src_mt->first_level);
uint32_t height = minify(src_mt->physical_height0, level - 
src_mt->first_level);
-   int slice;
 
-   if (face > 0)
-  slice = face;
-   else
-  slice = depth;
-
-   assert(depth < src_mt->level[level].depth);
+   assert(slice < src_mt->level[level].depth);
assert(src_mt->format == dst_mt->format);
 
if (dst_mt->compressed) {
@@ -1347,17 +1339,26 @@ intel_miptree_copy_teximage(struct brw_context *brw,
struct intel_texture_object *intel_obj =
   intel_texture_object(intelImage->base.Base.TexObject);
int level = intelImage->base.Base.Level;
-   int face = intelImage->base.Base.Face;
-
-   GLuint depth;
-   if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY)
-  depth = intelImage->base.Base.Height;
-   else
-  depth = intelImage->base.Base.Depth;
+   const unsigned face = intelImage->base.Base.Face;
+   unsigned start_layer, end_layer;
+
+   if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) {
+  assert(face == 0);
+  assert(intelImage->base.Base.Height);
+  start_layer = 0;
+  end_layer = intelImage->base.Base.Height - 1;
+   } else if (face > 0) {
+  start_layer = face;
+  end_layer = face;
+   } else {
+  assert(intelImage->base.Base.Depth);
+  start_layer = 0;
+  end_layer = intelImage->base.Base.Depth - 1;
+   }
 
if (!invalidate) {
-  for (int slice = 0; slice < depth; slice++) {
- intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
+  for (unsigned i = start_layer; i <= end_layer; i++) {
+ intel_miptree_copy_slice(brw, dst_mt, src_mt, level, i);
   }
}
 
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: don't emit DB_STENCIL_CONTROL if it has no effect

2017-06-09 Thread Samuel Pitoiset



On 06/09/2017 03:49 PM, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_state.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 53f66ac..a8255f2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1078,21 +1078,22 @@ static void *si_create_dsa_state(struct pipe_context 
*ctx,
if (state->alpha.enabled) {
dsa->alpha_func = state->alpha.func;
  
  		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +

   SI_SGPR_ALPHA_REF * 4, 
fui(state->alpha.ref_value));
} else {
dsa->alpha_func = PIPE_FUNC_ALWAYS;
}
  
  	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);

-   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
+   if (state->stencil[0].enabled)
+   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, 
db_stencil_control);


What if stencil is enabled, then disabled? Doesn't this reg have to be set 
to 0?



if (state->depth.bounds_test) {
si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 
fui(state->depth.bounds_min));
si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 
fui(state->depth.bounds_max));
}
  
  	return dsa;

  }
  
  static void si_bind_dsa_state(struct pipe_context *ctx, void *state)

  {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 52/63] radeonsi: implement ARB_bindless_texture

2017-06-09 Thread Marek Olšák
On Fri, Jun 9, 2017 at 3:35 PM, Samuel Pitoiset
 wrote:
> This implements the Gallium interface. Decompression of resident
> textures/images will follow in the next patches.
>
> v3: - do not unmap bindless descriptors
> - remove unnecessary util_copy_image_view()
> - use RADEON_USAGE_READWRITE because of the WRITE_DATA packet
> - replace util_dynarray_delete by util_dynarray_delete_unordered
> - fix typo
> v2: - fix a memleak related to util_copy_image_view()
> - remove "texture" parameter from create_texture_handle()
> - store pipe_sampler_view instead of si_sampler_view
> - make use of pipe_sampler_view_reference() to fix a refcount issue
> - rename si_resident_descriptor to si_bindless_descriptor
> - use util_dynarray_*
> - add more comments
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 249 
> ++
>  src/gallium/drivers/radeonsi/si_pipe.c|  15 ++
>  src/gallium/drivers/radeonsi/si_pipe.h|  20 +++
>  3 files changed, 284 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 16ffdef95e..6b284f193e 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -60,6 +60,7 @@
>  #include "sid.h"
>  #include "gfx9d.h"
>
> +#include "util/hash_table.h"
>  #include "util/u_format.h"
>  #include "util/u_memory.h"
>  #include "util/u_upload_mgr.h"
> @@ -2121,6 +2122,248 @@ void si_bindless_descriptor_slab_free(void *priv, 
> struct pb_slab *pslab)
> FREE(slab);
>  }
>
> +static struct si_bindless_descriptor *
> +si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
> + unsigned size)
> +{
> +   struct si_screen *sscreen = sctx->screen;
> +   struct si_bindless_descriptor *desc;
> +   struct pb_slab_entry *entry;
> +   void *ptr;
> +
> +   /* Sub-allocate the bindless descriptor from a slab to avoid dealing
> +* with a ton of buffers and for reducing the winsys overhead.
> +*/
> +   entry = pb_slab_alloc(>bindless_descriptor_slabs, 64, 0);
> +   if (!entry)
> +   return NULL;
> +
> +   desc = NULL;
> +   desc = container_of(entry, desc, entry);
> +
> +   /* Upload the descriptor directly in VRAM. Because the slabs are
> +* currently never reclaimed, we don't need to synchronize the
> +* operation.
> +*/
> +   ptr = sscreen->b.ws->buffer_map(desc->buffer->buf, NULL,
> +   PIPE_TRANSFER_WRITE |
> +   PIPE_TRANSFER_UNSYNCHRONIZED);
> +   util_memcpy_cpu_to_le32(ptr + desc->offset, desc_list, size);
> +
> +   return desc;
> +}
> +
> +static uint64_t si_create_texture_handle(struct pipe_context *ctx,
> +struct pipe_sampler_view *view,
> +const struct pipe_sampler_state 
> *state)
> +{
> +   struct si_sampler_view *sview = (struct si_sampler_view *)view;
> +   struct si_context *sctx = (struct si_context *)ctx;
> +   struct si_texture_handle *tex_handle;
> +   struct si_sampler_state *sstate;
> +   uint32_t desc_list[16];
> +   uint64_t handle;
> +
> +   tex_handle = CALLOC_STRUCT(si_texture_handle);
> +   if (!tex_handle)
> +   return 0;
> +
> +   memset(desc_list, 0, sizeof(desc_list));
> +   si_init_descriptor_list(_list[0], 16, 1, 
> null_texture_descriptor);
> +
> +   sstate = ctx->create_sampler_state(ctx, state);
> +   if (!sstate) {
> +   FREE(tex_handle);
> +   return 0;
> +   }
> +
> +   si_set_sampler_view_desc(sctx, sview, sstate, _list[0]);
> +   ctx->delete_sampler_state(ctx, sstate);
> +
> +   tex_handle->desc = si_create_bindless_descriptor(sctx, desc_list,
> +sizeof(desc_list));
> +   if (!tex_handle->desc) {
> +   FREE(tex_handle);
> +   return 0;
> +   }
> +
> +   handle = tex_handle->desc->buffer->gpu_address +
> +tex_handle->desc->offset;
> +
> +   if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle,
> +tex_handle)) {
> +   pb_slab_free(>bindless_descriptor_slabs,
> +_handle->desc->entry);
> +   FREE(tex_handle);
> +   return 0;
> +   }
> +
> +   pipe_sampler_view_reference(_handle->view, view);
> +
> +   return handle;
> +}
> +
> +static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t 
> handle)
> +{
> +   struct si_context *sctx = (struct si_context *)ctx;
> +   struct si_texture_handle *tex_handle;
> +   struct 

[Mesa-dev] [PATCH] tgsi: clarify TGSI_SEMANTIC_SAMPLEMASK documentation

2017-06-09 Thread Brian Paul
---
 src/gallium/docs/source/tgsi.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index eceaa6d..7fb963f 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -3298,8 +3298,9 @@ TGSI_SEMANTIC_SAMPLEMASK
 
 
 For fragment shaders, this semantic label indicates that an output contains
-the sample mask used to disable further sample processing
-(i.e. gl_SampleMask). Only the X value is used, up to 32x MS.
+the sample mask used to disable further sample processing.  The output's
+type is uint[4] but only the X component is used (i.e. gl_SampleMask[0]).
+Each bit corresponds to one sample position (up to 32x MSAA is supported).
 
 TGSI_SEMANTIC_INVOCATIONID
 ""
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101326] gallium/wgl: Allow context creation without prior SetPixelFormat()

2017-06-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101326

--- Comment #3 from Brian Paul  ---
Thanks for the info.  The patch looks good.  I'll push it soon with minor
reformatting.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radeonsi: fix missing num_L2_invalidates increment

2017-06-09 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index 8508259..ec564c1 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -986,20 +986,21 @@ void si_emit_cache_flush(struct si_context *sctx)
 
/* Ideally flush TC together with CB/DB. */
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {
tc_flags |= EVENT_TC_ACTION_ENA |
EVENT_TCL1_ACTION_ENA;
 
/* Clear the flags. */
rctx->flags &= ~(SI_CONTEXT_INV_GLOBAL_L2 |
 SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
 SI_CONTEXT_INV_VMEM_L1);
+   sctx->b.num_L2_invalidates++;
}
 
/* Allocate memory for the fence. */
u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
 , (struct pipe_resource**));
va = rbuf->gpu_address + offset;
 
r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1,
 rbuf, va, 0, 1);
r600_gfx_wait_fence(rctx, va, 1, 0x);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radeonsi: don't emit DB_STENCIL_CONTROL if it has no effect

2017-06-09 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 53f66ac..a8255f2 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1078,21 +1078,22 @@ static void *si_create_dsa_state(struct pipe_context 
*ctx,
if (state->alpha.enabled) {
dsa->alpha_func = state->alpha.func;
 
si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
   SI_SGPR_ALPHA_REF * 4, 
fui(state->alpha.ref_value));
} else {
dsa->alpha_func = PIPE_FUNC_ALWAYS;
}
 
si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
-   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
+   if (state->stencil[0].enabled)
+   si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, 
db_stencil_control);
if (state->depth.bounds_test) {
si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 
fui(state->depth.bounds_min));
si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 
fui(state->depth.bounds_max));
}
 
return dsa;
 }
 
 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] gallium/noop: fix sampler views

2017-06-09 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/noop/noop_state.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/noop/noop_state.c 
b/src/gallium/drivers/noop/noop_state.c
index 46d99ab..80cfae8 100644
--- a/src/gallium/drivers/noop/noop_state.c
+++ b/src/gallium/drivers/noop/noop_state.c
@@ -69,21 +69,24 @@ static void *noop_create_sampler_state(struct pipe_context 
*ctx,
 }
 
 static struct pipe_sampler_view *noop_create_sampler_view(struct pipe_context 
*ctx,
   struct pipe_resource 
*texture,
   const struct 
pipe_sampler_view *state)
 {
struct pipe_sampler_view *sampler_view = CALLOC_STRUCT(pipe_sampler_view);
 
if (!sampler_view)
   return NULL;
+
/* initialize base object */
+   *sampler_view = *state;
+   sampler_view->texture = NULL;
pipe_resource_reference(_view->texture, texture);
pipe_reference_init(_view->reference, 1);
sampler_view->context = ctx;
return sampler_view;
 }
 
 static struct pipe_surface *noop_create_surface(struct pipe_context *ctx,
 struct pipe_resource *texture,
 const struct pipe_surface 
*surf_tmpl)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] radeonsi: get rid of more compressed_colortex_mask names

2017-06-09 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_blit.c|  4 ++--
 src/gallium/drivers/radeonsi/si_descriptors.c | 28 +--
 src/gallium/drivers/radeonsi/si_pipe.h|  2 +-
 src/gallium/drivers/radeonsi/si_state.h   |  2 +-
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 74bc2e9..524b20a 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -648,25 +648,25 @@ static void si_decompress_textures(struct si_context 
*sctx, unsigned shader_mask
 {
unsigned compressed_colortex_counter, mask;
 
if (sctx->blitter->running)
return;
 
/* Update the compressed_colortex_mask if necessary. */
compressed_colortex_counter = 
p_atomic_read(>screen->b.compressed_colortex_counter);
if (compressed_colortex_counter != 
sctx->b.last_compressed_colortex_counter) {
sctx->b.last_compressed_colortex_counter = 
compressed_colortex_counter;
-   si_update_compressed_colortex_masks(sctx);
+   si_update_needs_color_decompress_masks(sctx);
}
 
/* Decompress color & depth textures if needed. */
-   mask = sctx->compressed_tex_shader_mask & shader_mask;
+   mask = sctx->shader_needs_decompress_mask & shader_mask;
while (mask) {
unsigned i = u_bit_scan();
 
if (sctx->samplers[i].needs_depth_decompress_mask) {
si_decompress_sampler_depth_textures(sctx, 
>samplers[i]);
}
if (sctx->samplers[i].needs_color_decompress_mask) {
si_decompress_sampler_color_textures(sctx, 
>samplers[i]);
}
if (sctx->images[i].needs_color_decompress_mask) {
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 0e8606f..b04d108 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -560,32 +560,32 @@ static bool color_needs_decompression(struct r600_texture 
*rtex)
 }
 
 static bool depth_needs_decompression(struct r600_texture *rtex,
  struct si_sampler_view *sview)
 {
return rtex->db_compatible &&
   (!rtex->tc_compatible_htile ||
!r600_can_sample_zs(rtex, sview->is_stencil_sampler));
 }
 
-static void si_update_compressed_tex_shader_mask(struct si_context *sctx,
-unsigned shader)
+static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
+  unsigned shader)
 {
struct si_textures_info *samplers = >samplers[shader];
unsigned shader_bit = 1 << shader;
 
if (samplers->needs_depth_decompress_mask ||
samplers->needs_color_decompress_mask ||
sctx->images[shader].needs_color_decompress_mask)
-   sctx->compressed_tex_shader_mask |= shader_bit;
+   sctx->shader_needs_decompress_mask |= shader_bit;
else
-   sctx->compressed_tex_shader_mask &= ~shader_bit;
+   sctx->shader_needs_decompress_mask &= ~shader_bit;
 }
 
 static void si_set_sampler_views(struct pipe_context *ctx,
 enum pipe_shader_type shader, unsigned start,
  unsigned count,
 struct pipe_sampler_view **views)
 {
struct si_context *sctx = (struct si_context *)ctx;
struct si_textures_info *samplers = >samplers[shader];
int i;
@@ -623,25 +623,25 @@ static void si_set_sampler_views(struct pipe_context *ctx,
 
if (rtex->dcc_offset &&
p_atomic_read(>framebuffers_bound))
sctx->need_check_render_feedback = true;
} else {
samplers->needs_depth_decompress_mask &= ~(1u << slot);
samplers->needs_color_decompress_mask &= ~(1u << slot);
}
}
 
-   si_update_compressed_tex_shader_mask(sctx, shader);
+   si_update_shader_needs_decompress_mask(sctx, shader);
 }
 
 static void
-si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
+si_samplers_update_needs_color_decompress_mask(struct si_textures_info 
*samplers)
 {
unsigned mask = samplers->views.enabled_mask;
 
while (mask) {
int i = u_bit_scan();
struct pipe_resource *res = samplers->views.views[i]->texture;
 
if (res && res->target != PIPE_BUFFER) {
struct r600_texture *rtex = (struct r600_texture *)res;
 
@@ -847,25 +847,25 @@ si_set_shader_images(struct pipe_context *pipe,

[Mesa-dev] [PATCH] st/mesa: call check_program_state only when needed

2017-06-09 Thread Marek Olšák
From: Marek Olšák 

---
 src/mesa/state_tracker/st_atom.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index cbac762..bcfbcf8 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -109,21 +109,20 @@ static void check_program_state( struct st_context *st )
}
 
if (unlikely(new_fp != _fp->Base)) {
   if (old_fp)
  dirty |= old_fp->affected_states;
   if (new_fp)
  dirty |= st_fragment_program(new_fp)->affected_states;
}
 
st->dirty |= dirty;
-   st->gfx_shaders_may_be_dirty = false;
 }
 
 static void check_attrib_edgeflag(struct st_context *st)
 {
const struct gl_vertex_array **arrays = st->ctx->Array._DrawArrays;
GLboolean vertdata_edgeflags, edgeflag_culls_prims, edgeflags_enabled;
struct gl_program *vp = st->ctx->VertexProgram._Current;
 
if (!arrays)
   return;
@@ -164,21 +163,25 @@ void st_validate_state( struct st_context *st, enum 
st_pipeline pipeline )
 */
st->dirty |= ctx->NewDriverState & st->active_states & ST_ALL_STATES_MASK;
ctx->NewDriverState = 0;
 
/* Get pipeline state. */
switch (pipeline) {
case ST_PIPELINE_RENDER:
   if (st->ctx->API == API_OPENGL_COMPAT)
  check_attrib_edgeflag(st);
 
-  check_program_state(st);
+  if (st->gfx_shaders_may_be_dirty) {
+ check_program_state(st);
+ st->gfx_shaders_may_be_dirty = false;
+  }
+
   st_manager_validate_framebuffers(st);
 
   pipeline_mask = ST_PIPELINE_RENDER_STATE_MASK;
   break;
 
case ST_PIPELINE_CLEAR:
   st_manager_validate_framebuffers(st);
   pipeline_mask = ST_PIPELINE_CLEAR_STATE_MASK;
   break;
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium/docs: clarify get_name/get_vendor/get_device_vendor behavior

2017-06-09 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/docs/source/screen.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 852c31b..288fb5c 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -617,31 +617,40 @@ no matter which hint they got.
 Methods
 ---
 
 XXX to-do
 
 get_name
 
 
 Returns an identifying name for the screen.
 
+The returned string should remain valid and immutable for the lifetime of
+pipe_screen.
+
 get_vendor
 ^^
 
 Returns the screen vendor.
 
+The returned string should remain valid and immutable for the lifetime of
+pipe_screen.
+
 get_device_vendor
 ^
 
 Returns the actual vendor of the device driving the screen
 (as opposed to the driver vendor).
 
+The returned string should remain valid and immutable for the lifetime of
+pipe_screen.
+
 .. _get_param:
 
 get_param
 ^
 
 Get an integer/boolean screen parameter.
 
 **param** is one of the :ref:`PIPE_CAP` names.
 
 .. _get_paramf:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 63/63] radeonsi: enable ARB_bindless_texture

2017-06-09 Thread Samuel Pitoiset
This has only been tested on RX480.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 docs/features.txt  | 2 +-
 docs/relnotes/17.2.0.html  | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c | 4 +++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 0e69b9177e..79b71de543 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -277,7 +277,7 @@ GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
 
 Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES 
version:
 
-  GL_ARB_bindless_texture   started (airlied)
+  GL_ARB_bindless_texture   DONE (radeonsi)
   GL_ARB_cl_event   not started
   GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
   GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
diff --git a/docs/relnotes/17.2.0.html b/docs/relnotes/17.2.0.html
index 135d1e81dd..5859274d12 100644
--- a/docs/relnotes/17.2.0.html
+++ b/docs/relnotes/17.2.0.html
@@ -44,6 +44,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 
+GL_ARB_bindless_texture on radeonsi
 GL_ARB_post_depth_coverage on nvc0 (GM200+)
 GL_ARB_shader_viewport_layer_array on nvc0 (GM200+)
 GL_AMD_vertex_shader_layer on nvc0 (GM200+)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 37d648459d..9f6e3c26f4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -482,6 +482,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_DOUBLES:
case PIPE_CAP_TGSI_TEX_TXF_LZ:
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
+   case PIPE_CAP_BINDLESS_TEXTURE:
+   return 1;
+
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_CLOCK:
@@ -564,7 +567,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_UMA:
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
case PIPE_CAP_POST_DEPTH_COVERAGE:
-   case PIPE_CAP_BINDLESS_TEXTURE:
return 0;
 
case PIPE_CAP_QUERY_BUFFER_OBJECT:
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 62/63] radeonsi: add support for loading bindless images

2017-06-09 Thread Samuel Pitoiset
v2: - removed use of the Bindless flags after rebasing
- make use of tgsi_is_bindless_image_file()

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 28 +--
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 00f0a41874..e72052c445 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -184,8 +184,6 @@ image_fetch_rsrc(
LLVMValueRef index;
bool dcc_off = is_store;
 
-   assert(image->Register.File == TGSI_FILE_IMAGE);
-
if (!image->Register.Indirect) {
const struct tgsi_shader_info *info = bld_base->info;
unsigned images_writemask = info->images_store |
@@ -214,6 +212,18 @@ image_fetch_rsrc(
 index, "");
}
 
+   if (image->Register.File != TGSI_FILE_IMAGE) {
+   struct gallivm_state *gallivm = >gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+
+   LLVMValueRef ptr =
+   lp_build_emit_fetch_src(bld_base, image,
+   TGSI_TYPE_UNSIGNED64, 0);
+   rsrc_ptr = LLVMBuildIntToPtr(builder, ptr,
+si_const_array(ctx->v8i32, 0), "");
+   index = LLVMConstInt(ctx->i32, 0, 0);
+   }
+
*rsrc = load_image_desc(ctx, rsrc_ptr, index, target);
if (dcc_off && target != TGSI_TEXTURE_BUFFER)
*rsrc = force_dcc_off(ctx, *rsrc);
@@ -373,7 +383,8 @@ static void load_fetch_args(
 
buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
   offset, false, false);
-   } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
+   } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
+  tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
LLVMValueRef coords;
 
image_fetch_rsrc(bld_base, >Src[0], false, target, );
@@ -537,8 +548,9 @@ static bool is_oneway_access_only(const struct 
tgsi_full_instruction *inst,
 * images.
 */
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
-   (inst->Src[0].Register.File == TGSI_FILE_IMAGE &&
-inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
+   (inst->Memory.Texture == TGSI_TEXTURE_BUFFER &&
+(inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
+ tgsi_is_bindless_image_file(inst->Src[0].Register.File {
if (!shader_buffers_reverse_access_mask &&
!(info->images_buffers & images_reverse_access_mask))
return true;
@@ -639,7 +651,8 @@ static void store_fetch_args(
 
buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
   offset, false, false);
-   } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
+   } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
+  tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
unsigned target = inst->Memory.Texture;
LLVMValueRef coords;
 
@@ -858,7 +871,8 @@ static void atomic_fetch_args(
 
buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
   offset, true, false);
-   } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
+   } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
+  tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
unsigned target = inst->Memory.Texture;
LLVMValueRef coords;
 
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 60/63] radeonsi: invalidate buffers which are made resident if needed

2017-06-09 Thread Samuel Pitoiset
When a buffer becomes resident, check if it has been invalidated;
if so, update the descriptor and the dirty flag.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 34 +++
 1 file changed, 34 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 345eab56e6..142eedcd7a 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2263,6 +2263,32 @@ si_create_bindless_descriptor(struct si_context *sctx, 
uint32_t *desc_list,
return desc;
 }
 
+static void si_invalidate_bindless_buf_desc(struct si_context *sctx,
+   struct si_bindless_descriptor *desc,
+   struct pipe_resource *resource,
+   uint64_t offset)
+{
+   struct r600_resource *buf = r600_resource(resource);
+   uint32_t *desc_list = desc->desc_list;
+   uint64_t old_desc_va;
+
+   assert(resource->target == PIPE_BUFFER);
+
+   /* Retrieve the old buffer addr from the descriptor. */
+   old_desc_va  = desc_list[0];
+   old_desc_va |= ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc_list[1]) << 32);
+
+   if (old_desc_va != buf->gpu_address + offset) {
+   /* The buffer has been invalidated when the handle wasn't
+* resident, update the descriptor and the dirty flag.
+*/
+   si_set_buf_desc_address(buf, offset, &desc_list[4]);
+
+   desc->dirty = true;
+   sctx->bindless_descriptors_dirty = true;
+   }
+}
+
 static uint64_t si_create_texture_handle(struct pipe_context *ctx,
 struct pipe_sampler_view *view,
 const struct pipe_sampler_state *state)
@@ -2362,6 +2388,10 @@ static void si_make_texture_handle_resident(struct 
pipe_context *ctx,
if (rtex->dcc_offset &&
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
+   } else {
+   si_invalidate_bindless_buf_desc(sctx, tex_handle->desc,
+   sview->base.texture,
+   
sview->base.u.buf.offset);
}
 
/* Add the texture handle to the per-context list. */
@@ -2481,6 +2511,10 @@ static void si_make_image_handle_resident(struct 
pipe_context *ctx,
if (vi_dcc_enabled(rtex, level) &&
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
+   } else {
+   si_invalidate_bindless_buf_desc(sctx, img_handle->desc,
+   view->resource,
+   view->u.buf.offset);
}
 
/* Add the image handle to the per-context list. */
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 48/63] radeonsi: add si_init_descriptor_list() helper

2017-06-09 Thread Samuel Pitoiset
This will be used in order to initialize resident descriptors
for bindless textures/images.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 0e8606f2bb..8fb6108b67 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -95,6 +95,21 @@ static uint32_t null_image_descriptor[8] = {
 * descriptor */
 };
 
+static void si_init_descriptor_list(uint32_t *desc_list,
+   unsigned element_dw_size,
+   unsigned num_elements,
+   const uint32_t *null_descriptor)
+{
+   int i;
+
+   /* Initialize the array to NULL descriptors if the element size is 8. */
+   if (null_descriptor) {
+   assert(element_dw_size % 8 == 0);
+   for (i = 0; i < num_elements * element_dw_size / 8; i++)
+   memcpy(desc_list + i * 8, null_descriptor, 8 * 4);
+   }
+}
+
 static void si_init_descriptors(struct si_context *sctx,
struct si_descriptors *desc,
unsigned shader_userdata_index,
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 61/63] radeonsi: add support for loading bindless samplers

2017-06-09 Thread Samuel Pitoiset
v2: - removed use of the Bindless flags after rebasing

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index c53bde1e6d..00f0a41874 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1206,6 +1206,18 @@ static void tex_fetch_ptrs(
 si_get_sampler_slot(reg->Register.Index), 
0);
}
 
+   if (reg->Register.File != TGSI_FILE_SAMPLER) {
+   struct gallivm_state *gallivm = &ctx->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+
+   LLVMValueRef ptr =
+   lp_build_emit_fetch_src(bld_base, reg,
+   TGSI_TYPE_UNSIGNED64, 0);
+   list = LLVMBuildIntToPtr(builder, ptr,
+si_const_array(ctx->v8i32, 0), "");
+   index = LLVMConstInt(ctx->i32, 0, 0);
+   }
+
if (target == TGSI_TEXTURE_BUFFER)
*res_ptr = load_sampler_desc(ctx, list, index, DESC_BUFFER);
else
@@ -1783,9 +1795,6 @@ static void build_tex_intrinsic(const struct 
lp_build_tgsi_action *action,
/* The hardware needs special lowering for Gather4 with integer 
formats. */
if (ctx->screen->b.chip_class <= VI &&
opcode == TGSI_OPCODE_TG4) {
-   const unsigned src_idx = 2;
-
-   assert(inst->Src[src_idx].Register.File == TGSI_FILE_SAMPLER);
assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
 
if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium: fixed modulo zero crashes in tgsi interpreter (v2)

2017-06-09 Thread Marius Gräfe
softpipe throws integer division by zero exceptions on windows
when using % with integers in a geometry shader.

v2: Made error results consistent with existing div/mod zero handling in
tgsi. 64 bit signed integer division by zero returns zero like in
micro_idiv, unsigned returns ~0u like in micro_udiv.
Modulo operations always set all result bits to one (like in
micro_umod).
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 40 +-
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c41954c..97c75e9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -846,40 +846,40 @@ static void
 micro_u64div(union tgsi_double_channel *dst,
  const union tgsi_double_channel *src)
 {
-   dst->u64[0] = src[0].u64[0] / src[1].u64[0];
-   dst->u64[1] = src[0].u64[1] / src[1].u64[1];
-   dst->u64[2] = src[0].u64[2] / src[1].u64[2];
-   dst->u64[3] = src[0].u64[3] / src[1].u64[3];
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
 }
 
 static void
 micro_i64div(union tgsi_double_channel *dst,
  const union tgsi_double_channel *src)
 {
-   dst->i64[0] = src[0].i64[0] / src[1].i64[0];
-   dst->i64[1] = src[0].i64[1] / src[1].i64[1];
-   dst->i64[2] = src[0].i64[2] / src[1].i64[2];
-   dst->i64[3] = src[0].i64[3] / src[1].i64[3];
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
 }
 
 static void
 micro_u64mod(union tgsi_double_channel *dst,
  const union tgsi_double_channel *src)
 {
-   dst->u64[0] = src[0].u64[0] % src[1].u64[0];
-   dst->u64[1] = src[0].u64[1] % src[1].u64[1];
-   dst->u64[2] = src[0].u64[2] % src[1].u64[2];
-   dst->u64[3] = src[0].u64[3] % src[1].u64[3];
+   dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
+   dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
+   dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
+   dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
 }
 
 static void
 micro_i64mod(union tgsi_double_channel *dst,
  const union tgsi_double_channel *src)
 {
-   dst->i64[0] = src[0].i64[0] % src[1].i64[0];
-   dst->i64[1] = src[0].i64[1] % src[1].i64[1];
-   dst->i64[2] = src[0].i64[2] % src[1].i64[2];
-   dst->i64[3] = src[0].i64[3] % src[1].i64[3];
+   dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
+   dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
+   dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
+   dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
 }
 
 static void
@@ -4653,10 +4653,10 @@ micro_mod(union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1)
 {
-   dst->i[0] = src0->i[0] % src1->i[0];
-   dst->i[1] = src0->i[1] % src1->i[1];
-   dst->i[2] = src0->i[2] % src1->i[2];
-   dst->i[3] = src0->i[3] % src1->i[3];
+   dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
+   dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
+   dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
+   dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
 }
 
 static void
-- 
2.9.2.windows.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 53/63] radeonsi: add all resident buffers to the current CS

2017-06-09 Thread Samuel Pitoiset
Resident buffers have to be added to every new command stream.
Though, this could be slightly improved when current shaders
don't use any bindless textures/images but usually applications
tend to use bindless for almost every draw call, and the winsys
thread might help when buffers are added early.

v3: - use RADEON_USAGE_READWRITE because of WRITE_DATA packet
v2: - store pipe_sampler_view instead of si_sampler_view

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 50 +++
 src/gallium/drivers/radeonsi/si_hw_context.c  |  1 +
 src/gallium/drivers/radeonsi/si_state.h   |  1 +
 3 files changed, 52 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 6b284f193e..559a1c27b5 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2364,6 +2364,56 @@ static void si_make_image_handle_resident(struct 
pipe_context *ctx,
 }
 
 
+void si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
+{
+   unsigned num_resident_tex_handles, num_resident_img_handles;
+   unsigned num_bindless_descriptors;
+   unsigned i;
+
+   num_resident_tex_handles = sctx->resident_tex_handles.size /
+  sizeof(struct si_texture_handle *);
+   num_resident_img_handles = sctx->resident_img_handles.size /
+  sizeof(struct si_image_handle *);
+   num_bindless_descriptors = sctx->bindless_descriptors.size /
+  sizeof(struct r600_resource *);
+
+   /* Add all bindless descriptors. */
+   for (i = 0; i < num_bindless_descriptors; i++) {
+   struct r600_resource *desc =
+   *util_dynarray_element(&sctx->bindless_descriptors,
+  struct r600_resource *, i);
+
+   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc,
+ RADEON_USAGE_READWRITE,
+ RADEON_PRIO_DESCRIPTORS);
+   }
+
+   /* Add all resident texture handles. */
+   for (i = 0; i < num_resident_tex_handles; i++) {
+   struct si_texture_handle *tex_handle =
+   *util_dynarray_element(&sctx->resident_tex_handles,
+  struct si_texture_handle *, i);
+   struct si_sampler_view *sview =
+   (struct si_sampler_view *)tex_handle->view;
+
+   si_sampler_view_add_buffer(sctx, sview->base.texture,
+  RADEON_USAGE_READ,
+  sview->is_stencil_sampler, false);
+   }
+
+   /* Add all resident image handles. */
+   for (i = 0; i < num_resident_img_handles; i++) {
+   struct si_image_handle *img_handle =
+   *util_dynarray_element(&sctx->resident_img_handles,
+  struct si_image_handle *, i);
+   struct pipe_image_view *view = &img_handle->view;
+
+   si_sampler_view_add_buffer(sctx, view->resource,
+  RADEON_USAGE_READWRITE,
+  false, false);
+   }
+}
+
 /* INIT/DEINIT/UPLOAD */
 
 /* GFX9 has only 4KB of CE, while previous chips had 32KB. In order
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c 
b/src/gallium/drivers/radeonsi/si_hw_context.c
index 92c09cb633..345825af00 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -235,6 +235,7 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);
+   si_all_resident_buffers_begin_new_cs(ctx);
 
ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 612c49fb0e..657c101dc1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -324,6 +324,7 @@ bool si_upload_graphics_shader_descriptors(struct 
si_context *sctx);
 bool si_upload_compute_shader_descriptors(struct si_context *sctx);
 void si_release_all_descriptors(struct si_context *sctx);
 void si_all_descriptors_begin_new_cs(struct si_context *sctx);
+void si_all_resident_buffers_begin_new_cs(struct si_context *sctx);
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource 
**rbuffer,
const uint8_t *ptr, unsigned size, uint32_t 
*const_offset);
 void 

[Mesa-dev] [PATCH v3 49/63] radeonsi: add si_set_sampler_view_desc() helper

2017-06-09 Thread Samuel Pitoiset
To share some common code between bound and bindless textures.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 95 +++
 1 file changed, 52 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 8fb6108b67..a6e148d84a 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -480,6 +480,54 @@ void si_set_mutable_tex_desc_fields(struct si_screen 
*sscreen,
}
 }
 
+static void si_set_sampler_view_desc(struct si_context *sctx,
+struct si_sampler_view *sview,
+struct si_sampler_state *sstate,
+uint32_t *desc)
+{
+   struct pipe_sampler_view *view = &sview->base;
+   struct r600_texture *rtex = (struct r600_texture *)view->texture;
+   bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
+
+   if (unlikely(!is_buffer && sview->dcc_incompatible)) {
+   if (vi_dcc_enabled(rtex, view->u.tex.first_level))
+   if (!r600_texture_disable_dcc(&sctx->b, rtex))
+   sctx->b.decompress_dcc(&sctx->b.b, rtex);
+
+   sview->dcc_incompatible = false;
+   }
+
+   assert(rtex); /* views with texture == NULL aren't supported */
+   memcpy(desc, sview->state, 8*4);
+
+   if (is_buffer) {
+   si_set_buf_desc_address(&rtex->resource,
+   sview->base.u.buf.offset,
+   desc + 4);
+   } else {
+   bool is_separate_stencil = rtex->db_compatible &&
+  sview->is_stencil_sampler;
+
+   si_set_mutable_tex_desc_fields(sctx->screen, rtex,
+  sview->base_level_info,
+  sview->base_level,
+  sview->base.u.tex.first_level,
+  sview->block_width,
+  is_separate_stencil,
+  desc);
+   }
+
+   if (!is_buffer && rtex->fmask.size) {
+   memcpy(desc + 8, sview->fmask_state, 8*4);
+   } else {
+   /* Disable FMASK and bind sampler state in [12:15]. */
+   memcpy(desc + 8, null_texture_descriptor, 4*4);
+
+   if (sstate)
+   memcpy(desc + 12, sstate->val, 4*4);
+   }
+}
+
 static void si_set_sampler_view(struct si_context *sctx,
unsigned shader,
unsigned slot, struct pipe_sampler_view *view,
@@ -496,53 +544,14 @@ static void si_set_sampler_view(struct si_context *sctx,
 
if (view) {
struct r600_texture *rtex = (struct r600_texture 
*)view->texture;
-   bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
-
-   if (unlikely(!is_buffer && rview->dcc_incompatible)) {
-   if (vi_dcc_enabled(rtex, view->u.tex.first_level))
-   if (!r600_texture_disable_dcc(&sctx->b, rtex))
-   sctx->b.decompress_dcc(&sctx->b.b, 
rtex);
 
-   rview->dcc_incompatible = false;
-   }
+   si_set_sampler_view_desc(sctx, rview,
+views->sampler_states[slot], desc);
 
-   assert(rtex); /* views with texture == NULL aren't supported */
-   pipe_sampler_view_reference(&views->views[slot], view);
-   memcpy(desc, rview->state, 8*4);
-
-   if (is_buffer) {
+   if (rtex->resource.b.b.target == PIPE_BUFFER)
rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
 
-   si_set_buf_desc_address(&rtex->resource,
-   view->u.buf.offset,
-   desc + 4);
-   } else {
-   bool is_separate_stencil =
-   rtex->db_compatible &&
-   rview->is_stencil_sampler;
-
-   si_set_mutable_tex_desc_fields(sctx->screen, rtex,
-  rview->base_level_info,
-  rview->base_level,
-  
rview->base.u.tex.first_level,
-  rview->block_width,
-  is_separate_stencil,
-  desc);
-   }
-
-  

[Mesa-dev] [PATCH v3 59/63] radeonsi: upload new descriptors when resident buffers are invalidated

2017-06-09 Thread Samuel Pitoiset
When texture buffers are invalidated the addr in the resident
descriptor has to be updated but we can't create a new descriptor
because the resident handle has to be the same.

Instead, use the WRITE_DATA packet which allows to update memory
directly but graphics/compute have to be idle in case the GPU is
reading the descriptor.

v3: - use the ME engine for the WRITE_DATA packet
- sctx->b.flags cosmetic change
v2: - store pipe_sampler_view instead of si_sampler_view

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeon/r600_pipe_common.h |   4 +
 src/gallium/drivers/radeonsi/si_descriptors.c | 145 ++
 src/gallium/drivers/radeonsi/si_pipe.h|   3 +
 3 files changed, 152 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 84d38fb108..ba8473cbd9 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -181,6 +181,10 @@ struct r600_resource {
 
/* Whether the resource has been exported via resource_get_handle. */
unsignedexternal_usage; /* PIPE_HANDLE_USAGE_* 
*/
+
+   /* Whether this resource is referenced by bindless handles. */
+   booltexture_handle_allocated;
+   boolimage_handle_allocated;
 };
 
 struct r600_transfer {
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 6ab9b77e54..345eab56e6 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1848,6 +1848,67 @@ static void si_rebind_buffer(struct pipe_context *ctx, 
struct pipe_resource *buf
}
}
}
+
+   /* Bindless texture handles */
+   if (rbuffer->texture_handle_allocated) {
+   unsigned num_resident_tex_handles;
+
+   num_resident_tex_handles = sctx->resident_tex_handles.size /
+  sizeof(struct si_texture_handle *);
+
+   for (i = 0; i < num_resident_tex_handles; i++) {
+   struct si_texture_handle *tex_handle =
+   
*util_dynarray_element(&sctx->resident_tex_handles,
+  struct si_texture_handle 
*, i);
+   struct pipe_sampler_view *view = tex_handle->view;
+   struct si_bindless_descriptor *desc = tex_handle->desc;
+
+   if (view->texture == buf) {
+   si_set_buf_desc_address(rbuffer,
+   view->u.buf.offset,
+   &desc->desc_list[4]);
+   desc->dirty = true;
+   sctx->bindless_descriptors_dirty = true;
+
+   radeon_add_to_buffer_list_check_mem(
+   &sctx->b, &sctx->b.gfx, rbuffer,
+   RADEON_USAGE_READ,
+   RADEON_PRIO_SAMPLER_BUFFER, true);
+   }
+   }
+   }
+
+   /* Bindless image handles */
+   if (rbuffer->image_handle_allocated) {
+   unsigned num_resident_img_handles;
+
+   num_resident_img_handles = sctx->resident_img_handles.size /
+  sizeof(struct si_image_handle *);
+
+   for (i = 0; i < num_resident_img_handles; i++) {
+   struct si_image_handle *img_handle =
+   
*util_dynarray_element(&sctx->resident_img_handles,
+  struct si_image_handle 
*, i);
+   struct pipe_image_view *view = &img_handle->view;
+   struct si_bindless_descriptor *desc = img_handle->desc;
+
+   if (view->resource == buf) {
+   if (view->access & PIPE_IMAGE_ACCESS_WRITE)
+   si_mark_image_range_valid(view);
+
+   si_set_buf_desc_address(rbuffer,
+   view->u.buf.offset,
+   &desc->desc_list[4]);
+   desc->dirty = true;
+   sctx->bindless_descriptors_dirty = true;
+
+   radeon_add_to_buffer_list_check_mem(
+   &sctx->b, &sctx->b.gfx, rbuffer,
+   RADEON_USAGE_READWRITE,
+   RADEON_PRIO_SAMPLER_BUFFER, true);
+   }
+   }
+   }
 }
 
 /* 

[Mesa-dev] [PATCH v3 56/63] radeonsi: decompress resident textures/images before graphics/compute

2017-06-09 Thread Samuel Pitoiset
Similar to the existing decompression code path except that it
loops over the list of resident textures/images.

v3: - move the blitter check back to si_decompress_textures()
- decompress resident textures directly in si_decompress_textures()
- depth_texture -> needs_depth_decompress
- compressed_colortex -> need_color_decompress
- rebase on top of marek's cleanup
v2: - store pipe_sampler_view instead of si_sampler_view

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_blit.c| 65 +++
 src/gallium/drivers/radeonsi/si_descriptors.c | 52 +
 src/gallium/drivers/radeonsi/si_pipe.h|  3 ++
 3 files changed, 120 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 2740d040df..8b77242759 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -22,6 +22,7 @@
  */
 
 #include "si_pipe.h"
+#include "si_compute.h"
 #include "util/u_format.h"
 #include "util/u_surface.h"
 
@@ -706,6 +707,67 @@ static void si_check_render_feedback(struct si_context 
*sctx)
sctx->need_check_render_feedback = false;
 }
 
+static void si_decompress_resident_textures(struct si_context *sctx)
+{
+   unsigned num_resident_tex_handles;
+   unsigned i;
+
+   num_resident_tex_handles = sctx->resident_tex_handles.size /
+  sizeof(struct si_texture_handle *);
+
+   for (i = 0; i < num_resident_tex_handles; i++) {
+   struct si_texture_handle *tex_handle =
+   *util_dynarray_element(&sctx->resident_tex_handles,
+  struct si_texture_handle *, i);
+   struct pipe_sampler_view *view = tex_handle->view;
+   struct si_sampler_view *sview = (struct si_sampler_view *)view;
+   struct r600_texture *tex;
+
+   assert(view);
+   tex = (struct r600_texture *)view->texture;
+
+   if (view->texture->target == PIPE_BUFFER)
+   continue;
+
+   if (tex_handle->needs_color_decompress)
+   si_decompress_color_texture(sctx, tex, 
view->u.tex.first_level,
+   view->u.tex.last_level);
+
+   if (tex_handle->needs_depth_decompress)
+   si_decompress_depth(sctx, tex,
+   sview->is_stencil_sampler ? PIPE_MASK_S : 
PIPE_MASK_Z,
+   view->u.tex.first_level, view->u.tex.last_level,
+   0, util_max_layer(&tex->resource.b.b, 
view->u.tex.first_level));
+   }
+}
+
+static void si_decompress_resident_images(struct si_context *sctx)
+{
+   unsigned num_resident_img_handles;
+   unsigned i;
+
+   num_resident_img_handles = sctx->resident_img_handles.size /
+  sizeof(struct si_image_handle *);
+
+   for (i = 0; i < num_resident_img_handles; i++) {
+   struct si_image_handle *img_handle =
+   *util_dynarray_element(&sctx->resident_img_handles,
+  struct si_image_handle *, i);
+   struct pipe_image_view *view = &img_handle->view;
+   struct r600_texture *tex;
+
+   assert(view);
+   tex = (struct r600_texture *)view->resource;
+
+   if (view->resource->target == PIPE_BUFFER)
+   continue;
+
+   if (img_handle->needs_color_decompress)
+   si_decompress_color_texture(sctx, tex, 
view->u.tex.level,
+   view->u.tex.level);
+   }
+}
+
 static void si_decompress_textures(struct si_context *sctx, unsigned 
shader_mask)
 {
unsigned compressed_colortex_counter, mask;
@@ -736,6 +798,9 @@ static void si_decompress_textures(struct si_context *sctx, 
unsigned shader_mask
}
}
 
+   si_decompress_resident_textures(sctx);
+   si_decompress_resident_images(sctx);
+
si_check_render_feedback(sctx);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 980448181c..6ab9b77e54 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1615,6 +1615,48 @@ static void si_set_polygon_stipple(struct pipe_context 
*ctx,
 
 /* TEXTURE METADATA ENABLE/DISABLE */
 
+static void
+si_resident_handles_update_compressed_colortex(struct si_context *sctx)
+{
+   unsigned num_resident_tex_handles, num_resident_img_handles;
+   unsigned i;
+
+   num_resident_tex_handles = sctx->resident_tex_handles.size /
+  sizeof(struct si_texture_handle *);
+
+   for (i 

[Mesa-dev] [PATCH v3 46/63] st/mesa: disable per-context seamless cubemap when using texture handles

2017-06-09 Thread Samuel Pitoiset
The ARB_bindless_texture spec says:

   "If ARB_seamless_cubemap (or OpenGL 4.0, which includes it) is
supported, the per-context seamless cubemap enable is ignored
and treated as disabled when using texture handles."

   "If AMD_seamless_cubemap_per_texture is supported, the seamless
cube map texture parameter of the underlying texture does apply
when texture handles are used."

The per-context seamless cubemap flag should only be enabled for
bound textures/samplers.

v2: - do not rely on HandleAllocated

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
Reviewed-by: Nicolai Hähnle  (v2)
---
 src/mesa/state_tracker/st_atom_sampler.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_sampler.c 
b/src/mesa/state_tracker/st_atom_sampler.c
index c6d992fbb0..8c121d5407 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -135,7 +135,6 @@ st_convert_sampler(const struct st_context *st,
const struct gl_sampler_object *msamp,
struct pipe_sampler_state *sampler)
 {
-   struct gl_context *ctx = st->ctx;
GLenum texBaseFormat;
 
texBaseFormat = _mesa_texture_base_format(texobj);
@@ -226,8 +225,11 @@ st_convert_sampler(const struct st_context *st,
   sampler->compare_func = st_compare_func_to_pipe(msamp->CompareFunc);
}
 
-   sampler->seamless_cube_map =
-  ctx->Texture.CubeMapSeamless || msamp->CubeMapSeamless;
+   /* Only set the seamless cube map texture parameter because the per-context
+* enable should be ignored and treated as disabled when using texture
+* handles, as specified by ARB_bindless_texture.
+*/
+   sampler->seamless_cube_map = msamp->CubeMapSeamless;
 }
 
 /**
@@ -250,6 +252,7 @@ st_convert_sampler_from_unit(const struct st_context *st,
st_convert_sampler(st, texobj, msamp, sampler);
 
sampler->lod_bias += ctx->Texture.Unit[texUnit].LodBias;
+   sampler->seamless_cube_map |= ctx->Texture.CubeMapSeamless;
 }
 
 
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 54/63] radeonsi: only add descriptors in presence of resident handles

2017-06-09 Thread Samuel Pitoiset
This won't help much except for applications that use a ton
of resident handles. Though, this will reduce the winsys
overhead a little bit.

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 559a1c27b5..0cdccdfed1 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2374,6 +2374,12 @@ void si_all_resident_buffers_begin_new_cs(struct 
si_context *sctx)
   sizeof(struct si_texture_handle *);
num_resident_img_handles = sctx->resident_img_handles.size /
   sizeof(struct si_image_handle *);
+
+   /* Skip adding the bindless descriptors when no handles are resident.
+*/
+   if (!num_resident_tex_handles && !num_resident_img_handles)
+   return;
+
num_bindless_descriptors = sctx->bindless_descriptors.size /
   sizeof(struct r600_resource *);
 
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 58/63] radeonsi: only decompress resident textures/images when used

2017-06-09 Thread Samuel Pitoiset
When the current bound shaders don't use any bindless textures
or images, it's useless to decompress the resident resources.

v3: - rebased and updated check for compute

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_blit.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 8b77242759..70259b80c8 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -798,8 +798,14 @@ static void si_decompress_textures(struct si_context 
*sctx, unsigned shader_mask
}
}
 
-   si_decompress_resident_textures(sctx);
-   si_decompress_resident_images(sctx);
+   if (si_graphics_uses_bindless_samplers(sctx) ||
+   ((shader_mask & (1 << PIPE_SHADER_COMPUTE) &&
+si_compute_uses_bindless_samplers(sctx))))
+   si_decompress_resident_textures(sctx);
+   if (si_graphics_uses_bindless_images(sctx) ||
+   ((shader_mask & (1 << PIPE_SHADER_COMPUTE) &&
+si_compute_uses_bindless_images(sctx))))
+   si_decompress_resident_images(sctx);
 
si_check_render_feedback(sctx);
 }
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 57/63] radeonsi: track use of bindless samplers/images from tgsi_shader_info

2017-06-09 Thread Samuel Pitoiset
This adds some new helper functions to know if the current draw
call (or dispatch compute) is using bindless samplers/images,
based on TGSI analysis.

v3: - add si_context::uses_bindless_{samplers,images}
- add si_bind_shader_common() to limit code duplication

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_compute.c   |  2 ++
 src/gallium/drivers/radeonsi/si_compute.h   | 14 +++
 src/gallium/drivers/radeonsi/si_pipe.h  | 16 +
 src/gallium/drivers/radeonsi/si_shader.h| 12 ++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 31 +
 5 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 0338b8a123..79b107e96f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -108,6 +108,8 @@ static void si_create_compute_state_async(void *job, int 
thread_index)
program->shader.is_monolithic = true;
program->uses_grid_size = sel.info.uses_grid_size;
program->uses_block_size = sel.info.uses_block_size;
+   program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
+   program->uses_bindless_images = sel.info.uses_bindless_images;
 
if (si_shader_create(program->screen, tm, &program->shader, debug)) {
program->shader.compilation_failed = true;
diff --git a/src/gallium/drivers/radeonsi/si_compute.h 
b/src/gallium/drivers/radeonsi/si_compute.h
index 764d708c4f..3cf1538267 100644
--- a/src/gallium/drivers/radeonsi/si_compute.h
+++ b/src/gallium/drivers/radeonsi/si_compute.h
@@ -49,6 +49,20 @@ struct si_compute {
unsigned variable_group_size : 1;
unsigned uses_grid_size:1;
unsigned uses_block_size:1;
+   unsigned uses_bindless_samplers:1;
+   unsigned uses_bindless_images:1;
 };
 
+static inline bool
+si_compute_uses_bindless_samplers(struct si_context *sctx)
+{
+   return sctx->cs_shader_state.program->uses_bindless_samplers;
+}
+
+static inline bool
+si_compute_uses_bindless_images(struct si_context *sctx)
+{
+   return sctx->cs_shader_state.program->uses_bindless_images;
+}
+
 #endif /* SI_COMPUTE_H */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 252cec3b91..b87fa856cd 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -428,6 +428,10 @@ struct si_context {
/* Resident bindless handles */
struct util_dynarrayresident_tex_handles;
struct util_dynarrayresident_img_handles;
+
+   /* Bindless state */
+   booluses_bindless_samplers;
+   booluses_bindless_images;
 };
 
 /* cik_sdma.c */
@@ -548,6 +552,18 @@ static inline struct tgsi_shader_info 
*si_get_vs_info(struct si_context *sctx)
return vs->cso ? >cso->info : NULL;
 }
 
+static inline bool
+si_graphics_uses_bindless_samplers(struct si_context *sctx)
+{
+   return sctx->uses_bindless_samplers;
+}
+
+static inline bool
+si_graphics_uses_bindless_images(struct si_context *sctx)
+{
+   return sctx->uses_bindless_images;
+}
+
 static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
 {
if (sctx->gs_shader.cso)
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 7c04b7e253..4ebb745cb6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -629,4 +629,16 @@ si_get_main_shader_part(struct si_shader_selector *sel,
return >main_shader_part;
 }
 
+static inline bool
+si_shader_uses_bindless_samplers(struct si_shader_selector *selector)
+{
+   return selector ? selector->info.uses_bindless_samplers : false;
+}
+
+static inline bool
+si_shader_uses_bindless_images(struct si_shader_selector *selector)
+{
+   return selector ? selector->info.uses_bindless_images : false;
+}
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 677a6de88c..776dd0b67a 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2200,6 +2200,23 @@ static void si_update_clip_regs(struct si_context *sctx,
si_mark_atom_dirty(sctx, >clip_regs);
 }
 
+static void si_bind_shader_common(struct si_context *sctx)
+{
+   sctx->uses_bindless_samplers =
+   si_shader_uses_bindless_samplers(sctx->vs_shader.cso)  ||
+   si_shader_uses_bindless_samplers(sctx->gs_shader.cso)  ||
+   si_shader_uses_bindless_samplers(sctx->ps_shader.cso)  ||
+   si_shader_uses_bindless_samplers(sctx->tcs_shader.cso) ||
+   si_shader_uses_bindless_samplers(sctx->tes_shader.cso);
+   sctx->uses_bindless_images =
+  

[Mesa-dev] [PATCH v3 55/63] radeonsi: decompress DCC for resident textures/images

2017-06-09 Thread Samuel Pitoiset
Analogous to bound textures/images. We should also update the
resident descriptors and disable COMPRESSION_EN to avoid
useless DCC fetches, but that optimization is postponed to a
separate series.

v3: - remove use of si_update_check_render_feedback()
v2: - store pipe_sampler_view instead of si_sampler_view

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_blit.c| 62 +++
 src/gallium/drivers/radeonsi/si_descriptors.c | 21 +
 2 files changed, 83 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 74bc2e9a51..2740d040df 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -631,6 +631,64 @@ static void si_check_render_feedback_images(struct 
si_context *sctx,
}
 }
 
+static void si_check_render_feedback_resident_textures(struct si_context *sctx)
+{
+   unsigned num_resident_tex_handles;
+   unsigned i;
+
+   num_resident_tex_handles = sctx->resident_tex_handles.size /
+  sizeof(struct si_texture_handle *);
+
+   for (i = 0; i < num_resident_tex_handles; i++) {
+   struct si_texture_handle *tex_handle =
+   *util_dynarray_element(>resident_tex_handles,
+  struct si_texture_handle *, i);
+   struct pipe_sampler_view *view;
+   struct r600_texture *tex;
+
+   view = tex_handle->view;
+   if (view->texture->target == PIPE_BUFFER)
+   continue;
+
+   tex = (struct r600_texture *)view->texture;
+
+   si_check_render_feedback_texture(sctx, tex,
+view->u.tex.first_level,
+view->u.tex.last_level,
+view->u.tex.first_layer,
+view->u.tex.last_layer);
+   }
+}
+
+static void si_check_render_feedback_resident_images(struct si_context *sctx)
+{
+   unsigned num_resident_img_handles;
+   unsigned i;
+
+   num_resident_img_handles = sctx->resident_img_handles.size /
+  sizeof(struct si_image_handle *);
+
+   for (i = 0; i < num_resident_img_handles; i++) {
+   struct si_image_handle *img_handle =
+   *util_dynarray_element(>resident_img_handles,
+  struct si_image_handle *, i);
+   struct pipe_image_view *view;
+   struct r600_texture *tex;
+
+   view = _handle->view;
+   if (view->resource->target == PIPE_BUFFER)
+   continue;
+
+   tex = (struct r600_texture *)view->resource;
+
+   si_check_render_feedback_texture(sctx, tex,
+view->u.tex.level,
+view->u.tex.level,
+view->u.tex.first_layer,
+view->u.tex.last_layer);
+   }
+}
+
 static void si_check_render_feedback(struct si_context *sctx)
 {
 
@@ -641,6 +699,10 @@ static void si_check_render_feedback(struct si_context 
*sctx)
si_check_render_feedback_images(sctx, >images[i]);
si_check_render_feedback_textures(sctx, >samplers[i]);
}
+
+   si_check_render_feedback_resident_images(sctx);
+   si_check_render_feedback_resident_textures(sctx);
+
sctx->need_check_render_feedback = false;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 0cdccdfed1..980448181c 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2238,6 +2238,15 @@ static void si_make_texture_handle_resident(struct 
pipe_context *ctx,
sview = (struct si_sampler_view *)tex_handle->view;
 
if (resident) {
+   if (sview->base.texture->target != PIPE_BUFFER) {
+   struct r600_texture *rtex =
+   (struct r600_texture *)sview->base.texture;
+
+   if (rtex->dcc_offset &&
+   p_atomic_read(>framebuffers_bound))
+   sctx->need_check_render_feedback = true;
+   }
+
/* Add the texture handle to the per-context list. */
util_dynarray_append(>resident_tex_handles,
 struct si_texture_handle *, tex_handle);
@@ -2340,6 +2349,18 @@ static void si_make_image_handle_resident(struct 
pipe_context *ctx,
view = _handle->view;
 
if (resident) {
+

[Mesa-dev] [PATCH v3 39/63] st/mesa: implement ARB_bindless_texture

2017-06-09 Thread Samuel Pitoiset
v2: - remove "texture" parameter from create_texture_handle()

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Nicolai Hähnle  (v1)
Reviewed-by: Marek Olšák  (v2)
---
 src/mesa/state_tracker/st_cb_texture.c | 84 ++
 1 file changed, 84 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 99c59f77a3..e299147cd9 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -2880,6 +2880,82 @@ st_TexParameter(struct gl_context *ctx,
 }
 
 
+static GLuint64
+st_NewTextureHandle(struct gl_context *ctx, struct gl_texture_object *texObj,
+struct gl_sampler_object *sampObj)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_object *stObj = st_texture_object(texObj);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_sampler_view *view;
+   struct pipe_sampler_state sampler;
+
+   if (!st_finalize_texture(ctx, pipe, texObj, 0))
+  return 0;
+
+   st_convert_sampler(st, texObj, sampObj, );
+
+   view = st_get_texture_sampler_view_from_stobj(st, stObj, sampObj, 0);
+
+   return pipe->create_texture_handle(pipe, view, );
+}
+
+
+static void
+st_DeleteTextureHandle(struct gl_context *ctx, GLuint64 handle)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+
+   pipe->delete_texture_handle(pipe, handle);
+}
+
+
+static void
+st_MakeTextureHandleResident(struct gl_context *ctx, GLuint64 handle,
+ bool resident)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+
+   pipe->make_texture_handle_resident(pipe, handle, resident);
+}
+
+
+static GLuint64
+st_NewImageHandle(struct gl_context *ctx, struct gl_image_unit *imgObj)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_image_view image;
+
+   st_convert_image(st, imgObj, );
+
+   return pipe->create_image_handle(pipe, );
+}
+
+
+static void
+st_DeleteImageHandle(struct gl_context *ctx, GLuint64 handle)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+
+   pipe->delete_image_handle(pipe, handle);
+}
+
+
+static void
+st_MakeImageHandleResident(struct gl_context *ctx, GLuint64 handle,
+   GLenum access, bool resident)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+
+   pipe->make_image_handle_resident(pipe, handle, access, resident);
+}
+
+
 void
 st_init_texture_functions(struct dd_function_table *functions)
 {
@@ -2914,4 +2990,12 @@ st_init_texture_functions(struct dd_function_table 
*functions)
functions->ClearTexSubImage = st_ClearTexSubImage;
 
functions->TexParameter = st_TexParameter;
+
+   /* bindless functions */
+   functions->NewTextureHandle = st_NewTextureHandle;
+   functions->DeleteTextureHandle = st_DeleteTextureHandle;
+   functions->MakeTextureHandleResident = st_MakeTextureHandleResident;
+   functions->NewImageHandle = st_NewImageHandle;
+   functions->DeleteImageHandle = st_DeleteImageHandle;
+   functions->MakeImageHandleResident = st_MakeImageHandleResident;
 }
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 51/63] radeonsi: add a slab allocator for bindless descriptors

2017-06-09 Thread Samuel Pitoiset
For each texture/image handle, we need to allocate a new
buffer for the bindless descriptor. But when the number of
buffers added to the current CS becomes high, the overhead
in the winsys (and in the kernel) becomes significant.

To reduce this bottleneck, the idea is to suballocate the
bindless descriptors using a slab similar to the one used
in the winsys.

Currently, a buffer can hold 1024 bindless descriptors, but
this limit is arbitrary and could be changed in the future
if needed. Once a slab is allocated, the "base" buffer
is added to a per-context list.

v3: - use PIPE_USAGE_DEFAULT instead of PIPE_USAGE_IMMUTABLE
- replace util_dynarray_delete by util_dynarray_delete_unordered
v2: - rename si_resident_descriptor to si_bindless_descriptor
- make can_reclaim_slab() always return false
- use util_dynarray_*

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 84 +++
 src/gallium/drivers/radeonsi/si_pipe.c| 12 
 src/gallium/drivers/radeonsi/si_pipe.h| 15 +
 src/gallium/drivers/radeonsi/si_state.h   |  8 +++
 4 files changed, 119 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index ec19561fc0..16ffdef95e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2037,6 +2037,90 @@ void si_emit_compute_shader_userdata(struct si_context 
*sctx)
sctx->shader_pointers_dirty &= ~compute_mask;
 }
 
+/* BINDLESS */
+
+struct si_bindless_descriptor_slab
+{
+   struct pb_slab base;
+   struct r600_resource *buffer;
+   struct si_bindless_descriptor *entries;
+};
+
+bool si_bindless_descriptor_can_reclaim_slab(void *priv,
+struct pb_slab_entry *entry)
+{
+   /* Do not allow to reclaim any bindless descriptors for now because the
+* GPU might be using them. This should be improved later on.
+*/
+   return false;
+}
+
+struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
+ unsigned entry_size,
+ unsigned group_index)
+{
+   struct si_context *sctx = priv;
+   struct si_screen *sscreen = sctx->screen;
+   struct si_bindless_descriptor_slab *slab;
+
+   slab = CALLOC_STRUCT(si_bindless_descriptor_slab);
+   if (!slab)
+   return NULL;
+
+   /* Create a buffer in VRAM for 1024 bindless descriptors. */
+   slab->buffer = (struct r600_resource *)
+   pipe_buffer_create(>b.b, 0,
+  PIPE_USAGE_DEFAULT, 64 * 1024);
+   if (!slab->buffer)
+   goto fail;
+
+   slab->base.num_entries = slab->buffer->bo_size / entry_size;
+   slab->base.num_free = slab->base.num_entries;
+   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
+   if (!slab->entries)
+   goto fail_buffer;
+
+   LIST_INITHEAD(>base.free);
+
+   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
+   struct si_bindless_descriptor *desc = >entries[i];
+
+   desc->entry.slab = >base;
+   desc->entry.group_index = group_index;
+   desc->buffer = slab->buffer;
+   desc->offset = i * entry_size;
+
+   LIST_ADDTAIL(>entry.head, >base.free);
+   }
+
+   /* Add the descriptor to the per-context list. */
+   util_dynarray_append(>bindless_descriptors,
+   struct r600_resource *, slab->buffer);
+
+   return >base;
+
+fail_buffer:
+   r600_resource_reference(>buffer, NULL);
+fail:
+   FREE(slab);
+   return NULL;
+}
+
+void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab)
+{
+   struct si_context *sctx = priv;
+   struct si_bindless_descriptor_slab *slab =
+   (struct si_bindless_descriptor_slab *)pslab;
+
+   /* Remove the descriptor from the per-context list. */
+   util_dynarray_delete_unordered(>bindless_descriptors,
+  struct r600_resource *, slab->buffer);
+
+   r600_resource_reference(>buffer, NULL);
+   FREE(slab->entries);
+   FREE(slab);
+}
+
 /* INIT/DEINIT/UPLOAD */
 
 /* GFX9 has only 4KB of CE, while previous chips had 32KB. In order
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 031e4731bf..14fe9dd6a3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -94,6 +94,9 @@ static void si_destroy_context(struct pipe_context *context)
r600_resource_reference(>last_trace_buf, NULL);
radeon_clear_saved_cs(>last_gfx);
 
+   pb_slabs_deinit(>bindless_descriptor_slabs);
+ 

[Mesa-dev] [PATCH v3 50/63] radeonsi: add si_set_shader_image_desc() helper

2017-06-09 Thread Samuel Pitoiset
To share some common code between bound and bindless images.

v3: - use vi_dcc_enabled() instead of dcc_offset for images

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Marek Olšák  (v2)
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 79 ---
 1 file changed, 47 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index a6e148d84a..ec19561fc0 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -741,28 +741,16 @@ si_mark_image_range_valid(const struct pipe_image_view 
*view)
   view->u.buf.offset + view->u.buf.size);
 }
 
-static void si_set_shader_image(struct si_context *ctx,
-   unsigned shader,
-   unsigned slot, const struct pipe_image_view 
*view,
-   bool skip_decompress)
+static void si_set_shader_image_desc(struct si_context *ctx,
+const struct pipe_image_view *view,
+bool skip_decompress,
+uint32_t *desc)
 {
struct si_screen *screen = ctx->screen;
-   struct si_images_info *images = >images[shader];
-   struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, 
shader);
struct r600_resource *res;
-   unsigned desc_slot = si_get_image_slot(slot);
-   uint32_t *desc = descs->list + desc_slot * 8;
-
-   if (!view || !view->resource) {
-   si_disable_shader_image(ctx, shader, slot);
-   return;
-   }
 
res = (struct r600_resource *)view->resource;
 
-   if (>views[slot] != view)
-   util_copy_image_view(>views[slot], view);
-
if (res->b.b.target == PIPE_BUFFER) {
if (view->access & PIPE_IMAGE_ACCESS_WRITE)
si_mark_image_range_valid(view);
@@ -772,9 +760,6 @@ static void si_set_shader_image(struct si_context *ctx,
  view->u.buf.offset,
  view->u.buf.size, desc);
si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
-
-   images->needs_color_decompress_mask &= ~(1 << slot);
-   res->bind_history |= PIPE_BIND_SHADER_IMAGE;
} else {
static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
struct r600_texture *tex = (struct r600_texture *)res;
@@ -792,22 +777,10 @@ static void si_set_shader_image(struct si_context *ctx,
 * The decompression is relatively cheap if the surface
 * has been decompressed already.
 */
-   if (r600_texture_disable_dcc(>b, tex))
-   uses_dcc = false;
-   else
+   if (!r600_texture_disable_dcc(>b, tex))
ctx->b.decompress_dcc(>b.b, tex);
}
 
-   if (color_needs_decompression(tex)) {
-   images->needs_color_decompress_mask |= 1 << slot;
-   } else {
-   images->needs_color_decompress_mask &= ~(1 << slot);
-   }
-
-   if (uses_dcc &&
-   p_atomic_read(>framebuffers_bound))
-   ctx->need_check_render_feedback = true;
-
if (ctx->b.chip_class >= GFX9) {
/* Always set the base address. The swizzle modes don't
 * allow setting mipmap level offsets as the base.
@@ -843,6 +816,48 @@ static void si_set_shader_image(struct si_context *ctx,
   
util_format_get_blockwidth(view->format),
   false, desc);
}
+}
+
+static void si_set_shader_image(struct si_context *ctx,
+   unsigned shader,
+   unsigned slot, const struct pipe_image_view 
*view,
+   bool skip_decompress)
+{
+   struct si_images_info *images = >images[shader];
+   struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, 
shader);
+   struct r600_resource *res;
+   unsigned desc_slot = si_get_image_slot(slot);
+   uint32_t *desc = descs->list + desc_slot * 8;
+
+   if (!view || !view->resource) {
+   si_disable_shader_image(ctx, shader, slot);
+   return;
+   }
+
+   res = (struct r600_resource *)view->resource;
+
+   if (>views[slot] != view)
+   util_copy_image_view(>views[slot], view);
+
+   si_set_shader_image_desc(ctx, view, skip_decompress, desc);
+
+   if (res->b.b.target == PIPE_BUFFER) {
+   

[Mesa-dev] [PATCH v3 52/63] radeonsi: implement ARB_bindless_texture

2017-06-09 Thread Samuel Pitoiset
This implements the Gallium interface. Decompression of resident
textures/images will follow in the next patches.

v3: - do not unmap bindless descriptors
- remove unnecessary util_copy_image_view()
- use RADEON_USAGE_READWRITE because of the WRITE_DATA packet
- replace util_dynarray_delete by util_dynarray_delete_unordered
- fix typo
v2: - fix a memleak related to util_copy_image_view()
- remove "texture" parameter from create_texture_handle()
- store pipe_sampler_view instead of si_sampler_view
- make use of pipe_sampler_view_reference() to fix a refcount issue
- rename si_resident_descriptor to si_bindless_descriptor
- use util_dynarray_*
- add more comments

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 249 ++
 src/gallium/drivers/radeonsi/si_pipe.c|  15 ++
 src/gallium/drivers/radeonsi/si_pipe.h|  20 +++
 3 files changed, 284 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 16ffdef95e..6b284f193e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -60,6 +60,7 @@
 #include "sid.h"
 #include "gfx9d.h"
 
+#include "util/hash_table.h"
 #include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
@@ -2121,6 +2122,248 @@ void si_bindless_descriptor_slab_free(void *priv, 
struct pb_slab *pslab)
FREE(slab);
 }
 
+static struct si_bindless_descriptor *
+si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
+ unsigned size)
+{
+   struct si_screen *sscreen = sctx->screen;
+   struct si_bindless_descriptor *desc;
+   struct pb_slab_entry *entry;
+   void *ptr;
+
+   /* Sub-allocate the bindless descriptor from a slab to avoid dealing
+* with a ton of buffers and for reducing the winsys overhead.
+*/
+   entry = pb_slab_alloc(>bindless_descriptor_slabs, 64, 0);
+   if (!entry)
+   return NULL;
+
+   desc = NULL;
+   desc = container_of(entry, desc, entry);
+
+   /* Upload the descriptor directly in VRAM. Because the slabs are
+* currently never reclaimed, we don't need to synchronize the
+* operation.
+*/
+   ptr = sscreen->b.ws->buffer_map(desc->buffer->buf, NULL,
+   PIPE_TRANSFER_WRITE |
+   PIPE_TRANSFER_UNSYNCHRONIZED);
+   util_memcpy_cpu_to_le32(ptr + desc->offset, desc_list, size);
+
+   return desc;
+}
+
+static uint64_t si_create_texture_handle(struct pipe_context *ctx,
+struct pipe_sampler_view *view,
+const struct pipe_sampler_state *state)
+{
+   struct si_sampler_view *sview = (struct si_sampler_view *)view;
+   struct si_context *sctx = (struct si_context *)ctx;
+   struct si_texture_handle *tex_handle;
+   struct si_sampler_state *sstate;
+   uint32_t desc_list[16];
+   uint64_t handle;
+
+   tex_handle = CALLOC_STRUCT(si_texture_handle);
+   if (!tex_handle)
+   return 0;
+
+   memset(desc_list, 0, sizeof(desc_list));
+   si_init_descriptor_list(_list[0], 16, 1, null_texture_descriptor);
+
+   sstate = ctx->create_sampler_state(ctx, state);
+   if (!sstate) {
+   FREE(tex_handle);
+   return 0;
+   }
+
+   si_set_sampler_view_desc(sctx, sview, sstate, _list[0]);
+   ctx->delete_sampler_state(ctx, sstate);
+
+   tex_handle->desc = si_create_bindless_descriptor(sctx, desc_list,
+sizeof(desc_list));
+   if (!tex_handle->desc) {
+   FREE(tex_handle);
+   return 0;
+   }
+
+   handle = tex_handle->desc->buffer->gpu_address +
+tex_handle->desc->offset;
+
+   if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle,
+tex_handle)) {
+   pb_slab_free(>bindless_descriptor_slabs,
+_handle->desc->entry);
+   FREE(tex_handle);
+   return 0;
+   }
+
+   pipe_sampler_view_reference(_handle->view, view);
+
+   return handle;
+}
+
+static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle)
+{
+   struct si_context *sctx = (struct si_context *)ctx;
+   struct si_texture_handle *tex_handle;
+   struct hash_entry *entry;
+
+   entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
+   if (!entry)
+   return;
+
+   tex_handle = (struct si_texture_handle *)entry->data;
+
+   pipe_sampler_view_reference(_handle->view, NULL);
+   _mesa_hash_table_remove(sctx->tex_handles, entry);
+   

[Mesa-dev] [PATCH v3 42/63] st/mesa: add st_convert_image_from_unit() helper

2017-06-09 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
---
 src/mesa/state_tracker/st_atom_image.c | 33 ++---
 src/mesa/state_tracker/st_texture.h|  5 +
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_image.c 
b/src/mesa/state_tracker/st_atom_image.c
index 381eca191a..5b914637a2 100644
--- a/src/mesa/state_tracker/st_atom_image.c
+++ b/src/mesa/state_tracker/st_atom_image.c
@@ -102,6 +102,27 @@ st_convert_image(const struct st_context *st, const struct 
gl_image_unit *u,
}
 }
 
+/**
+ * Get a pipe_image_view object from an image unit.
+ */
+void
+st_convert_image_from_unit(const struct st_context *st,
+   struct pipe_image_view *img,
+   GLuint imgUnit)
+{
+   struct gl_image_unit *u = >ctx->ImageUnits[imgUnit];
+   struct st_texture_object *stObj = st_texture_object(u->TexObj);
+
+   if (!_mesa_is_image_unit_valid(st->ctx, u) ||
+   !st_finalize_texture(st->ctx, st->pipe, u->TexObj, 0) ||
+   !stObj->pt) {
+  memset(img, 0, sizeof(*img));
+  return;
+   }
+
+   st_convert_image(st, u, img);
+}
+
 static void
 st_bind_images(struct st_context *st, struct gl_program *prog,
enum pipe_shader_type shader_type)
@@ -116,19 +137,9 @@ st_bind_images(struct st_context *st, struct gl_program 
*prog,
c = >ctx->Const.Program[prog->info.stage];
 
for (i = 0; i < prog->info.num_images; i++) {
-  struct gl_image_unit *u =
- >ctx->ImageUnits[prog->sh.ImageUnits[i]];
-  struct st_texture_object *stObj = st_texture_object(u->TexObj);
   struct pipe_image_view *img = [i];
 
-  if (!_mesa_is_image_unit_valid(st->ctx, u) ||
-  !st_finalize_texture(st->ctx, st->pipe, u->TexObj, 0) ||
-  !stObj->pt) {
- memset(img, 0, sizeof(*img));
- continue;
-  }
-
-  st_convert_image(st, u, img);
+  st_convert_image_from_unit(st, img, prog->sh.ImageUnits[i]);
}
cso_set_shader_images(st->cso_context, shader_type, 0,
  prog->info.num_images, images);
diff --git a/src/mesa/state_tracker/st_texture.h 
b/src/mesa/state_tracker/st_texture.h
index e73de2f1d3..7f8a0cb841 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -259,6 +259,11 @@ st_convert_image(const struct st_context *st, const struct 
gl_image_unit *u,
  struct pipe_image_view *img);
 
 void
+st_convert_image_from_unit(const struct st_context *st,
+   struct pipe_image_view *img,
+   GLuint imgUnit);
+
+void
 st_convert_sampler(const struct st_context *st,
const struct gl_texture_object *texobj,
const struct gl_sampler_object *msamp,
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >