Re: [Mesa-dev] [PATCH] Revert "st_glsl_to_tgsi: rewrite rename registers to use array fully."

2017-07-31 Thread Gert Wollny
Am Dienstag, den 01.08.2017, 09:32 +1000 schrieb Dave Airlie:
> From: Dave Airlie 
> 
> This reverts commit 3008161d28e38336ba39aba4769a2deaf9732f55,
> which caused a regression for VMWare.
> 
> The initial code had some recursion in it that I removed by accident;
> trying to add back the recursion broke lots of things, so take the high
> road and revert for now.

Since I've prepared a patch set that improves the register merging [1]
and is awaiting another review, I'm wondering which part broke the
recursion?

[1] uses David's rename_temp_registers method, but changes the initial
lifetime estimation, and should improve the overall situation for
drivers with limited registers (it does for R600g).
It would be nice to see if it also helps with the VMWare driver and
avoids the regression. 

(The latest version of the patch set does not apply to master, though;
a rebased version can be found at [2].)

many thanks, 
Gert 

[1] https://patchwork.freedesktop.org/series/25594/
[2] https://github.com/gerddie/mesa/tree/regrename-v7


 






> 
> Fixes: 3008161d (st_glsl_to_tgsi: rewrite rename registers to use
> array fully.)
> Reviewed-by: Brian Paul 
> Tested-by: Brian Paul 
> Signed-off-by: Dave Airlie 
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 55 --
> 
>  1 file changed, 29 insertions(+), 26 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 3983fe7..d496fff 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -399,7 +399,7 @@ find_array_type(struct inout_decl *decls,
> unsigned count, unsigned array_id)
>  }
>  
>  struct rename_reg_pair {
> -   bool valid;
> +   int old_reg;
> int new_reg;
>  };
>  
> @@ -568,7 +568,7 @@ public:
>  
> void simplify_cmp(void);
>  
> -   void rename_temp_registers(struct rename_reg_pair *renames);
> +   void rename_temp_registers(int num_renames, struct
> rename_reg_pair *renames);
> void get_first_temp_read(int *first_reads);
> void get_first_temp_write(int *first_writes);
> void get_last_temp_read_first_temp_write(int *last_reads, int
> *first_writes);
> @@ -4835,37 +4835,36 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
>  
>  /* Replaces all references to a temporary register index with
> another index. */
>  void
> -glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair
> *renames)
> +glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct
> rename_reg_pair *renames)
>  {
> foreach_in_list(glsl_to_tgsi_instruction, inst, 
> >instructions) {
>    unsigned j;
> +  int k;
>    for (j = 0; j < num_inst_src_regs(inst); j++) {
> - if (inst->src[j].file == PROGRAM_TEMPORARY) {
> -int old_idx = inst->src[j].index;
> -if (renames[old_idx].valid)
> -   inst->src[j].index = renames[old_idx].new_reg;
> - }
> + if (inst->src[j].file == PROGRAM_TEMPORARY)
> +for (k = 0; k < num_renames; k++)
> +   if (inst->src[j].index == renames[k].old_reg)
> +  inst->src[j].index = renames[k].new_reg;
>    }
>  
>    for (j = 0; j < inst->tex_offset_num_offset; j++) {
> - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
> -int old_idx = inst->tex_offsets[j].index;
> -if (renames[old_idx].valid)
> -   inst->tex_offsets[j].index =
> renames[old_idx].new_reg;
> - }
> + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
> +for (k = 0; k < num_renames; k++)
> +   if (inst->tex_offsets[j].index == renames[k].old_reg)
> +  inst->tex_offsets[j].index = renames[k].new_reg;
>    }
>  
>    if (inst->resource.file == PROGRAM_TEMPORARY) {
> - int old_idx = inst->resource.index;
> - if (renames[old_idx].valid)
> -inst->resource.index = renames[old_idx].new_reg;
> + for (k = 0; k < num_renames; k++)
> +if (inst->resource.index == renames[k].old_reg)
> +   inst->resource.index = renames[k].new_reg;
>    }
>  
>    for (j = 0; j < num_inst_dst_regs(inst); j++) {
> - if (inst->dst[j].file == PROGRAM_TEMPORARY) {
> -int old_idx = inst->dst[j].index;
> -if (renames[old_idx].valid)
> -   inst->dst[j].index = renames[old_idx].new_reg;}
> + if (inst->dst[j].file == PROGRAM_TEMPORARY)
> + for (k = 0; k < num_renames; k++)
> +if (inst->dst[j].index == renames[k].old_reg)
> +   inst->dst[j].index = renames[k].new_reg;
>    }
> }
>  }
> @@ -5446,6 +5445,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
> int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
> struct rename_reg_pair *renames = 

[Mesa-dev] [PATCH] egl: Add swrast support to surfaceless platform

2017-07-31 Thread Akihiko Odaki
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101397
---
 src/egl/drivers/dri2/platform_surfaceless.c | 114 ++--
 src/gallium/state_trackers/dri/drisw.c  |  45 ++-
 2 files changed, 148 insertions(+), 11 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_surfaceless.c 
b/src/egl/drivers/dri2/platform_surfaceless.c
index 1091b4febd..5487c89816 100644
--- a/src/egl/drivers/dri2/platform_surfaceless.c
+++ b/src/egl/drivers/dri2/platform_surfaceless.c
@@ -133,9 +133,16 @@ dri2_surfaceless_create_surface(_EGLDriver *drv, 
_EGLDisplay *disp, EGLint type,
if (!config)
   goto cleanup_surface;
 
-   dri2_surf->dri_drawable =
-  dri2_dpy->image_driver->createNewDrawable(dri2_dpy->dri_screen, config,
-dri2_surf);
+   if (dri2_dpy->image_driver) {
+  dri2_surf->dri_drawable =
+ dri2_dpy->image_driver->createNewDrawable(dri2_dpy->dri_screen, 
config,
+   dri2_surf);
+   } else {
+  assert(dri2_dpy->swrast);
+  dri2_surf->dri_drawable =
+ dri2_dpy->swrast->createNewDrawable(dri2_dpy->dri_screen, config,
+ dri2_surf);
+   }
if (dri2_surf->dri_drawable == NULL) {
   _eglError(EGL_BAD_ALLOC, "image->createNewDrawable");
   goto cleanup_surface;
@@ -229,7 +236,24 @@ surfaceless_add_configs_for_visuals(_EGLDriver *drv, 
_EGLDisplay *dpy)
return (config_count != 0);
 }
 
-static const struct dri2_egl_display_vtbl dri2_surfaceless_display_vtbl = {
+static const struct dri2_egl_display_vtbl dri2_surfaceless_swrast_display_vtbl 
= {
+   .create_pixmap_surface = dri2_fallback_create_pixmap_surface,
+   .create_pbuffer_surface = dri2_surfaceless_create_pbuffer_surface,
+   .destroy_surface = surfaceless_destroy_surface,
+   .create_image = dri2_create_image_khr,
+   .swap_interval = dri2_fallback_swap_interval,
+   .swap_buffers = surfaceless_swap_buffers,
+   .swap_buffers_region = dri2_fallback_swap_buffers_region,
+   .set_damage_region = dri2_fallback_set_damage_region,
+   .post_sub_buffer = dri2_fallback_post_sub_buffer,
+   .copy_buffers = dri2_fallback_copy_buffers,
+   .query_buffer_age = dri2_fallback_query_buffer_age,
+   .create_wayland_buffer_from_image = 
dri2_fallback_create_wayland_buffer_from_image,
+   .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
+};
+
+static const struct dri2_egl_display_vtbl dri2_surfaceless_dri3_display_vtbl = 
{
.create_pixmap_surface = dri2_fallback_create_pixmap_surface,
.create_pbuffer_surface = dri2_surfaceless_create_pbuffer_surface,
.destroy_surface = surfaceless_destroy_surface,
@@ -252,6 +276,66 @@ surfaceless_flush_front_buffer(__DRIdrawable *driDrawable, 
void *loaderPrivate)
 {
 }
 
+static const __DRIextension *swrast_loader_extensions[] = {
+   NULL,
+};
+
+static EGLBoolean
+dri2_initialize_surfaceless_swrast(_EGLDriver *drv, _EGLDisplay *disp)
+{
+   struct dri2_egl_display *dri2_dpy;
+   const char* err;
+
+   loader_set_logger(_eglLog);
+
+   dri2_dpy = calloc(1, sizeof *dri2_dpy);
+   if (!dri2_dpy)
+  return _eglError(EGL_BAD_ALLOC, "eglInitialize");
+
+   dri2_dpy->fd = -1;
+   disp->DriverData = (void *) dri2_dpy;
+
+   /*
+* Every hardware driver_name is set using strdup. Doing the same in
+* here will allow us to simply free the memory at dri2_terminate().
+*/
+   dri2_dpy->driver_name = strdup("swrast");
+   if (!dri2_load_driver_swrast(disp)) {
+  err = "DRI2: failed to load driver";
+  goto cleanup;
+   }
+
+   dri2_dpy->loader_extensions = swrast_loader_extensions;
+
+   if (!dri2_create_screen(disp)) {
+  err = "DRI2: failed to create screen";
+  goto cleanup;
+   }
+
+   if (!dri2_setup_extensions(disp)) {
+  err = "DRI2: failed to find required DRI extensions";
+  goto cleanup;
+   }
+
+   dri2_setup_screen(disp);
+
+   if (!surfaceless_add_configs_for_visuals(drv, disp)) {
+  err = "DRI2: failed to add configs";
+  goto cleanup;
+   }
+
+   /* Fill vtbl last to prevent accidentally calling virtual function during
+* initialization.
+*/
+   dri2_dpy->vtbl = _surfaceless_swrast_display_vtbl;
+
+   return EGL_TRUE;
+
+ cleanup:
+   dri2_display_destroy(disp);
+   return _eglError(EGL_NOT_INITIALIZED, err);
+}
+
 static const __DRIimageLoaderExtension image_loader_extension = {
.base = { __DRI_IMAGE_LOADER, 1 },
.getBuffers   = surfaceless_image_get_buffers,
@@ -267,8 +351,8 @@ static const __DRIextension *image_loader_extensions[] = {
NULL,
 };
 
-EGLBoolean
-dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp)
+static EGLBoolean
+dri2_initialize_surfaceless_dri3(_EGLDriver *drv, _EGLDisplay *disp)
 {
struct dri2_egl_display *dri2_dpy;
const char* err;
@@ -336,7 +420,7 @@ dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay 
*disp)
/* Fill 

Re: [Mesa-dev] [PATCH 10/10] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote

2017-07-31 Thread Connor Abbott
I wrote some crucible tests, but it seems like I never got around to
sending them out. I can put a branch up tomorrow. They're dependent on
this series that never got any attention, though:
https://lists.freedesktop.org/archives/mesa-dev/2017-June/158480.html

On Mon, Jul 31, 2017 at 10:03 PM, Jason Ekstrand  wrote:
> Out of curiosity, are there any tests for this anywhere?
>
> --Jason
>
> On Mon, Jul 31, 2017 at 7:24 PM, Connor Abbott 
> wrote:
>>
>> From: Connor Abbott 
>>
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 49
>> +
>>  src/amd/vulkan/radv_device.c|  8 +++
>>  src/amd/vulkan/radv_pipeline.c  |  2 ++
>>  3 files changed, 59 insertions(+)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>> b/src/amd/common/ac_nir_to_llvm.c
>> index f756b9a..3dd1cbb 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -4142,6 +4142,55 @@ static void visit_intrinsic(struct ac_nir_context
>> *ctx,
>> case nir_intrinsic_load_patch_vertices_in:
>> result = LLVMConstInt(ctx->ac.i32,
>> ctx->nctx->options->key.tcs.input_vertices, false);
>> break;
>> +   case nir_intrinsic_ballot:
>> +   result = ac_build_ballot(>ac, get_src(ctx,
>> instr->src[0]));
>> +   break;
>> +   case nir_intrinsic_read_first_invocation: {
>> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>> +   ac_build_optimization_barrier(>ac, );
>> +   LLVMValueRef srcs[1] = { src0 };
>> +   result = ac_build_intrinsic(>ac,
>> "llvm.amdgcn.readfirstlane",
>> +   ctx->ac.i32, srcs, 1,
>> +   AC_FUNC_ATTR_NOUNWIND |
>> +   AC_FUNC_ATTR_READNONE |
>> +   AC_FUNC_ATTR_CONVERGENT);
>> +   break;
>> +}
>> +   case nir_intrinsic_read_invocation: {
>> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
>> +   ac_build_optimization_barrier(>ac, );
>> +   LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1])
>> };
>> +   result = ac_build_intrinsic(>ac,
>> "llvm.amdgcn.readlane",
>> +   ctx->ac.i32, srcs, 2,
>> +   AC_FUNC_ATTR_NOUNWIND |
>> +   AC_FUNC_ATTR_READNONE |
>> +   AC_FUNC_ATTR_CONVERGENT);
>> +   break;
>> +}
>> +   case nir_intrinsic_load_subgroup_invocation:
>> +   result = ac_get_thread_id(>ac);
>> +   break;
>> +   case nir_intrinsic_load_subgroup_size:
>> +   result = LLVMConstInt(ctx->ac.i32, 64, 0);
>> +   break;
>> +   case nir_intrinsic_vote_all:
>> +   result = LLVMBuildSExt(ctx->ac.builder,
>> +  ac_build_vote_all(>ac,
>> +get_src(ctx,
>> instr->src[0])),
>> +  ctx->ac.i32, "");
>> +   break;
>> +   case nir_intrinsic_vote_any:
>> +   result = LLVMBuildSExt(ctx->ac.builder,
>> +  ac_build_vote_any(>ac,
>> +get_src(ctx,
>> instr->src[0])),
>> +  ctx->ac.i32, "");
>> +   break;
>> +   case nir_intrinsic_vote_eq:
>> +   result = LLVMBuildSExt(ctx->ac.builder,
>> +  ac_build_vote_eq(>ac,
>> +get_src(ctx,
>> instr->src[0])),
>> +  ctx->ac.i32, "");
>> +   break;
>> default:
>> fprintf(stderr, "Unknown intrinsic: ");
>> nir_print_instr(>instr, stderr);
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index eb25872..e8fe086 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -165,6 +165,14 @@ static const VkExtensionProperties
>> common_device_extensions[] = {
>> .extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
>> .specVersion = 1,
>> },
>> +   {
>> +   .extensionName =
>> VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
>> +   .specVersion = 1,
>> +   },
>> +   {
>> +   .extensionName =
>> VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
>> +   .specVersion = 1,
>> +   },
>>  };
>>  static const VkExtensionProperties ext_sema_device_extensions[] = {
>> {
>> diff --git a/src/amd/vulkan/radv_pipeline.c
>> 

Re: [Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

2017-07-31 Thread Dave Airlie
On 1 August 2017 at 14:14, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This looks like it's supported since llvm 3.9 at least,
> so switch over radeonsi and radv to using it, -pro also
> uses this. We can now drop creating lds for these operations
> as the ds_swizzle operation doesn't actually write to lds at all.

This also fixes a bunch of multisample interpolation tests on
radv on CIK.

Not 100% sure why.
Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/10] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote

2017-07-31 Thread Jason Ekstrand
Out of curiosity, are there any tests for this anywhere?

--Jason

On Mon, Jul 31, 2017 at 7:24 PM, Connor Abbott 
wrote:

> From: Connor Abbott 
>
> ---
>  src/amd/common/ac_nir_to_llvm.c | 49 ++
> +++
>  src/amd/vulkan/radv_device.c|  8 +++
>  src/amd/vulkan/radv_pipeline.c  |  2 ++
>  3 files changed, 59 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_
> llvm.c
> index f756b9a..3dd1cbb 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -4142,6 +4142,55 @@ static void visit_intrinsic(struct ac_nir_context
> *ctx,
> case nir_intrinsic_load_patch_vertices_in:
> result = LLVMConstInt(ctx->ac.i32,
> ctx->nctx->options->key.tcs.input_vertices, false);
> break;
> +   case nir_intrinsic_ballot:
> +   result = ac_build_ballot(>ac, get_src(ctx,
> instr->src[0]));
> +   break;
> +   case nir_intrinsic_read_first_invocation: {
> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> +   ac_build_optimization_barrier(>ac, );
> +   LLVMValueRef srcs[1] = { src0 };
> +   result = ac_build_intrinsic(>ac,
> "llvm.amdgcn.readfirstlane",
> +   ctx->ac.i32, srcs, 1,
> +   AC_FUNC_ATTR_NOUNWIND |
> +   AC_FUNC_ATTR_READNONE |
> +   AC_FUNC_ATTR_CONVERGENT);
> +   break;
> +}
> +   case nir_intrinsic_read_invocation: {
> +   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
> +   ac_build_optimization_barrier(>ac, );
> +   LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1])
> };
> +   result = ac_build_intrinsic(>ac,
> "llvm.amdgcn.readlane",
> +   ctx->ac.i32, srcs, 2,
> +   AC_FUNC_ATTR_NOUNWIND |
> +   AC_FUNC_ATTR_READNONE |
> +   AC_FUNC_ATTR_CONVERGENT);
> +   break;
> +}
> +   case nir_intrinsic_load_subgroup_invocation:
> +   result = ac_get_thread_id(>ac);
> +   break;
> +   case nir_intrinsic_load_subgroup_size:
> +   result = LLVMConstInt(ctx->ac.i32, 64, 0);
> +   break;
> +   case nir_intrinsic_vote_all:
> +   result = LLVMBuildSExt(ctx->ac.builder,
> +  ac_build_vote_all(>ac,
> +get_src(ctx,
> instr->src[0])),
> +  ctx->ac.i32, "");
> +   break;
> +   case nir_intrinsic_vote_any:
> +   result = LLVMBuildSExt(ctx->ac.builder,
> +  ac_build_vote_any(>ac,
> +get_src(ctx,
> instr->src[0])),
> +  ctx->ac.i32, "");
> +   break;
> +   case nir_intrinsic_vote_eq:
> +   result = LLVMBuildSExt(ctx->ac.builder,
> +  ac_build_vote_eq(>ac,
> +get_src(ctx,
> instr->src[0])),
> +  ctx->ac.i32, "");
> +   break;
> default:
> fprintf(stderr, "Unknown intrinsic: ");
> nir_print_instr(>instr, stderr);
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index eb25872..e8fe086 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -165,6 +165,14 @@ static const VkExtensionProperties
> common_device_extensions[] = {
> .extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
> .specVersion = 1,
> },
> +   {
> +   .extensionName = VK_EXT_SHADER_SUBGROUP_BALLOT_
> EXTENSION_NAME,
> +   .specVersion = 1,
> +   },
> +   {
> +   .extensionName = VK_EXT_SHADER_SUBGROUP_VOTE_
> EXTENSION_NAME,
> +   .specVersion = 1,
> +   },
>  };
>  static const VkExtensionProperties ext_sema_device_extensions[] = {
> {
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_
> pipeline.c
> index 2fc64e8..c354807 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -234,6 +234,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
> .tessellation = true,
> .int64 = true,
> .variable_pointers = true,
> +   .shader_ballot = true,
> +   .shader_group_vote = true,
> 

Re: [Mesa-dev] [PATCH 1/2] android: link libmesa_intel_common with zlib and expat

2017-07-31 Thread Tapani Pälli



On 08/01/2017 12:17 AM, Rob Herring wrote:

On Mon, Jul 31, 2017 at 3:45 PM, Emil Velikov  wrote:

On 31 July 2017 at 09:32, Tapani Pälli  wrote:

Makes it possible to build Mesa on Android with -DDEBUG with
the next patch that reverts 4f695731.

Signed-off-by: Tapani Pälli 
---
  src/intel/Android.common.mk | 5 +
  1 file changed, 5 insertions(+)

diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk
index f056f0a..12cea6e 100644
--- a/src/intel/Android.common.mk
+++ b/src/intel/Android.common.mk
@@ -32,10 +32,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
  LOCAL_SRC_FILES := $(COMMON_FILES)

  LOCAL_C_INCLUDES := \
+   external/zlib \

Ideally this will be part of zlib's LOCAL_EXPORT_C_INCLUDE_DIRS, at
some point in the future.


FYI, it already is and has been since M. So it depends whether you
care about L support. IMO, with O coming out soon, it's time to drop
L. 3 years/versions of Android support in master seems like plenty to
me.



Yeah, this was to not break L support like with commit 
bfc0c23843008fd510afa263ebe371bef3346445. Android-IA is fine with 
dropping L support, not sure if Android-x86 wants it?


// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

2017-07-31 Thread Dave Airlie
From: Dave Airlie 

This looks like it's supported since llvm 3.9 at least,
so switch over radeonsi and radv to using it, -pro also
uses this. We can now drop creating lds for these operations
as the ds_swizzle operation doesn't actually write to lds at all.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_llvm_build.c   | 57 +++-
 src/amd/common/ac_llvm_build.h   |  1 -
 src/amd/common/ac_nir_to_llvm.c  |  9 +
 src/gallium/drivers/radeonsi/si_shader.c | 16 +
 4 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9b939c1..a38aad6 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
  bool has_ds_bpermute,
  uint32_t mask,
  int idx,
- LLVMValueRef lds,
  LLVMValueRef val)
 {
-   LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
+   LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
 
-   thread_id = ac_get_thread_id(ctx);
+   if (has_ds_bpermute) {
+   LLVMValueRef thread_id, tl_tid, trbl_tid;
+   thread_id = ac_get_thread_id(ctx);
 
-   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
- LLVMConstInt(ctx->i32, mask, false), "");
+   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
+ LLVMConstInt(ctx->i32, mask, false), "");
 
-   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
-   LLVMConstInt(ctx->i32, idx, false), "");
+   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
+   LLVMConstInt(ctx->i32, idx, false), "");
 
-   if (has_ds_bpermute) {
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
   LLVMConstInt(ctx->i32, 4, false), "");
args[1] = val;
@@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
  AC_FUNC_ATTR_READNONE |
  AC_FUNC_ATTR_CONVERGENT);
} else {
-   LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+   uint32_t masks[2];
+
+   switch (mask) {
+   case AC_TID_MASK_TOP_LEFT:
+   masks[0] = 0x8000;
+   if (idx == 1)
+   masks[1] = 0x8055;
+   else
+   masks[1] = 0x80aa;
+
+   break;
+   case AC_TID_MASK_TOP:
+   masks[0] = 0x8044;
+   masks[1] = 0x80ee;
+   break;
+   case AC_TID_MASK_LEFT:
+   masks[0] = 0x80a0;
+   masks[1] = 0x80f5;
+   break;
+   }
 
-   store_ptr = ac_build_gep0(ctx, lds, thread_id);
-   load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
-   load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
+   args[0] = val;
+   args[1] = LLVMConstInt(ctx->i32, masks[0], false);
 
-   LLVMBuildStore(ctx->builder, val, store_ptr);
-   tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
-   trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
+   tl = ac_build_intrinsic(ctx,
+   "llvm.amdgcn.ds.swizzle", ctx->i32,
+   args, 2,
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+
+   args[1] = LLVMConstInt(ctx->i32, masks[1], false);
+   trbl = ac_build_intrinsic(ctx,
+   "llvm.amdgcn.ds.swizzle", ctx->i32,
+   args, 2,
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
}
 
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 09fd585..ee27d3c 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
  bool has_ds_bpermute,
  uint32_t mask,
  int idx,
- LLVMValueRef lds,
  LLVMValueRef val);
 
 #define AC_SENDMSG_GS 2
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 530b581..dc765fe 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -68,8 +68,6 @@ struct ac_nir_context {
int num_locals;
LLVMValueRef *locals;
 
-  

Re: [Mesa-dev] [PATCH 1/4] nir: add new convergent, uniform-control, and cross-thread attributes

2017-07-31 Thread Matt Turner
On Mon, Jul 31, 2017 at 7:02 PM, Connor Abbott
 wrote:
> From: Connor Abbott 
>
> These are properties of the instruction that must be respected when
> moving it around, in addition to the usual SSA dominance guarantee.
> Previously, we only had special handling for fddx and fddy, in a very
> ad-hoc way. But with arb_shader_ballot and arb_shader_group_vote, we'll
> have to start handling a lot more instructions with similar constraints,
> so we want to add a more formal model of what optimizations can and
> cannot do.
>
> v2: don't add attribute for ALU instructions
> v3: special-case derivative ALU instructions
> v4: rename convergent to uniform-control, and add LLVM-style convergent
> attribute
> ---
>  src/compiler/nir/nir.h | 126 
> +
>  1 file changed, 126 insertions(+)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 9313b7a..24934f0 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -986,6 +986,39 @@ typedef enum {
>  * intrinsic are due to the register reads/writes.
>  */
> NIR_INTRINSIC_CAN_REORDER = (1 << 1),
> +
> +   /**
> +* Indicates whether this intrinsic is "convergent". An operation is
> +* convergent if results from one thread depend on results from another
> +* thread, but in such a way that additional threads being enabled doesn't
> +* affect the result of the operation. Examples of convergent operations
> +* include screen-space derivatives, readInvocation() from
> +* ARB_shader_ballot, etc. Note that this is a more precise version of
> +* LLVM's "convergent" attribute, which simply stipulates that control
> +* dependencies cannot be added, since the set of active threads can only 
> be
> +* reduced by adding control dependencies.
> +*/
> +   NIR_INTRINSIC_CONVERGENT = (1 << 2),
> +
> +   /**
> +* Indicates whether this intrinsic is "cross-thread". An operation is
> +* cross-thread if results in one thread depend on the set of active 
> threads
> +* when it is executed, as well as possibly the input value of the other
> +* threads, and therefore optimizations cannot change the execution mask
> +* when the operation is called. Examples of cross-thread operations 
> include
> +* the "any" reduction which returns "true" in all threads if any thread
> +* inputs "true", ballotARB() from ARB_shader_ballot, etc. Note that any
> +* cross-thread operation must be convergent.
> +*/
> +   NIR_INTRINSIC_CROSS_THREAD = (1 << 3),
> +
> +   /**
> +* Indicates that this intrinsic is guaranteed to always be called in
> +* uniform control flow, that is, control flow with the same execution 
> mask
> +* as when the program started. If an operation is uniform-control, it 
> must
> +* be convergent as well, since the optimizer must maintain the guarantee.
> +*/
> +   NIR_INTRINSIC_UNIFORM_CONTROL = (1 << 4),
>  } nir_intrinsic_semantic_flag;
>
>  /**
> @@ -1460,6 +1493,99 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
>  type, nir_instr_type_parallel_copy)
>
>  /*
> + * Helpers to determine if an instruction is cross-thread, convergent, or
> + * uniform-control. See 
> NIR_INTRINSIC_{CONVERGENT|CROSS_THREAD|UNIFORM_CONTROL}
> + * for the definitions.
> + */
> +static inline bool
> +nir_instr_is_uniform_control(const nir_instr *instr)
> +{
> +   switch (instr->type) {
> +   case nir_instr_type_alu:
> +  switch (nir_instr_as_alu(instr)->op) {
> +  case nir_op_fddx:
> +  case nir_op_fddy:
> +  case nir_op_fddx_fine:
> +  case nir_op_fddy_fine:
> +  case nir_op_fddx_coarse:
> +  case nir_op_fddy_coarse:
> + /* Section 8.13.1 (Derivative Functions) of the GLSL 4.50 spec says:
> +  *
> +  *"Derivatives are undefined within non-uniform control flow."
> +  *
> +  * Thus, we can assume they are called in uniform control flow.
> +  */
> + return true;
> +
> +  default:
> + return false;
> +  }
> +
> +   case nir_instr_type_intrinsic: {
> +  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +  return nir_intrinsic_infos[intrin->intrinsic].flags &
> + NIR_INTRINSIC_UNIFORM_CONTROL;
> +   }
> +
> +   case nir_instr_type_tex:
> + switch (nir_instr_as_tex(instr)->op) {
> + case nir_texop_tex:
> + case nir_texop_txb:
> + case nir_texop_lod:
> +/* These three take implicit derivatives, so they are
> + * uniform-control as well.
> + */
> +return true;
> +
> + default:
> +return false;
> + }

This block is indented too much.

> +
> +   default:
> +  return false;
> +   }
> +}
> +
> +static inline bool
> +nir_instr_is_cross_thread(const nir_instr *instr)
> +{
> +   switch (instr->type) {
> +   case 

Re: [Mesa-dev] [PATCH 1/3] nir: fix algebraic optimizations

2017-07-31 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Revert "mesa: stop assigning unused storage for non-bindless opaque types"

2017-07-31 Thread Timothy Arceri

On 01/08/17 06:59, Samuel Pitoiset wrote:

This reverts commit fcbb93e860246375d03f280f927f79d3645a8988 and
also  commit 7c5b204e38d8cae70f5bf26e7223da5bc448bb5c to avoid
compilation errors.

Basically, the parameter indexes look wrong when a non-bindless
sampler is declared inside a nested struct (because it is skipped).
I think it's safer to just restore the previous behaviour, which has
been here for ages, and also because the initial attempt was only a
small performance improvement.


I've got a proper fix. Just doing some final testing, will send soon.



This fixes a regression with
ES2-CTS.functional.shaders.struct.uniform.sampler_nested*.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101983
Cc: 17.2 
Signed-off-by: Samuel Pitoiset 
---
  src/mesa/program/ir_to_mesa.cpp | 56 +
  1 file changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index ac12b59d07..775211cefb 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2409,8 +2409,10 @@ namespace {
  class add_uniform_to_shader : public program_resource_visitor {
  public:
 add_uniform_to_shader(struct gl_shader_program *shader_program,
-struct gl_program_parameter_list *params)
-  : shader_program(shader_program), params(params), idx(-1)
+struct gl_program_parameter_list *params,
+ gl_shader_stage shader_type)
+  : shader_program(shader_program), params(params), idx(-1),
+shader_type(shader_type)
 {
/* empty */
 }
@@ -2433,6 +2435,7 @@ private:
 struct gl_program_parameter_list *params;
 int idx;
 ir_variable *var;
+   gl_shader_stage shader_type;
  };
  
  } /* anonymous namespace */

@@ -2444,18 +2447,49 @@ add_uniform_to_shader::visit_field(const glsl_type 
*type, const char *name,
 const enum glsl_interface_packing,
 bool /* last_field */)
  {
-   /* opaque types don't use storage in the param list unless they are
-* bindless samplers or images.
-*/
-   if (type->contains_opaque() && !var->data.bindless)
+   /* atomics don't get real storage */
+   if (type->contains_atomic())
return;
  
-   assert(_mesa_lookup_parameter_index(params, name) < 0);

+   gl_register_file file;
+   if (type->without_array()->is_sampler() && !var->data.bindless) {
+  file = PROGRAM_SAMPLER;
+   } else {
+  file = PROGRAM_UNIFORM;
+   }
+
+   int index = _mesa_lookup_parameter_index(params, name);
+   if (index < 0) {
+  unsigned size = type_size(type) * 4;
+
+  index = _mesa_add_parameter(params, file, name, size, type->gl_type,
+ NULL, NULL);
  
-   unsigned size = type_size(type) * 4;

+  /* Sampler uniform values are stored in prog->SamplerUnits,
+   * and the entry in that array is selected by this index we
+   * store in ParameterValues[].
+   */
+  if (file == PROGRAM_SAMPLER) {
+unsigned location;
+const bool found =
+   this->shader_program->UniformHash->get(location,
+  
params->Parameters[index].Name);
+assert(found);
+
+if (!found)
+   return;
+
+struct gl_uniform_storage *storage =
+>shader_program->data->UniformStorage[location];
  
-   int index = _mesa_add_parameter(params, PROGRAM_UNIFORM, name, size,

-   type->gl_type, NULL, NULL);
+ assert(storage->type->is_sampler() &&
+storage->opaque[shader_type].active);
+
+for (unsigned int j = 0; j < size / 4; j++)
+params->ParameterValues[index + j][0].f =
+   storage->opaque[shader_type].index + j;
+  }
+   }
  
 /* The first part of the uniform that's processed determines the base

  * location of the whole uniform (for structures).
@@ -2479,7 +2513,7 @@ _mesa_generate_parameters_list_for_uniforms(struct 
gl_shader_program
struct gl_program_parameter_list
*params)
  {
-   add_uniform_to_shader add(shader_program, params);
+   add_uniform_to_shader add(shader_program, params, sh->Stage);
  
 foreach_in_list(ir_instruction, node, sh->ir) {

ir_variable *var = node->as_variable();


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] radv/ac: enable EXT_shader_subgroup_ballot and EXT_shader_subgroup_vote

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

---
 src/amd/common/ac_nir_to_llvm.c | 49 +
 src/amd/vulkan/radv_device.c|  8 +++
 src/amd/vulkan/radv_pipeline.c  |  2 ++
 3 files changed, 59 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f756b9a..3dd1cbb 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4142,6 +4142,55 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_load_patch_vertices_in:
result = LLVMConstInt(ctx->ac.i32, 
ctx->nctx->options->key.tcs.input_vertices, false);
break;
+   case nir_intrinsic_ballot:
+   result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+   break;
+   case nir_intrinsic_read_first_invocation: {
+   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+   ac_build_optimization_barrier(&ctx->ac, &src0);
+   LLVMValueRef srcs[1] = { src0 };
+   result = ac_build_intrinsic(&ctx->ac, 
"llvm.amdgcn.readfirstlane",
+   ctx->ac.i32, srcs, 1,
+   AC_FUNC_ATTR_NOUNWIND |
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+   break;
+}
+   case nir_intrinsic_read_invocation: {
+   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+   ac_build_optimization_barrier(&ctx->ac, &src0);
+   LLVMValueRef srcs[2] = { src0, get_src(ctx, instr->src[1]) };
+   result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
+   ctx->ac.i32, srcs, 2,
+   AC_FUNC_ATTR_NOUNWIND |
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+   break;
+}
+   case nir_intrinsic_load_subgroup_invocation:
+   result = ac_get_thread_id(&ctx->ac);
+   break;
+   case nir_intrinsic_load_subgroup_size:
+   result = LLVMConstInt(ctx->ac.i32, 64, 0);
+   break;
+   case nir_intrinsic_vote_all:
+   result = LLVMBuildSExt(ctx->ac.builder,
+  ac_build_vote_all(&ctx->ac,
+get_src(ctx, 
instr->src[0])),
+  ctx->ac.i32, "");
+   break;
+   case nir_intrinsic_vote_any:
+   result = LLVMBuildSExt(ctx->ac.builder,
+  ac_build_vote_any(&ctx->ac,
+get_src(ctx, 
instr->src[0])),
+  ctx->ac.i32, "");
+   break;
+   case nir_intrinsic_vote_eq:
+   result = LLVMBuildSExt(ctx->ac.builder,
+  ac_build_vote_eq(&ctx->ac,
+get_src(ctx, 
instr->src[0])),
+  ctx->ac.i32, "");
+   break;
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index eb25872..e8fe086 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -165,6 +165,14 @@ static const VkExtensionProperties 
common_device_extensions[] = {
.extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
.specVersion = 1,
},
+   {
+   .extensionName = VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
+   .specVersion = 1,
+   },
+   {
+   .extensionName = VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
+   .specVersion = 1,
+   },
 };
 static const VkExtensionProperties ext_sema_device_extensions[] = {
{
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 2fc64e8..c354807 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -234,6 +234,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
.tessellation = true,
.int64 = true,
.variable_pointers = true,
+   .shader_ballot = true,
+   .shader_group_vote = true,
};
entry_point = spirv_to_nir(spirv, module->size / 4,
   spec_entries, num_spec_entries,
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/10] radv: call nir_opt_intrinsics()

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

For us, this will only lower subgroup mask instructions. It could do
other things in the future though.
---
 src/amd/vulkan/radv_pipeline.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index c4407ec..2fc64e8 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -65,6 +65,8 @@ static const struct nir_shader_compiler_options nir_options = 
{
.lower_unpack_unorm_4x8 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+   .lower_subgroup_masks = true,
+   .max_subgroup_size = 64,
.max_unroll_iterations = 32
 };
 
@@ -154,6 +156,7 @@ radv_optimize_nir(struct nir_shader *shader)
NIR_PASS_V(shader, nir_lower_64bit_pack);
 NIR_PASS_V(shader, nir_lower_alu_to_scalar);
 NIR_PASS_V(shader, nir_lower_phis_to_scalar);
+   NIR_PASS_V(shader, nir_opt_intrinsics);
 
 NIR_PASS(progress, shader, nir_copy_prop);
 NIR_PASS(progress, shader, nir_opt_remove_phis);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/10] radeonsi: move the guts of ARB_shader_group_vote emission to ac

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
---
 src/amd/common/ac_llvm_build.c   | 30 ++
 src/amd/common/ac_llvm_build.h   |  6 ++
 src/gallium/drivers/radeonsi/si_shader.c | 24 +++-
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2074938..42e2639 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -270,6 +270,36 @@ ac_build_ballot(struct ac_llvm_context *ctx,
 }
 
 LLVMValueRef
+ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+   LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+   return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
+}
+
+LLVMValueRef
+ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+   return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
+ LLVMConstInt(ctx->i64, 0, 0), "");
+}
+
+LLVMValueRef
+ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+   LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+   LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+
+   LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+vote_set, active_set, "");
+   LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ vote_set,
+ LLVMConstInt(ctx->i64, 0, 0), "");
+   return LLVMBuildOr(ctx->builder, all, none, "");
+}
+
+LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 840b501..c993df2 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -81,6 +81,12 @@ void ac_build_optimization_barrier(struct ac_llvm_context 
*ctx,
 
 LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
 
+LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef 
value);
+
+LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef 
value);
+
+LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 3ebcbee..456b413 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3754,13 +3754,8 @@ static void vote_all_emit(
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
-   LLVMValueRef active_set, vote_set;
-   LLVMValueRef tmp;
-
-   active_set = ac_build_ballot(&ctx->ac, ctx->i32_1);
-   vote_set = ac_build_ballot(&ctx->ac, emit_data->args[0]);
 
-   tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, 
"");
+LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
 }
@@ -3772,13 +3767,8 @@ static void vote_any_emit(
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
-   LLVMValueRef vote_set;
-   LLVMValueRef tmp;
 
-   vote_set = ac_build_ballot(&ctx->ac, emit_data->args[0]);
-
-   tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
-   vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
 }
@@ -3790,16 +3780,8 @@ static void vote_eq_emit(
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
-   LLVMValueRef active_set, vote_set;
-   LLVMValueRef all, none, tmp;
-
-   active_set = ac_build_ballot(&ctx->ac, ctx->i32_1);
-   vote_set = ac_build_ballot(&ctx->ac, emit_data->args[0]);
 
-   all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, 
"");
-   none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
-vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
-   tmp = LLVMBuildOr(gallivm->builder, all, none, "");
+LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
   

[Mesa-dev] [PATCH 07/10] radeonsi: move si_emit_ballot() to ac

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
---
 src/amd/common/ac_llvm_build.c   | 26 ++
 src/amd/common/ac_llvm_build.h   |  4 
 src/gallium/drivers/radeonsi/si_shader.c | 38 +---
 3 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 7de02fb..2074938 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -244,6 +244,32 @@ ac_build_optimization_barrier(struct ac_llvm_context *ctx,
 }
 
 LLVMValueRef
+ac_build_ballot(struct ac_llvm_context *ctx,
+   LLVMValueRef value)
+{
+   LLVMValueRef args[3] = {
+   value,
+   ctx->i32_0,
+   LLVMConstInt(ctx->i32, LLVMIntNE, 0)
+   };
+
+   /* We currently have no other way to prevent LLVM from lifting the icmp
+* calls to a dominating basic block.
+*/
+   ac_build_optimization_barrier(ctx, &args[0]);
+
+   if (LLVMTypeOf(args[0]) != ctx->i32)
+   args[0] = LLVMBuildBitCast(ctx->builder, args[0], ctx->i32, "");
+
+   return ac_build_intrinsic(ctx,
+ "llvm.amdgcn.icmp.i32",
+ ctx->i64, args, 3,
+ AC_FUNC_ATTR_NOUNWIND |
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+}
+
+LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index b3287cf..840b501 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -77,6 +77,10 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char 
*buf, unsigned bufsize);
 
 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
   LLVMValueRef *pvgpr);
+
+
+LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index fbe270f..3ebcbee 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3747,32 +3747,6 @@ static void build_interp_intrinsic(const struct 
lp_build_tgsi_action *action,
}
 }
 
-static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
-  LLVMValueRef value)
-{
-   struct gallivm_state *gallivm = &ctx->gallivm;
-   LLVMValueRef args[3] = {
-   value,
-   ctx->i32_0,
-   LLVMConstInt(ctx->i32, LLVMIntNE, 0)
-   };
-
-   /* We currently have no other way to prevent LLVM from lifting the icmp
-* calls to a dominating basic block.
-*/
-   ac_build_optimization_barrier(&ctx->ac, &args[0]);
-
-   if (LLVMTypeOf(args[0]) != ctx->i32)
-   args[0] = LLVMBuildBitCast(gallivm->builder, args[0], ctx->i32, 
"");
-
-   return lp_build_intrinsic(gallivm->builder,
- "llvm.amdgcn.icmp.i32",
- ctx->i64, args, 3,
- LP_FUNC_ATTR_NOUNWIND |
- LP_FUNC_ATTR_READNONE |
- LP_FUNC_ATTR_CONVERGENT);
-}
-
 static void vote_all_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
@@ -3783,8 +3757,8 @@ static void vote_all_emit(
LLVMValueRef active_set, vote_set;
LLVMValueRef tmp;
 
-   active_set = si_emit_ballot(ctx, ctx->i32_1);
-   vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+   active_set = ac_build_ballot(&ctx->ac, ctx->i32_1);
+   vote_set = ac_build_ballot(&ctx->ac, emit_data->args[0]);
 
tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, 
"");
emit_data->output[emit_data->chan] =
@@ -3801,7 +3775,7 @@ static void vote_any_emit(
LLVMValueRef vote_set;
LLVMValueRef tmp;
 
-   vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+   vote_set = ac_build_ballot(&ctx->ac, emit_data->args[0]);
 
tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
@@ -3819,8 +3793,8 @@ static void vote_eq_emit(
LLVMValueRef active_set, vote_set;
LLVMValueRef all, none, tmp;
 
-   active_set = si_emit_ballot(ctx, ctx->i32_1);
-   vote_set = si_emit_ballot(ctx, emit_data->args[0]);
+   active_set = ac_build_ballot(&ctx->ac, ctx->i32_1);
+   vote_set = 

[Mesa-dev] [PATCH 06/10] radeonsi: move emit_optimization_barrier() to ac

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
---
 src/amd/common/ac_llvm_build.c   | 43 ++
 src/amd/common/ac_llvm_build.h   |  2 ++
 src/gallium/drivers/radeonsi/si_shader.c | 45 ++--
 3 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 7a4987c..7de02fb 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -36,6 +36,7 @@
 #include "ac_exp_param.h"
 #include "util/bitscan.h"
 #include "util/macros.h"
+#include "util/u_atomic.h"
 #include "sid.h"
 
 #include "shader_enums.h"
@@ -200,6 +201,48 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char 
*buf, unsigned bufsize)
}
 }
 
+/* Prevent optimizations (at least of memory accesses) across the current
+ * point in the program by emitting empty inline assembly that is marked as
+ * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
+ */
+void
+ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef *pvgpr)
+{
+   static int counter = 0;
+
+   LLVMBuilderRef builder = ctx->builder;
+   char code[16];
+
+   snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+   if (!pvgpr) {
+   LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, 
false);
+   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", 
true, false);
+   LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+   } else {
+   LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, 
false);
+   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, 
"=v,0", true, false);
+   LLVMValueRef vgpr = *pvgpr;
+   LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+   unsigned vgpr_size = ac_get_type_size(vgpr_type);
+   LLVMValueRef vgpr0;
+
+   assert(vgpr_size % 4 == 0);
+
+   vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, 
vgpr_size / 4), "");
+   vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+   vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+   vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, 
"");
+   vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+   *pvgpr = vgpr;
+   }
+}
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 2566923..b3287cf 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -75,6 +75,8 @@ ac_build_intrinsic(struct ac_llvm_context *ctx, const char 
*name,
 
 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned 
bufsize);
 
+void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+  LLVMValueRef *pvgpr);
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 41dc70a..fbe270f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3463,47 +3463,6 @@ static void si_llvm_return_fs_outputs(struct 
ac_shader_abi *abi,
ctx->return_value = ret;
 }
 
-/* Prevent optimizations (at least of memory accesses) across the current
- * point in the program by emitting empty inline assembly that is marked as
- * having side effects.
- *
- * Optionally, a value can be passed through the inline assembly to prevent
- * LLVM from hoisting calls to ReadNone functions.
- */
-static void emit_optimization_barrier(struct si_shader_context *ctx,
- LLVMValueRef *pvgpr)
-{
-   static int counter = 0;
-
-   LLVMBuilderRef builder = ctx->gallivm.builder;
-   char code[16];
-
-   snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
-
-   if (!pvgpr) {
-   LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, 
false);
-   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", 
true, false);
-   LLVMBuildCall(builder, inlineasm, NULL, 0, "");
-   } else {
-   LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, 
false);
-   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, 
"=v,0", true, false);
-   LLVMValueRef vgpr = *pvgpr;
-   LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
-   unsigned vgpr_size = ac_get_type_size(vgpr_type);
- 

[Mesa-dev] [PATCH 05/10] radeonsi: move llvm_get_type_size() to ac

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

Reviewed-by: Nicolai Hähnle 
Reviewed-by: Marek Olšák 
---
 src/amd/common/ac_llvm_build.c   | 24 ++
 src/amd/common/ac_llvm_build.h   |  2 ++
 src/gallium/drivers/radeonsi/si_shader.c | 43 +++-
 3 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9b939c1..7a4987c 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -88,6 +88,30 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context)
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
 }
 
+unsigned
+ac_get_type_size(LLVMTypeRef type)
+{
+   LLVMTypeKind kind = LLVMGetTypeKind(type);
+
+   switch (kind) {
+   case LLVMIntegerTypeKind:
+   return LLVMGetIntTypeWidth(type) / 8;
+   case LLVMFloatTypeKind:
+   return 4;
+   case LLVMPointerTypeKind:
+   return 8;
+   case LLVMVectorTypeKind:
+   return LLVMGetVectorSize(type) *
+  ac_get_type_size(LLVMGetElementType(type));
+   case LLVMArrayTypeKind:
+   return LLVMGetArrayLength(type) *
+  ac_get_type_size(LLVMGetElementType(type));
+   default:
+   assert(0);
+   return 0;
+   }
+}
+
 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
   LLVMTypeRef return_type, LLVMValueRef *params,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 09fd585..2566923 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -66,6 +66,8 @@ struct ac_llvm_context {
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context);
 
+unsigned ac_get_type_size(LLVMTypeRef type);
+
 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
   LLVMTypeRef return_type, LLVMValueRef *params,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 628e6f8..41dc70a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -89,8 +89,6 @@ static void si_llvm_emit_barrier(const struct 
lp_build_tgsi_action *action,
 static void si_dump_shader_key(unsigned processor, const struct si_shader 
*shader,
   FILE *f);
 
-static unsigned llvm_get_type_size(LLVMTypeRef type);
-
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
@@ -3491,7 +3489,7 @@ static void emit_optimization_barrier(struct 
si_shader_context *ctx,
LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, 
"=v,0", true, false);
LLVMValueRef vgpr = *pvgpr;
LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
-   unsigned vgpr_size = llvm_get_type_size(vgpr_type);
+   unsigned vgpr_size = ac_get_type_size(vgpr_type);
LLVMValueRef vgpr0;
 
assert(vgpr_size % 4 == 0);
@@ -4159,29 +4157,6 @@ static void declare_streamout_params(struct 
si_shader_context *ctx,
}
 }
 
-static unsigned llvm_get_type_size(LLVMTypeRef type)
-{
-   LLVMTypeKind kind = LLVMGetTypeKind(type);
-
-   switch (kind) {
-   case LLVMIntegerTypeKind:
-   return LLVMGetIntTypeWidth(type) / 8;
-   case LLVMFloatTypeKind:
-   return 4;
-   case LLVMPointerTypeKind:
-   return 8;
-   case LLVMVectorTypeKind:
-   return LLVMGetVectorSize(type) *
-  llvm_get_type_size(LLVMGetElementType(type));
-   case LLVMArrayTypeKind:
-   return LLVMGetArrayLength(type) *
-  llvm_get_type_size(LLVMGetElementType(type));
-   default:
-   assert(0);
-   return 0;
-   }
-}
-
 static void declare_lds_as_pointer(struct si_shader_context *ctx)
 {
struct gallivm_state *gallivm = &ctx->gallivm;
@@ -4627,10 +4602,10 @@ static void create_function(struct si_shader_context 
*ctx)
shader->info.num_input_vgprs = 0;
 
for (i = 0; i < fninfo.num_sgpr_params; ++i)
-   shader->info.num_input_sgprs += 
llvm_get_type_size(fninfo.types[i]) / 4;
+   shader->info.num_input_sgprs += 
ac_get_type_size(fninfo.types[i]) / 4;
 
for (; i < fninfo.num_params; ++i)
-   shader->info.num_input_vgprs += 
llvm_get_type_size(fninfo.types[i]) / 4;
+   shader->info.num_input_vgprs += 
ac_get_type_size(fninfo.types[i]) / 4;
 
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
shader->info.num_input_vgprs -= num_prolog_vgprs;

[Mesa-dev] [PATCH 04/10] nir/spirv: add plumbing for KHR_shader_ballot and KHR_subgroup_vote

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

---
 src/compiler/spirv/nir_spirv.h |  2 +
 src/compiler/spirv/spirv_to_nir.c  | 79 ++
 src/compiler/spirv/vtn_variables.c | 28 ++
 3 files changed, 109 insertions(+)

diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h
index 83577fb..9d90a4d 100644
--- a/src/compiler/spirv/nir_spirv.h
+++ b/src/compiler/spirv/nir_spirv.h
@@ -52,6 +52,8 @@ struct nir_spirv_supported_extensions {
bool int64;
bool multiview;
bool variable_pointers;
+   bool shader_ballot;
+   bool shader_group_vote;
 };
 
 nir_function *spirv_to_nir(const uint32_t *words, size_t word_count,
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 4b9c121..84f2db6 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -2602,6 +2602,69 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
nir_builder_instr_insert(&b->nb, &intrin->instr);
 }
 
+static void
+vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
+const uint32_t *w, unsigned count)
+{
+   nir_intrinsic_op intrinsic_op;
+   switch (opcode) {
+   case SpvOpSubgroupBallotKHR:
+  intrinsic_op = nir_intrinsic_ballot;
+  break;
+   case SpvOpSubgroupFirstInvocationKHR:
+  intrinsic_op = nir_intrinsic_read_first_invocation;
+  break;
+   case SpvOpSubgroupReadInvocationKHR:
+  intrinsic_op = nir_intrinsic_read_invocation;
+  break;
+   case SpvOpSubgroupAllKHR:
+  intrinsic_op = nir_intrinsic_vote_all;
+  break;
+   case SpvOpSubgroupAnyKHR:
+  intrinsic_op = nir_intrinsic_vote_any;
+  break;
+   case SpvOpSubgroupAllEqualKHR:
+  intrinsic_op = nir_intrinsic_vote_eq;
+  break;
+   default:
+  unreachable("unknown subgroup instruction");
+  break;
+   }
+
+   nir_intrinsic_instr *intrin =
+  nir_intrinsic_instr_create(b->shader, intrinsic_op);
+
+   intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def);
+
+   if (opcode == SpvOpSubgroupReadInvocationKHR) {
+  intrin->src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
+   }
+
+   intrin->num_components = intrin->src[0].ssa->num_components;
+   nir_ssa_dest_init(&intrin->instr, &intrin->dest,
+ intrin->num_components,
+ (opcode == SpvOpSubgroupBallotKHR) ? 64 : 32,
+ NULL);
+   nir_builder_instr_insert(&b->nb, &intrin->instr);
+
+   nir_ssa_def *result = &intrin->dest.ssa;
+
+   if (opcode == SpvOpSubgroupBallotKHR) {
+  /* convert from 64-bit to 4 32-bit components */
+  nir_ssa_def *tmp = nir_unpack_64_2x32(&b->nb, result);
+  nir_ssa_def *zero = nir_imm_int(&b->nb, 0);
+  result = nir_vec4(&b->nb, nir_channel(&b->nb, tmp, 0),
+nir_channel(&b->nb, tmp, 1),
+zero, zero);
+   }
+
+   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+   const struct glsl_type *result_type =
+  vtn_value(b, w[1], vtn_value_type_type)->type->type;
+   val->ssa = vtn_create_ssa_value(b, result_type);
+   val->ssa->def = result;
+}
+
 static unsigned
 gl_primitive_from_spv_execution_mode(SpvExecutionMode mode)
 {
@@ -2787,6 +2850,13 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, 
SpvOp opcode,
   case SpvCapabilityVariablePointersStorageBuffer:
   case SpvCapabilityVariablePointers:
  spv_check_supported(variable_pointers, cap);
+
+  case SpvCapabilitySubgroupBallotKHR:
+ spv_check_supported(shader_ballot, cap);
+ break;
+
+  case SpvCapabilitySubgroupVoteKHR:
+ spv_check_supported(shader_ballot, cap);
  break;
 
   default:
@@ -3307,6 +3377,15 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp 
opcode,
   vtn_handle_barrier(b, opcode, w, count);
   break;
 
+   case SpvOpSubgroupBallotKHR:
+   case SpvOpSubgroupFirstInvocationKHR:
+   case SpvOpSubgroupReadInvocationKHR:
+   case SpvOpSubgroupAllKHR:
+   case SpvOpSubgroupAnyKHR:
+   case SpvOpSubgroupAllEqualKHR:
+  vtn_handle_subgroup(b, opcode, w, count);
+  break;
+
default:
   unreachable("Unhandled opcode");
}
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 4432e72..abb5515 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1161,6 +1161,34 @@ vtn_get_builtin_location(struct vtn_builder *b,
   *location = SYSTEM_VALUE_VIEW_INDEX;
   set_mode_system_value(mode);
   break;
+   case SpvBuiltInSubgroupSize:
+  *location = SYSTEM_VALUE_SUBGROUP_SIZE;
+  set_mode_system_value(mode);
+  break;
+   case SpvBuiltInSubgroupLocalInvocationId:
+  *location = SYSTEM_VALUE_SUBGROUP_INVOCATION;
+  set_mode_system_value(mode);
+  break;
+   case SpvBuiltInSubgroupEqMaskKHR:
+  *location = SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT;
+  set_mode_system_value(mode);
+  break;
+   case 

[Mesa-dev] [PATCH 03/10] nir/lower_system_values: handle SPIR-V shader_ballot system values

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

Lower them to the GL version.
---
 src/compiler/nir/nir_lower_system_values.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index ba20d30..76045ad 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -110,6 +110,44 @@ convert_block(nir_block *block, nir_builder *b)
  }
  break;
 
+  case SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT:
+  case SYSTEM_VALUE_SUBGROUP_GE_MASK_32BIT:
+  case SYSTEM_VALUE_SUBGROUP_GT_MASK_32BIT:
+  case SYSTEM_VALUE_SUBGROUP_LE_MASK_32BIT:
+  case SYSTEM_VALUE_SUBGROUP_LT_MASK_32BIT: {
+ nir_intrinsic_op op;
+ switch (var->data.location) {
+ case SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_eq_mask;
+break;
+ case SYSTEM_VALUE_SUBGROUP_GE_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_ge_mask;
+break;
+ case SYSTEM_VALUE_SUBGROUP_GT_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_gt_mask;
+break;
+ case SYSTEM_VALUE_SUBGROUP_LE_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_le_mask;
+break;
+ case SYSTEM_VALUE_SUBGROUP_LT_MASK_32BIT:
+op = nir_intrinsic_load_subgroup_lt_mask;
+break;
+ default:
+unreachable("bad intrinsic");
+ }
+ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->shader, 
op);
+ instr->num_components = 1;
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 64, NULL);
+ nir_builder_instr_insert(b, &instr->instr);
+ 
+ sysval = nir_unpack_64_2x32(b, &instr->dest.ssa);
+ nir_ssa_def *zero = nir_imm_int(b, 0);
+ sysval = nir_vec4(b, nir_channel(b, sysval, 0),
+   nir_channel(b, sysval, 1),
+   zero, zero);
+ break;
+  }
+
   case SYSTEM_VALUE_INSTANCE_INDEX:
  sysval = nir_iadd(b,
nir_load_instance_id(b),
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] compiler: add new system values for SPV_KHR_shader_ballot

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

For SPIR-V, Khronos decided to make the SubGroup*Mask system values
consist of 4 32-bit components rather than one 64-bit component.
Although we'll lower away the difference in nir_lower_system_values so
drivers won't have to deal with them, adding these system values makes
it easier to implement the SPIRV-to-NIR bits.
---
 src/compiler/shader_enums.c |  5 +
 src/compiler/shader_enums.h | 11 +++
 2 files changed, 16 insertions(+)

diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index b2ca80b..571a43e 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -211,6 +211,11 @@ gl_system_value_name(gl_system_value sysval)
  ENUM(SYSTEM_VALUE_SUBGROUP_GT_MASK),
  ENUM(SYSTEM_VALUE_SUBGROUP_LE_MASK),
  ENUM(SYSTEM_VALUE_SUBGROUP_LT_MASK),
+ ENUM(SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT),
+ ENUM(SYSTEM_VALUE_SUBGROUP_GE_MASK_32BIT),
+ ENUM(SYSTEM_VALUE_SUBGROUP_GT_MASK_32BIT),
+ ENUM(SYSTEM_VALUE_SUBGROUP_LE_MASK_32BIT),
+ ENUM(SYSTEM_VALUE_SUBGROUP_LT_MASK_32BIT),
  ENUM(SYSTEM_VALUE_VERTEX_ID),
  ENUM(SYSTEM_VALUE_INSTANCE_ID),
  ENUM(SYSTEM_VALUE_INSTANCE_INDEX),
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 2f20e68..c73b382 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -374,6 +374,17 @@ typedef enum
SYSTEM_VALUE_SUBGROUP_GT_MASK,
SYSTEM_VALUE_SUBGROUP_LE_MASK,
SYSTEM_VALUE_SUBGROUP_LT_MASK,
+
+   /**
+* These are the same as system values above, except that they consist of 4
+* 32-bit components rather than 1 64-bit component. This matches the
+* semantics of the SPIR-V KHR_shader_ballot extension.
+*/
+   SYSTEM_VALUE_SUBGROUP_EQ_MASK_32BIT,
+   SYSTEM_VALUE_SUBGROUP_GE_MASK_32BIT,
+   SYSTEM_VALUE_SUBGROUP_GT_MASK_32BIT,
+   SYSTEM_VALUE_SUBGROUP_LE_MASK_32BIT,
+   SYSTEM_VALUE_SUBGROUP_LT_MASK_32BIT,
/*@}*/
 
/*@}*/
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/10] mesa: fix 64-bit issues with system_values_read

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

We're about to bump the number of system values above 32. The
system_values_read bitfield itself is 64 bits, but some users weren't
taking that into account. Fix the ones I could find by grepping for
"system_values_read". This prevents regressions at least with radeonsi
and other Gallium drivers, and probably i965 too.

Reviewed-by: Nicolai Hähnle 
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c| 2 +-
 src/intel/compiler/brw_vec4_gs_visitor.cpp | 3 ++-
 src/mesa/program/programopt.c  | 2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +
 src/mesa/state_tracker/st_mesa_to_tgsi.c   | 6 +++---
 5 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index d4914ac..e5daef4 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -588,7 +588,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned 
file, unsigned index,
   src = nir_src_for_ssa(>dest.ssa);
 
   b->shader->info.system_values_read |=
- (1 << nir_system_value_from_intrinsic(op));
+ (1ull << nir_system_value_from_intrinsic(op));
 
   break;
}
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp 
b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index ca59927..8a7da0f 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -653,7 +653,8 @@ brw_compile_gs(const struct brw_compiler *compiler, void 
*log_data,
   shader->info.clip_distance_array_size;
 
prog_data->include_primitive_id =
-  (shader->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 
0;
+  (shader->info.system_values_read &
+   (1ull << SYSTEM_VALUE_PRIMITIVE_ID)) != 0;
 
prog_data->invocations = shader->info.gs.invocations;
 
diff --git a/src/mesa/program/programopt.c b/src/mesa/program/programopt.c
index f560bce..f389d2b 100644
--- a/src/mesa/program/programopt.c
+++ b/src/mesa/program/programopt.c
@@ -597,7 +597,7 @@ _mesa_program_fragment_position_to_sysval(struct gl_program 
*prog)
   return;
 
prog->info.inputs_read &= ~BITFIELD64_BIT(VARYING_SLOT_POS);
-   prog->info.system_values_read |= 1 << SYSTEM_VALUE_FRAG_COORD;
+   prog->info.system_values_read |= BITFIELD64_BIT(SYSTEM_VALUE_FRAG_COORD);
 
for (i = 0; i < prog->arb.NumInstructions; i++) {
   struct prog_instruction *inst = prog->arb.Instructions + i;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 2fca398..d78d067 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6592,10 +6592,10 @@ st_translate_program(
/* Declare misc input registers
 */
{
-  GLbitfield sysInputs = proginfo->info.system_values_read;
+  GLbitfield64 sysInputs = proginfo->info.system_values_read;
 
   for (i = 0; sysInputs; i++) {
- if (sysInputs & (1 << i)) {
+ if (sysInputs & BITFIELD64_BIT(i)) {
 unsigned semName = _mesa_sysval_to_semantic(i);
 
 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
@@ -6626,7 +6626,7 @@ st_translate_program(
emit_wpos(st_context(ctx), t, proginfo, ureg,
  program->wpos_transform_const);
 
-sysInputs &= ~(1 << i);
+sysInputs &= ~BITFIELD64_BIT(i);
  }
   }
}
@@ -6912,7 +6912,8 @@ get_mesa_program_tgsi(struct gl_context *ctx,
/* This must be done before the uniform storage is associated. */
if (shader->Stage == MESA_SHADER_FRAGMENT &&
(prog->info.inputs_read & VARYING_BIT_POS ||
-prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
+prog->info.system_values_read &
+BITFIELD64_BIT(SYSTEM_VALUE_FRAG_COORD))) {
   static const gl_state_index wposTransformState[STATE_LENGTH] = {
  STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
   };
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c 
b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index f6eb5ef..a62962c 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -938,9 +938,9 @@ st_translate_mesa_program(
 
/* Declare misc input registers
 */
-   GLbitfield sysInputs = program->info.system_values_read;
+   GLbitfield64 sysInputs = program->info.system_values_read;
for (i = 0; sysInputs; i++) {
-  if (sysInputs & (1 << i)) {
+  if (sysInputs & BITFIELD64_BIT(i)) {
  unsigned semName = _mesa_sysval_to_semantic(i);
 
  t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
@@ -972,7 +972,7 @@ st_translate_mesa_program(
  semName == TGSI_SEMANTIC_POSITION)
 emit_wpos(st_context(ctx), t, program, ureg);
 
-  sysInputs &= ~(1 << i);
+  sysInputs &= 

[Mesa-dev] [PATCH 00/10] radv: Support for subgroup_vote and shader_ballot (v2)

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

This is mostly a re-send of my previous series [1]. I've split out the
more controversial NIR attribute stuff into a separate series, which is
now independent of this one. The main change, other than rebasing stuff,
is that we now use the NIR lowering added by Matt for handling the
subgroup masks, rather than doing it ourselves while translating to
LLVM. That code still isn't tested thanks to [2], though.

Also, this requires some fixes for integer handling that I just sent
out, particularly the last one, for my crucible tests to work. This
series, including those fixes, is available at:

git://people.freedesktop.org/~cwabbott0/mesa radv-shader-ballot-v3

[1] https://lists.freedesktop.org/archives/mesa-dev/2017-June/158672.html
[2] https://github.com/KhronosGroup/glslang/issues/930

Connor Abbott (10):
  mesa: fix 64-bit issues with system_values_read
  compiler: add new system values for SPV_KHR_shader_ballot
  nir/lower_system_values: handle SPIR-V shader_ballot system values
  nir/spirv: add plumbing for KHR_shader_ballot and KHR_subgroup_vote
  radeonsi: move llvm_get_type_size() to ac
  radeonsi: move emit_optimization_barrier() to ac
  radeonsi: move si_emit_ballot() to ac
  radeonsi: move the guts of ARB_shader_group_vote emission to ac
  radv: call nir_opt_intrinsics()
  radv/ac: enable EXT_shader_subgroup_ballot and
EXT_shader_subgroup_vote

 src/amd/common/ac_llvm_build.c | 123 ++
 src/amd/common/ac_llvm_build.h |  14 +++
 src/amd/common/ac_nir_to_llvm.c|  49 +++
 src/amd/vulkan/radv_device.c   |   8 ++
 src/amd/vulkan/radv_pipeline.c |   5 ++
 src/compiler/nir/nir_lower_system_values.c |  38 
 src/compiler/shader_enums.c|   5 ++
 src/compiler/shader_enums.h|  11 +++
 src/compiler/spirv/nir_spirv.h |   2 +
 src/compiler/spirv/spirv_to_nir.c  |  79 +
 src/compiler/spirv/vtn_variables.c |  28 ++
 src/gallium/auxiliary/nir/tgsi_to_nir.c|   2 +-
 src/gallium/drivers/radeonsi/si_shader.c   | 136 +++--
 src/intel/compiler/brw_vec4_gs_visitor.cpp |   3 +-
 src/mesa/program/programopt.c  |   2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   9 +-
 src/mesa/state_tracker/st_mesa_to_tgsi.c   |   6 +-
 17 files changed, 387 insertions(+), 133 deletions(-)

-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] nir: use new attributes for ARB_shader_ballot and ARB_shader_group_vote

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

---
 src/compiler/nir/nir_intrinsics.h | 35 +--
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index ea51525..72c4296 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -76,6 +76,29 @@ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, 
xx,
  */
 #define BARRIER(name) INTRINSIC(name, 0, ARR(0), false, 0, 0, 0, xx, xx, xx, 0)
 
+/*
+ * A cross-thread intrinsic is similar to an ALU instruction, except it has a
+ * few more restrictions placed on it because it does cross-thread
+ * communication. See the comment for NIR_INTRINSIC_CROSS_THREAD in nir.h for
+ * details.
+ */
+#define CROSS_THREAD(name, num_srcs, src0_components, src1_components, \
+ dest_components) \
+   INTRINSIC(name, num_srcs, ARR(src0_components, src1_components), \
+ true, dest_components, 0, 0, xx, xx, xx, \
+ NIR_INTRINSIC_CAN_REORDER | NIR_INTRINSIC_CAN_ELIMINATE | \
+ NIR_INTRINSIC_CROSS_THREAD)
+/*
+ * Similar to CROSS_THREAD, except it has slightly more relaxed semantics. See
+ * the comment for NIR_INTRINSIC_CONVERGENT in nir.h for details.
+ */
+#define CONVERGENT(name, num_srcs, src0_components, src1_components, \
+   dest_components) \
+   INTRINSIC(name, num_srcs, ARR(src0_components, src1_components), \
+ true, dest_components, 0, 0, xx, xx, xx, \
+ NIR_INTRINSIC_CAN_REORDER | NIR_INTRINSIC_CAN_ELIMINATE | \
+ NIR_INTRINSIC_CONVERGENT)
+
 BARRIER(barrier)
 BARRIER(discard)
 
@@ -102,9 +125,9 @@ INTRINSIC(shader_clock, 0, ARR(0), true, 2, 0, 0, xx, xx, 
xx, NIR_INTRINSIC_CAN_
  *
  * GLSL functions from ARB_shader_ballot.
  */
-INTRINSIC(ballot, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(read_invocation, 2, ARR(0, 1), true, 0, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(read_first_invocation, 1, ARR(0), true, 0, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+CROSS_THREAD(ballot, 1, 1, 0, 1)
+CONVERGENT(read_invocation, 2, 0, 1, 0)
+CROSS_THREAD(read_first_invocation, 1, 0, 0, 0)
 
 /*
  * Memory barrier with semantics analogous to the compute shader
@@ -121,9 +144,9 @@ BARRIER(memory_barrier_shared)
 INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
 
 /** ARB_shader_group_vote intrinsics */
-INTRINSIC(vote_any, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(vote_all, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(vote_eq,  1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+CROSS_THREAD(vote_any, 1, 1, 0, 1)
+CROSS_THREAD(vote_all, 1, 1, 0, 1)
+CROSS_THREAD(vote_eq,  1, 1, 0, 1)
 
 /**
  * Basic Geometry Shader intrinsics.
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] nir: take cross-thread operations into account in a few places

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

These optimizations happened to work with derivatives, but they won't
with upcoming shader_ballot and group_vote instructions.

v2: fixup for new convergent & uniform-control semantics
---
 src/compiler/nir/nir_instr_set.c   | 24 
 src/compiler/nir/nir_opt_peephole_select.c | 11 +++
 2 files changed, 35 insertions(+)

diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index 9cb9ed4..b40c203 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -178,6 +178,14 @@ hash_instr(const void *data)
const nir_instr *instr = data;
uint32_t hash = _mesa_fnv32_1a_offset_bias;
 
+   /*
+* In nir_instrs_equal(), we compare the instruction's basic blocks in this
+* case. See the comment there for the explanation.
+*/
+   if (nir_instr_is_convergent(instr) && !nir_instr_is_uniform_control(instr)) 
{
+  HASH(hash, instr->block);
+   }
+
switch (instr->type) {
case nir_instr_type_alu:
   hash = hash_alu(hash, nir_instr_as_alu(instr));
@@ -256,6 +264,22 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr 
*instr2)
if (instr1->type != instr2->type)
   return false;
 
+   /*
+* If the instructions are cross-thread, then they must have the same
+* execution mask, and if they are convergent, then the one being replaced
+* must have a smaller execution mask. If they are uniform-control, then we
+* can always replace one invocation with another since every invocation
+* must already have the same execution mask (the largest possible one).
+* But not so for non-uniform-control instructions, since different
+* invocations may be called with different execution masks and therefore
+* have different results. Conservatively enforce that the instructions are
+* in the same basic block.
+*/
+   if (nir_instr_is_convergent(instr1) && 
!nir_instr_is_uniform_control(instr1)) {
+  if (instr1->block != instr2->block)
+ return false;
+   }
+
switch (instr1->type) {
case nir_instr_type_alu: {
   nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
diff --git a/src/compiler/nir/nir_opt_peephole_select.c 
b/src/compiler/nir/nir_opt_peephole_select.c
index 4ca4f80..ce41781 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -61,6 +61,17 @@ static bool
 block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok)
 {
nir_foreach_instr(instr, block) {
+  if (nir_instr_is_cross_thread(instr) && !nir_instr_is_convergent(instr)) 
{
+ /* If the instruction is cross-thread, then we can't execute it
+  * conditionally when we would've executed it unconditionally before,
+  * except when the condition is uniform. If the instruction is
+  * convergent, though, we're already guaranteed that the entire
+  * region is convergent (including the condition) so we can go ahead.
+  *
+  * TODO: allow when the if-condition is uniform
+  */
+ return false;
+  }
   switch (instr->type) {
   case nir_instr_type_intrinsic: {
  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] nir/gcm: use the new convergent attribute

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

v2: use convergent instead of cross-thread
---
 src/compiler/nir/nir_opt_gcm.c | 72 ++
 1 file changed, 23 insertions(+), 49 deletions(-)

diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
index 879a77a..185c430 100644
--- a/src/compiler/nir/nir_opt_gcm.c
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -109,65 +109,39 @@ static bool
 gcm_pin_instructions_block(nir_block *block, struct gcm_state *state)
 {
nir_foreach_instr_safe(instr, block) {
-  switch (instr->type) {
-  case nir_instr_type_alu:
- switch (nir_instr_as_alu(instr)->op) {
- case nir_op_fddx:
- case nir_op_fddy:
- case nir_op_fddx_fine:
- case nir_op_fddy_fine:
- case nir_op_fddx_coarse:
- case nir_op_fddy_coarse:
-/* These can only go in uniform control flow; pin them for now */
-instr->pass_flags = GCM_INSTR_PINNED;
+  if (nir_instr_is_convergent(instr)) {
+ /* pin cross-thread and convergent operations for now */
+ instr->pass_flags = GCM_INSTR_PINNED;
+  } else {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ case nir_instr_type_tex:
+ case nir_instr_type_load_const:
+instr->pass_flags = 0;
 break;
 
- default:
-instr->pass_flags = 0;
+ case nir_instr_type_intrinsic: {
+const nir_intrinsic_info *info =
+   &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+
+if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+(info->flags & NIR_INTRINSIC_CAN_REORDER)) {
+   instr->pass_flags = 0;
+} else {
+   instr->pass_flags = GCM_INSTR_PINNED;
+}
 break;
  }
- break;
 
-  case nir_instr_type_tex:
- switch (nir_instr_as_tex(instr)->op) {
- case nir_texop_tex:
- case nir_texop_txb:
- case nir_texop_lod:
-/* These two take implicit derivatives so they need to be pinned */
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_phi:
 instr->pass_flags = GCM_INSTR_PINNED;
 break;
 
  default:
-instr->pass_flags = 0;
-break;
- }
- break;
-
-  case nir_instr_type_load_const:
- instr->pass_flags = 0;
- break;
-
-  case nir_instr_type_intrinsic: {
- const nir_intrinsic_info *info =
-&nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
-
- if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
- (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
-instr->pass_flags = 0;
- } else {
-instr->pass_flags = GCM_INSTR_PINNED;
+unreachable("Invalid instruction type in GCM");
  }
- break;
-  }
-
-  case nir_instr_type_jump:
-  case nir_instr_type_ssa_undef:
-  case nir_instr_type_phi:
- instr->pass_flags = GCM_INSTR_PINNED;
- break;
-
-  default:
- unreachable("Invalid instruction type in GCM");
   }
 
   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/4] nir: Cross-thread, uniform-control, and convergent attributes

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

This series adds some more precise semantics for instructions added by
ARB_shader_ballot and ARB_shader_group_vote, which should allow the
optimizer more freedom. I based it off my previous series [1], but
since the intrinsics have already landed in master since then, this
series changes them to use the new attributes instead. I've also renamed
"convergent" to "uniform-control" to reduce confusion, and I've added a
new "convergent" attribute that's similar in spirit to the LLVM one.

There was some debate last time about whether derivatives should be
marked uniform-control or just convergent. I've realized that our
current behavior actually already assumed that derivatives are
uniform-control. In particular, with something like this (contrived)
example:

do {
   ... = dFdx(a);
} while(...);
... = dFdx(a);

CSE would happily merge the second dFdx into the first, even though in
general this isn't allowed for operations that are convergent, since if
the loop condition is non-uniform, then the first dFdx will be called
with a smaller execution mask than the second. Under the current model,
we'd have to assume that the first dFdx is always called in uniform
control flow to make the transform correct. So, in order to keep the
same behavior for derivatives that we previously had, we need to make
them uniform-control. If we run into problems with people calling
derivatives in non-uniform control flow if/when we add more aggressive
optimizations, then we can always fall back to making them convergent
now that we have the convergent attribute.

[1] https://lists.freedesktop.org/archives/mesa-dev/2017-June/157909.html

Connor Abbott (4):
  nir: add new convergent, uniform-control, and cross-thread attributes
  nir/gcm: use the new convergent attribute
  nir: take cross-thread operations into account in a few places
  nir: use new attributes for ARB_shader_ballot and
ARB_shader_group_vote

 src/compiler/nir/nir.h | 126 +
 src/compiler/nir/nir_instr_set.c   |  24 ++
 src/compiler/nir/nir_intrinsics.h  |  35 ++--
 src/compiler/nir/nir_opt_gcm.c |  72 ++---
 src/compiler/nir/nir_opt_peephole_select.c |  11 +++
 5 files changed, 213 insertions(+), 55 deletions(-)

-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] nir: add new convergent, uniform-control, and cross-thread attributes

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

These are properties of the instruction that must be respected when
moving it around, in addition to the usual SSA dominance guarantee.
Previously, we only had special handling for fddx and fddy, in a very
ad-hoc way. But with arb_shader_ballot and arb_shader_group_vote, we'll
have to start handling a lot more instructions with similar constraints,
so we want to add a more formal model of what optimizations can and
cannot do.

v2: don't add attribute for ALU instructions
v3: special-case derivative ALU instructions
v4: rename convergent to uniform-control, and add LLVM-style convergent
attribute
---
 src/compiler/nir/nir.h | 126 +
 1 file changed, 126 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9313b7a..24934f0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -986,6 +986,39 @@ typedef enum {
 * intrinsic are due to the register reads/writes.
 */
NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+
+   /**
+* Indicates whether this intrinsic is "convergent". An operation is
+* convergent if results from one thread depend on results from another
+* thread, but in such a way that additional threads being enabled doesn't
+* affect the result of the operation. Examples of convergent operations
+* include screen-space derivatives, readInvocation() from
+* ARB_shader_ballot, etc. Note that this is a more precise version of
+* LLVM's "convergent" attribute, which simply stipulates that control
+* dependencies cannot be added, since the set of active threads can only be
+* reduced by adding control dependencies.
+*/
+   NIR_INTRINSIC_CONVERGENT = (1 << 2),
+
+   /**
+* Indicates whether this intrinsic is "cross-thread". An operation is
+* cross-thread if results in one thread depend on the set of active threads
+* when it is executed, as well as possibly the input value of the other
+* threads, and therefore optimizations cannot change the execution mask
+* when the operation is called. Examples of cross-thread operations include
+* the "any" reduction which returns "true" in all threads if any thread
+* inputs "true", ballotARB() from ARB_shader_ballot, etc. Note that any
+* cross-thread operation must be convergent.
+*/
+   NIR_INTRINSIC_CROSS_THREAD = (1 << 3),
+
+   /**
+* Indicates that this intrinsic is guaranteed to always be called in
+* uniform control flow, that is, control flow with the same execution mask
+* as when the program started. If an operation is uniform-control, it must
+* be convergent as well, since the optimizer must maintain the guarantee.
+*/
+   NIR_INTRINSIC_UNIFORM_CONTROL = (1 << 4),
 } nir_intrinsic_semantic_flag;
 
 /**
@@ -1460,6 +1493,99 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
 type, nir_instr_type_parallel_copy)
 
 /*
+ * Helpers to determine if an instruction is cross-thread, convergent, or
+ * uniform-control. See NIR_INTRINSIC_{CONVERGENT|CROSS_THREAD|UNIFORM_CONTROL}
+ * for the definitions.
+ */
+static inline bool
+nir_instr_is_uniform_control(const nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+  switch (nir_instr_as_alu(instr)->op) {
+  case nir_op_fddx:
+  case nir_op_fddy:
+  case nir_op_fddx_fine:
+  case nir_op_fddy_fine:
+  case nir_op_fddx_coarse:
+  case nir_op_fddy_coarse:
+ /* Section 8.13.1 (Derivative Functions) of the GLSL 4.50 spec says:
+  *
+  *"Derivatives are undefined within non-uniform control flow."
+  *
+  * Thus, we can assume they are called in uniform control flow. 
+  */
+ return true;
+
+  default:
+ return false;
+  }
+
+   case nir_instr_type_intrinsic: {
+  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+  return nir_intrinsic_infos[intrin->intrinsic].flags &
+ NIR_INTRINSIC_UNIFORM_CONTROL;
+   }
+
+   case nir_instr_type_tex:
+ switch (nir_instr_as_tex(instr)->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_lod:
+/* These three take implicit derivatives, so they are
+ * uniform-control as well.
+ */
+return true;
+
+ default:
+return false;
+ }
+
+   default:
+  return false;
+   }
+}
+
+static inline bool
+nir_instr_is_cross_thread(const nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: {
+  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+  return nir_intrinsic_infos[intrin->intrinsic].flags &
+ NIR_INTRINSIC_CROSS_THREAD;
+   }
+
+   default:
+  return false;
+   }
+}
+
+static inline bool
+nir_instr_is_convergent(const nir_instr *instr)
+{
+   /* Instructions marked as uniform-control must be convergent, since

Re: [Mesa-dev] [PATCH 00/14] AMD GCN tile swizzle

2017-07-31 Thread Dave Airlie
On 1 August 2017 at 09:40, Marek Olšák  wrote:
> Hi,
>
> This might slightly increase MRT performance. You need the amdgpu
> kernel driver if you want it for SI and CI.
>
> I've fixed a few of issues with the original code, enabled it for
> mipmapped textures, and added GFX9 support. I wasn't able to make it
> work with mipmapping on SI. There is also a hack to make mipmapping
> work with DCC on VI - my solution is to overallocate DCC to make
> corruption go away with tile swizzling.
>
> I've tested SI, CI, VI.
>
> I haven't tested GFX9. The GFX9 patch is just the first version for
> testing. With current GB_ADDR_CONFIG, only Vega10 can do tile
> swizzling. The code doesn't do anything for Raven due to its (possibly
> incorrect) GB_ADDR_CONFIG.
>
I think this will regress radv, as I don't see it setting the
SHAREABLE flag anywhere
in the radv code, unless I missed that.

If you have a branch I'm happy to check it, or just run the Vulkan
deferred demo from
https://github.com/SaschaWillems/Vulkan/

Otherwise it all looks reasonable, I should probably add the fmask
counter to radv once it lands.

So for the rest of them
Reviewed-by: Dave Airlie 

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] ac/nir: fix lsb emission

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

This makes it match radeonsi. The LLVM backend itself will emit the
correct instruction, but LLVM might do incorrect optimizations since it
thinks the output is undefined when the input is 0, even though it's not
supposed to be. We really need a new intrinsic, or for the backend to
become smarter and recognize this pattern.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/amd/common/ac_nir_to_llvm.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c08f102..75f3f78 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1187,7 +1187,17 @@ static LLVMValueRef emit_find_lsb(struct ac_llvm_context 
*ctx,
 */
LLVMConstInt(ctx->i1, 1, false),
};
-   return ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, 
AC_FUNC_ATTR_READNONE);
+
+   LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
+ params, 2,
+ AC_FUNC_ATTR_READNONE);
+
+   /* TODO: We need an intrinsic to skip this conditional. */
+   /* Check for zero: */
+   return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
+  LLVMIntEQ, src0,
+  ctx->i32_0, ""),
+  LLVMConstInt(ctx->i32, -1, 0), lsb, "");
 }
 
 static LLVMValueRef emit_ifind_msb(struct ac_llvm_context *ctx,
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/14] ac/surface: remove RADEON_SURF_HAS_TILE_MODE_INDEX

2017-07-31 Thread Dave Airlie
On 1 August 2017 at 09:40, Marek Olšák  wrote:
> From: Marek Olšák 
>
> it's useless

left over from radeon winsys I assume.

Reviewed-by: Dave Airlie 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] ac/nir: fix nir_op_unpack_64_2x32_split_y emission

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

This was broken thanks to a typo in b2367cf.

Cc: Nicolai Hähnle 
---
 src/amd/common/ac_nir_to_llvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 75f3f78..f756b9a 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1895,7 +1895,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
LLVMVectorType(ctx->ac.i32, 
2),
"");
result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
-ctx->ac.i32_0, "");
+ctx->ac.i32_1, "");
break;
}
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/14] ac/surface: move tile_swizzle to ac_surface and document it

2017-07-31 Thread Dave Airlie
On 1 August 2017 at 09:40, Marek Olšák  wrote:
> From: Marek Olšák 

Thanks for documenting it!

Reviewed-by: Dave Airlie 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] nir: fix algebraic optimizations

2017-07-31 Thread Connor Abbott
From: Connor Abbott 

The optimizations are only valid for 32-bit integers. They were
mistakenly firing for 64-bit integers as well.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/compiler/nir/nir_opt_algebraic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index df58542..ad75228 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -250,8 +250,8 @@ optimizations = [
(('ishr', a, 0), a),
(('ushr', 0, a), 0),
(('ushr', a, 0), a),
-   (('iand', 0xff, ('ushr', a, 24)), ('ushr', a, 24)),
-   (('iand', 0xffff, ('ushr', a, 16)), ('ushr', a, 16)),
+   (('iand', 0xff, ('ushr@32', a, 24)), ('ushr', a, 24)),
+   (('iand', 0xffff, ('ushr@32', a, 16)), ('ushr', a, 16)),
# Exponential/logarithmic identities
(('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
(('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] radeonsi: fix various CLEAR_STATE issues

2017-07-31 Thread Michel Dänzer
On 01/08/17 07:43 AM, Marek Olšák wrote:
> From: Marek Olšák 

Fixes: 064550238ef0 ("radeonsi: use CLEAR_STATE to initialize some
  registers")
Bugzilla: https://bugs.freedesktop.org/101969
Tested-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] radeonsi: don't print AMD twice in the renderer string with the marketing name

2017-07-31 Thread Michel Dänzer
On 01/08/17 07:43 AM, Marek Olšák wrote:
> From: Marek Olšák 
> 
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
> b/src/gallium/drivers/radeon/r600_pipe_common.c
> index c58048f..e9402f8 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -1338,21 +1338,22 @@ bool r600_common_screen_init(struct 
> r600_common_screen *rscreen,
>struct radeon_winsys *ws, unsigned flags)
>  {
>   char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = 
> {};
>   struct utsname uname_data;
>   const char *chip_name;
>  
>   ws->query_info(ws, &rscreen->info);
>   rscreen->ws = ws;
>  
>   if ((chip_name = r600_get_marketing_name(ws)))
> - snprintf(family_name, sizeof(family_name), "%s / ", 
> r600_get_family_name(rscreen));
> + snprintf(family_name, sizeof(family_name), "%s / ",
> +  r600_get_family_name(rscreen) + 4);

Is the idea here that the string returned by r600_get_marketing_name
already contains AMD? If so, I'm afraid it's not that simple, some
entries in amdgpu.ids start directly with "Radeon" and don't contain any
vendor name. (FWIW, some contain ATI instead of AMD as well)


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: fix bad cast conversions in viewport()

2017-07-31 Thread Matt Turner
On Mon, Jul 31, 2017 at 12:43 PM, Samuel Pitoiset
 wrote:
> Fixes: ddc32537d6 ("mesa: clamp viewport values only once when using 
> glViewport()")
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101981
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101989
> Signed-off-by: Samuel Pitoiset 

I don't understand how this regression happened. I see in the results
of your Jenkins build #57 that
ES3-CTS.functional.state_query.integers.viewport_getfloat fails on all
platforms. Do we need to improve something in the CI?

> ---
>  src/mesa/main/viewport.c | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c
> index 3dce320d1d..fc384909e6 100644
> --- a/src/mesa/main/viewport.c
> +++ b/src/mesa/main/viewport.c
> @@ -94,9 +94,10 @@ static void
>  viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei width,
>   GLsizei height)
>  {
> +   struct gl_viewport_inputs input = { x, y, width, height };
> +
> /* Clamp the viewport to the implementation dependent values. */
> -   clamp_viewport(ctx, (GLfloat *)&x, (GLfloat *)&y,
> -  (GLfloat *)&width, (GLfloat *)&height);

As an aside, this is violating C's aliasing rules. This pattern isn't
allowed in Mesa since commit 88ad8c7dedb87d92a5bed0868f108076185ec089
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/14] ac/surface: enable tile swizzle for mipmapped textures

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

The tile swizzle computation was done after the whole miptree was computed,
but that was too late, because at that point AddrSurfInfoOut contained
information about the smallest miplevel, which is never 2D-tiled.

The correct way is to do the computation before the second level is computed.
---
 src/amd/common/ac_surface.c | 80 ++---
 1 file changed, 46 insertions(+), 34 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 1eff4e5..87a8993 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -400,42 +400,74 @@ static unsigned cik_get_macro_tile_index(struct 
radeon_surf *surf)
tileb = MIN2(surf->u.legacy.tile_split, tileb);
 
for (index = 0; tileb > 64; index++)
tileb >>= 1;
 
assert(index < 16);
return index;
 }
 
 /**
+ * This must be called after the first level is computed.
+ *
  * Copy surface-global settings like pipe/bank config from level 0 surface
- * computation.
+ * computation, and compute tile swizzle.
  */
-static void gfx6_surface_settings(const struct radeon_info* info,
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
- struct radeon_surf *surf)
+static int gfx6_surface_settings(ADDR_HANDLE addrlib,
+const struct radeon_info *info,
+const struct ac_surf_config *config,
+ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
+struct radeon_surf *surf)
 {
surf->surf_alignment = csio->baseAlign;
surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
gfx6_set_micro_tile_mode(surf, info);
 
/* For 2D modes only. */
if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
surf->u.legacy.num_banks = csio->pTileInfo->banks;
surf->u.legacy.macro_tile_index = csio->macroModeIndex;
} else {
surf->u.legacy.macro_tile_index = 0;
}
+
+   /* Compute tile swizzle. */
+   if (config->info.surf_index &&
+   surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
+   !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) 
&&
+   (config->info.samples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) 
{
+   ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+   ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+   AddrBaseSwizzleIn.size = 
sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
+   AddrBaseSwizzleOut.size = 
sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
+
+   AddrBaseSwizzleIn.surfIndex = 
p_atomic_inc_return(config->info.surf_index) - 1;
+   AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
+   AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
+   AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
+   AddrBaseSwizzleIn.tileMode = csio->tileMode;
+
+   int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
+  &AddrBaseSwizzleOut);
+   if (r != ADDR_OK)
+   return r;
+
+   assert(AddrBaseSwizzleOut.tileSwizzle <=
+  u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
+   surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+   }
+   return 0;
 }
 
 /**
  * Fill in the tiling information in \p surf based on the given surface config.
  *
  * The following fields of \p surf must be initialized by the caller:
  * blk_w, blk_h, bpe, flags.
  */
 static int gfx6_compute_surface(ADDR_HANDLE addrlib,
const struct radeon_info *info,
@@ -637,21 +669,24 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
for (level = 0; level < config->info.levels; level++) {
r = gfx6_compute_level(addrlib, config, surf, false, 
level, compressed,
   , 
,
   , , 
, );
if (r)
return r;
 
if (level > 0)
continue;
 
-   gfx6_surface_settings(info, &AddrSurfInfoOut, surf);
+   r = gfx6_surface_settings(addrlib, info, config,
+ &AddrSurfInfoOut, surf);
+   if (r)
+   return r;
}
}
 
/* Calculate texture layout information for 

[Mesa-dev] [PATCH 13/14] radeonsi: program tile swizzle for color and FMASK surfaces for GFX & SDMA

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_texture.c |  4 +++-
 src/gallium/drivers/radeonsi/cik_sdma.c   |  7 +++
 src/gallium/drivers/radeonsi/si_descriptors.c | 13 -
 src/gallium/drivers/radeonsi/si_state.c   | 12 ++--
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b391cbb..fb52dfb 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -195,20 +195,21 @@ struct r600_transfer {
 };
 
 struct r600_fmask_info {
uint64_t offset;
uint64_t size;
unsigned alignment;
unsigned pitch_in_pixels;
unsigned bank_height;
unsigned slice_tile_max;
unsigned tile_mode_index;
+   unsigned tile_swizzle;
 };
 
 struct r600_cmask_info {
uint64_t offset;
uint64_t size;
unsigned alignment;
unsigned slice_tile_max;
uint64_t base_address_reg;
 };
 
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 07df2d4..e3d462e 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -750,20 +750,21 @@ void r600_texture_get_fmask_info(struct 
r600_common_screen *rscreen,
 
assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
 
out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * 
fmask.u.legacy.level[0].nblk_y) / 64;
if (out->slice_tile_max)
out->slice_tile_max -= 1;
 
out->tile_mode_index = fmask.u.legacy.tiling_index[0];
out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
out->bank_height = fmask.u.legacy.bankh;
+   out->tile_swizzle = fmask.tile_swizzle;
out->alignment = MAX2(256, fmask.surf_alignment);
out->size = fmask.surf_size;
 }
 
 static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
 {
r600_texture_get_fmask_info(rscreen, rtex,
rtex->resource.b.b.nr_samples, 
>fmask);
 
@@ -1440,21 +1441,21 @@ struct pipe_resource *r600_texture_create(struct 
pipe_screen *screen,
 
 static struct pipe_resource *r600_texture_from_handle(struct pipe_screen 
*screen,
  const struct 
pipe_resource *templ,
  struct winsys_handle 
*whandle,
   unsigned usage)
 {
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pb_buffer *buf = NULL;
unsigned stride = 0, offset = 0;
unsigned array_mode;
-   struct radeon_surf surface;
+   struct radeon_surf surface = {};
int r;
struct radeon_bo_metadata metadata = {};
struct r600_texture *rtex;
bool is_scanout;
 
/* Support only 2D textures without mipmaps */
if ((templ->target != PIPE_TEXTURE_2D && templ->target != 
PIPE_TEXTURE_RECT) ||
  templ->depth0 != 1 || templ->last_level != 0)
return NULL;
 
@@ -1504,20 +1505,21 @@ static struct pipe_resource 
*r600_texture_from_handle(struct pipe_screen *screen
rtex->resource.external_usage = usage;
 
if (rscreen->apply_opaque_metadata)
rscreen->apply_opaque_metadata(rscreen, rtex, );
 
/* Validate that addrlib arrived at the same surface parameters. */
if (rscreen->chip_class >= GFX9) {
assert(metadata.u.gfx9.swizzle_mode == 
surface.u.gfx9.surf.swizzle_mode);
}
 
+   assert(rtex->surface.tile_swizzle == 0);
return >resource.b.b;
 }
 
 bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
 struct pipe_resource *texture,
 struct r600_texture **staging)
 {
struct r600_texture *rtex = (struct r600_texture*)texture;
struct pipe_resource resource;
struct r600_texture **flushed_depth_texture = staging ?
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c 
b/src/gallium/drivers/radeonsi/cik_sdma.c
index 99285a6..8154d72 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -155,20 +155,24 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
uint64_t src_address = rsrc->resource.gpu_address +
   rsrc->surface.u.legacy.level[src_level].offset;
unsigned dst_mode = rdst->surface.u.legacy.level[dst_level].mode;
unsigned src_mode = rsrc->surface.u.legacy.level[src_level].mode;
unsigned dst_tile_index = 
rdst->surface.u.legacy.tiling_index[dst_level];
unsigned 

[Mesa-dev] [PATCH 10/14] winsys/amdgpu: enable computation of tile swizzle

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 12 +++-
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h  |  2 ++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index d438b6d..99e4d77 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -85,19 +85,29 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
struct ac_surf_config config;
 
config.info.width = tex->width0;
config.info.height = tex->height0;
config.info.depth = tex->depth0;
config.info.array_size = tex->array_size;
config.info.samples = tex->nr_samples;
config.info.levels = tex->last_level + 1;
config.is_3d = !!(tex->target == PIPE_TEXTURE_3D);
config.is_cube = !!(tex->target == PIPE_TEXTURE_CUBE);
-   config.info.surf_index = NULL;
+
+   /* Use different surface counters for color and FMASK, so that MSAA MRTs
+* always use consecutive surface indices when FMASK is allocated between
+* them.
+*/
+   if (flags & RADEON_SURF_FMASK)
+  config.info.surf_index = &ws->surf_index_fmask;
+   else if (!(flags & RADEON_SURF_Z_OR_SBUFFER))
+  config.info.surf_index = &ws->surf_index_color;
+   else
+  config.info.surf_index = NULL;
 
return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
 }
 
 void amdgpu_surface_init_functions(struct amdgpu_winsys *ws)
 {
ws->base.surface_init = amdgpu_surface_init;
 }
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 7cd2f20..7aca612 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -50,20 +50,22 @@ struct amdgpu_winsys {
struct pipe_reference reference;
struct pb_cache bo_cache;
struct pb_slabs bo_slabs;
 
amdgpu_device_handle dev;
 
mtx_t bo_fence_lock;
 
int num_cs; /* The number of command streams created. */
unsigned num_total_rejected_cs;
+   uint32_t surf_index_color;
+   uint32_t surf_index_fmask;
uint32_t next_bo_unique_id;
uint64_t allocated_vram;
uint64_t allocated_gtt;
uint64_t mapped_vram;
uint64_t mapped_gtt;
uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
uint64_t num_gfx_IBs;
uint64_t num_sdma_IBs;
uint64_t num_mapped_buffers;
uint64_t gfx_bo_list_counter;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/14] ac/surface: increment surf_index only when tile swizzle is allowed

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_surface.c| 6 --
 src/amd/common/ac_surface.h| 2 +-
 src/amd/vulkan/radv_image.c| 2 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 1 +
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 61b4e41..68700f4 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -23,20 +23,21 @@
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
  */
 
 #include "ac_surface.h"
 #include "amd_family.h"
 #include "amdgpu_id.h"
 #include "ac_gpu_info.h"
 #include "util/macros.h"
+#include "util/u_atomic.h"
 #include "util/u_math.h"
 
 #include 
 #include 
 #include 
 #include 
 #include 
 
 #include "addrlib/addrinterface.h"
 
@@ -699,27 +700,28 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 
/* Make sure HTILE covers the whole miptree, because the shader reads
 * TC-compatible HTILE even for levels where it's disabled by DB.
 */
if (surf->htile_size && config->info.levels > 1)
surf->htile_size *= 2;
 
surf->is_linear = surf->u.legacy.level[0].mode == 
RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Work out tile swizzle. */
-   if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
+   if (config->info.surf_index &&
+   surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
!(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) 
&&
(config->info.samples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) 
{
ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
 
-   AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
+   AddrBaseSwizzleIn.surfIndex = 
p_atomic_inc_return(config->info.surf_index) - 1;
AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
AddrBaseSwizzleIn.macroModeIndex = 
AddrSurfInfoOut.macroModeIndex;
AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, 
&AddrBaseSwizzleOut);
 
assert(AddrBaseSwizzleOut.tileSwizzle <=
   u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
}
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 01a71f3..b2620f9 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -202,24 +202,24 @@ struct radeon_surf {
 
 /* GFX9+ return values. */
 struct gfx9_surf_layout gfx9;
 } u;
 };
 
 struct ac_surf_info {
uint32_t width;
uint32_t height;
uint32_t depth;
-   uint32_t surf_index;
uint8_t samples;
uint8_t levels;
uint16_t array_size;
+   uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
 };
 
 struct ac_surf_config {
struct ac_surf_info info;
unsigned is_3d : 1;
unsigned is_cube : 1;
 };
 
 ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info,
   const struct amdgpu_gpu_info *amdinfo,
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 499287d..8456d3a 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -802,21 +802,21 @@ radv_image_create(VkDevice _device,
for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; 
++i)
if (pCreateInfo->pQueueFamilyIndices[i] == 
VK_QUEUE_FAMILY_EXTERNAL_KHR)
image->queue_family_mask |= (1u << 
RADV_MAX_QUEUE_FAMILIES) - 1u;
else
image->queue_family_mask |= 1u << 
pCreateInfo->pQueueFamilyIndices[i];
}
 
image->shareable = vk_find_struct_const(pCreateInfo->pNext,

EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout 
&& !image->shareable) {
-   image->info.surf_index = 
p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
+   image->info.surf_index = &device->image_mrt_offset_counter;
}
 
radv_init_surface(device, >surface, create_info);
 
device->ws->surface_init(device->ws, >info, >surface);
 
image->size = image->surface.surf_size;
image->alignment = image->surface.surf_alignment;
 
if (image->exclusive || image->queue_family_mask == 1)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 

[Mesa-dev] [PATCH 11/14] gallium/radeon: reallocate textures with non-zero tile_swizzle on export

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 2d58dc9..07df2d4 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -561,27 +561,29 @@ static boolean r600_texture_get_handle(struct 
pipe_screen* screen,
rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context);
 
if (resource->target != PIPE_BUFFER) {
/* This is not supported now, but it might be required for 
OpenCL
 * interop in the future.
 */
if (resource->nr_samples > 1 || rtex->is_depth)
return false;
 
/* Move a suballocated texture into a non-suballocated 
allocation. */
-   if (rscreen->ws->buffer_is_suballocated(res->buf)) {
+   if (rscreen->ws->buffer_is_suballocated(res->buf) ||
+   rtex->surface.tile_swizzle) {
assert(!res->b.is_shared);
r600_reallocate_texture_inplace(rctx, rtex,
PIPE_BIND_SHARED, 
false);
rctx->b.flush(>b, NULL, 0);
assert(res->b.b.bind & PIPE_BIND_SHARED);
assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
+   assert(rtex->surface.tile_swizzle == 0);
}
 
/* Since shader image stores don't support DCC on VI,
 * disable it for external clients that want write
 * access.
 */
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
if (r600_texture_disable_dcc(rctx, rtex))
update_metadata = true;
}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/14] radeonsi: if FMASK is disabled, set CB_COLORi_FMASK = CB_COLORi_BASE properly

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index c151a98..6e67824 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2755,21 +2755,21 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
}
 
if (tex->dcc_separate_buffer)
radeon_add_to_buffer_list(>b, >b.gfx,
  tex->dcc_separate_buffer,
  RADEON_USAGE_READWRITE,
  RADEON_PRIO_DCC);
 
/* Compute mutable surface parameters. */
cb_color_base = tex->resource.gpu_address >> 8;
-   cb_color_fmask = cb_color_base;
+   cb_color_fmask = 0;
cb_dcc_base = 0;
cb_color_info = cb->cb_color_info | tex->cb_color_info;
cb_color_attrib = cb->cb_color_attrib;
 
if (tex->fmask.size)
cb_color_fmask = (tex->resource.gpu_address + 
tex->fmask.offset) >> 8;
 
/* Set up DCC. */
if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
bool is_msaa_resolve_dst = state->cbufs[0] &&
@@ -2787,20 +2787,22 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
if (sctx->b.chip_class >= GFX9) {
struct gfx9_surf_meta_flags meta;
 
if (tex->dcc_offset)
meta = tex->surface.u.gfx9.dcc;
else
meta = tex->surface.u.gfx9.cmask;
 
/* Set mutable surface parameters. */
cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
+   if (!tex->fmask.size)
+   cb_color_fmask = cb_color_base;
cb_color_attrib |= 
S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
   
S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) |
   S_028C74_RB_ALIGNED(meta.rb_aligned) 
|
   
S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
 
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE 
+ i * 0x3C, 15);
radeon_emit(cs, cb_color_base); /* 
CB_COLOR0_BASE */
radeon_emit(cs, cb_color_base >> 32);   /* 
CB_COLOR0_BASE_EXT */
radeon_emit(cs, cb->cb_color_attrib2);  /* 
CB_COLOR0_ATTRIB2 */
radeon_emit(cs, cb->cb_color_view); /* 
CB_COLOR0_VIEW */
@@ -2819,20 +2821,22 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i 
* 4,
   
S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch));
} else {
/* Compute mutable surface parameters (SI-CI-VI). */
const struct legacy_surf_level *level_info =

>surface.u.legacy.level[cb->base.u.tex.level];
unsigned pitch_tile_max, slice_tile_max, 
tile_mode_index;
unsigned cb_color_pitch, cb_color_slice, 
cb_color_fmask_slice;
 
cb_color_base += level_info->offset >> 8;
+   if (!tex->fmask.size)
+   cb_color_fmask = cb_color_base;
if (cb_dcc_base)
cb_dcc_base += level_info->dcc_offset >> 8;
 
pitch_tile_max = level_info->nblk_x / 8 - 1;
slice_tile_max = level_info->nblk_x *
 level_info->nblk_y / 64 - 1;
tile_mode_index = si_tile_mode_index(tex, 
cb->base.u.tex.level, false);
 
cb_color_attrib |= 
S_028C74_TILE_MODE_INDEX(tile_mode_index);
cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/14] TEST PIGLIT: ac/surface: compute tile swizzle for GFX9

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_surface.c| 87 +-
 src/amd/common/ac_surface.h|  3 +
 src/gallium/drivers/radeon/r600_texture.c  |  1 +
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c |  2 +
 4 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 823a65d..f13c270 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -792,20 +792,21 @@ gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,
 
ret = Addr2GetPreferredSurfaceSetting(addrlib, , );
if (ret != ADDR_OK)
return ret;
 
*swizzle_mode = sout.swizzleMode;
return 0;
 }
 
 static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
+   const struct ac_surf_config *config,
struct radeon_surf *surf, bool compressed,
ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
 {
ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {};
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
ADDR_E_RETURNCODE ret;
 
out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
out.pMipInfo = mip_info;
 
@@ -866,20 +867,51 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
ret = Addr2ComputeHtileInfo(addrlib, , );
if (ret != ADDR_OK)
return ret;
 
surf->u.gfx9.htile.rb_aligned = hin.hTileFlags.rbAligned;
surf->u.gfx9.htile.pipe_aligned = hin.hTileFlags.pipeAligned;
surf->htile_size = hout.htileBytes;
surf->htile_slice_size = hout.sliceSize;
surf->htile_alignment = hout.baseAlign;
} else {
+   /* Compute tile swizzle for the color surface. */
+   if (config->info.surf_index &&
+   in->swizzleMode >= ADDR_SW_4KB_Z_X &&
+   !out.mipChainInTail &&
+   !(surf->flags & RADEON_SURF_SHAREABLE) &&
+   (in->numSamples > 1 || !(surf->flags & 
RADEON_SURF_SCANOUT))) {
+   ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
+   ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
+
+   xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
+   xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
+
+   xin.surfIndex = 
p_atomic_inc_return(config->info.surf_index) - 1;
+   xin.flags = in->flags;
+   xin.swizzleMode = in->swizzleMode;
+   xin.resourceType = in->resourceType;
+   xin.format = in->format;
+   xin.numSamples = in->numSamples;
+   xin.numFrags = in->numFrags;
+
+   ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
+   if (ret != ADDR_OK)
+   return ret;
+
+   assert(xout.pipeBankXor <=
+  u_bit_consecutive(0, sizeof(surf->tile_swizzle) 
* 8));
+   surf->tile_swizzle = xout.pipeBankXor;
+
+   printf("tile_swizzle = 0x%x, index = %u\n", 
xout.pipeBankXor, xin.surfIndex);
+   }
+
/* DCC */
if (!(surf->flags & RADEON_SURF_DISABLE_DCC) &&
!(surf->flags & RADEON_SURF_SCANOUT) &&
!compressed &&
in->swizzleMode != ADDR_SW_LINEAR &&
/* TODO: We could support DCC with MSAA. */
in->numSamples == 1) {
ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
 
@@ -929,20 +961,50 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
fin.numFrags = in->numFrags;
 
ret = Addr2ComputeFmaskInfo(addrlib, , );
if (ret != ADDR_OK)
return ret;
 
surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode;
surf->u.gfx9.fmask.epitch = fout.pitch - 1;
surf->u.gfx9.fmask_size = fout.fmaskBytes;
surf->u.gfx9.fmask_alignment = fout.baseAlign;
+
+   /* Compute tile swizzle for the FMASK surface. */
+   if (config->info.fmask_surf_index &&
+   fin.swizzleMode >= ADDR_SW_4KB_Z_X &&
+   !(surf->flags & RADEON_SURF_SHAREABLE)) {
+   ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
+   ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
+
+   xin.size = 
sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
+   xout.size = 

[Mesa-dev] [PATCH 08/14] ac/surface: limit tile swizzle to non-mipmaps on SI

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

Mipmapping with tile swizzle doesn't work.
---
 src/amd/common/ac_surface.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 87a8993..3716d3d 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -428,21 +428,23 @@ static int gfx6_surface_settings(ADDR_HANDLE addrlib,
surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
surf->u.legacy.num_banks = csio->pTileInfo->banks;
surf->u.legacy.macro_tile_index = csio->macroModeIndex;
} else {
surf->u.legacy.macro_tile_index = 0;
}
 
/* Compute tile swizzle. */
-   if (config->info.surf_index &&
+   /* TODO: fix tile swizzle with mipmapping for SI */
+   if ((info->chip_class >= CIK || config->info.levels == 1) &&
+   config->info.surf_index &&
surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
!(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) 
&&
(config->info.samples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) 
{
ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
 
AddrBaseSwizzleIn.size = 
sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
AddrBaseSwizzleOut.size = 
sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
 
AddrBaseSwizzleIn.surfIndex = 
p_atomic_inc_return(config->info.surf_index) - 1;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/14] ac/surface: compute tile swizzle only when it's allowed

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_surface.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 4647ce4..61b4e41 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -698,22 +698,24 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
}
 
/* Make sure HTILE covers the whole miptree, because the shader reads
 * TC-compatible HTILE even for levels where it's disabled by DB.
 */
if (surf->htile_size && config->info.levels > 1)
surf->htile_size *= 2;
 
surf->is_linear = surf->u.legacy.level[0].mode == 
RADEON_SURF_MODE_LINEAR_ALIGNED;
 
-   /* workout base swizzle */
-   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
+   /* Work out tile swizzle. */
+   if (surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
+   !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) 
&&
+   (config->info.samples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) 
{
ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
 
AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
AddrBaseSwizzleIn.macroModeIndex = 
AddrSurfInfoOut.macroModeIndex;
AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, 
&AddrBaseSwizzleOut);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/14] ac/surface: add RADEON_SURF_SHAREABLE

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

Shareable textures won't use tile swizzle.
---
 src/amd/common/ac_surface.h   | 1 +
 src/gallium/drivers/radeon/r600_texture.c | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 3c9e13e..01a71f3 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -60,20 +60,21 @@ enum radeon_micro_mode {
 #define RADEON_SURF_SCANOUT (1 << 16)
 #define RADEON_SURF_ZBUFFER (1 << 17)
 #define RADEON_SURF_SBUFFER (1 << 18)
 #define RADEON_SURF_Z_OR_SBUFFER(RADEON_SURF_ZBUFFER | 
RADEON_SURF_SBUFFER)
 /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
 #define RADEON_SURF_FMASK   (1 << 21)
 #define RADEON_SURF_DISABLE_DCC (1 << 22)
 #define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
 #define RADEON_SURF_IMPORTED(1 << 24)
 #define RADEON_SURF_OPTIMIZE_FOR_SPACE  (1 << 25)
+#define RADEON_SURF_SHAREABLE   (1 << 26)
 
 struct legacy_surf_level {
 uint64_toffset;
 uint64_tslice_size;
 uint64_tdcc_offset;
 uint64_tdcc_fast_clear_size;
 uint16_tnblk_x;
 uint16_tnblk_y;
 enum radeon_surf_mode   mode;
 };
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 3aac3c7..2d58dc9 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -273,22 +273,24 @@ static int r600_init_surface(struct r600_common_screen 
*rscreen,
/* This should catch bugs in gallium users setting incorrect 
flags. */
assert(ptex->nr_samples <= 1 &&
   ptex->array_size == 1 &&
   ptex->depth0 == 1 &&
   ptex->last_level == 0 &&
   !(flags & RADEON_SURF_Z_OR_SBUFFER));
 
flags |= RADEON_SURF_SCANOUT;
}
 
+   if (ptex->bind & PIPE_BIND_SHARED)
+   flags |= RADEON_SURF_SHAREABLE;
if (is_imported)
-   flags |= RADEON_SURF_IMPORTED;
+   flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
 
r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe,
  array_mode, surface);
if (r) {
return r;
}
 
if (rscreen->chip_class >= GFX9) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/14] ac/surface: remove RADEON_SURF_HAS_TILE_MODE_INDEX

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

it's useless
---
 src/amd/common/ac_surface.h| 1 -
 src/amd/vulkan/radv_image.c| 1 -
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c | 3 ---
 3 files changed, 5 deletions(-)

diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index ee96003..3c9e13e 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -55,21 +55,20 @@ enum radeon_micro_mode {
 RADEON_MICRO_MODE_DEPTH = 2,
 RADEON_MICRO_MODE_ROTATED = 3,
 };
 
 /* the first 16 bits are reserved for libdrm_radeon, don't use them */
 #define RADEON_SURF_SCANOUT (1 << 16)
 #define RADEON_SURF_ZBUFFER (1 << 17)
 #define RADEON_SURF_SBUFFER (1 << 18)
 #define RADEON_SURF_Z_OR_SBUFFER(RADEON_SURF_ZBUFFER | 
RADEON_SURF_SBUFFER)
 /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
-#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
 #define RADEON_SURF_FMASK   (1 << 21)
 #define RADEON_SURF_DISABLE_DCC (1 << 22)
 #define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
 #define RADEON_SURF_IMPORTED(1 << 24)
 #define RADEON_SURF_OPTIMIZE_FOR_SPACE  (1 << 25)
 
 struct legacy_surf_level {
 uint64_toffset;
 uint64_tslice_size;
 uint64_tdcc_offset;
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 4b47e17..499287d 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -101,21 +101,20 @@ radv_init_surface(struct radv_device *device,
unreachable("unhandled image type");
}
 
if (is_depth) {
surface->flags |= RADEON_SURF_ZBUFFER;
}
 
if (is_stencil)
surface->flags |= RADEON_SURF_SBUFFER;
 
-   surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
 
if ((pCreateInfo->usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
   VK_IMAGE_USAGE_STORAGE_BIT)) ||
(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
 (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
 device->physical_device->rad_info.chip_class < VI ||
 create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) 
||
 !radv_is_colorbuffer_format_supported(pCreateInfo->format, 
))
surface->flags |= RADEON_SURF_DISABLE_DCC;
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
index eaa978e..e3ccb81 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
@@ -35,23 +35,20 @@
 #include "radv_amdgpu_surface.h"
 #include "sid.h"
 
 #include "ac_surface.h"
 
 static int radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info,
  const struct radeon_surf *surf)
 {
unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
 
-   if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX))
-   return -EINVAL;
-
if (!surf->blk_w || !surf->blk_h)
return -EINVAL;
 
switch (type) {
case RADEON_SURF_TYPE_1D:
if (surf_info->height > 1)
return -EINVAL;
/* fall through */
case RADEON_SURF_TYPE_2D:
case RADEON_SURF_TYPE_CUBEMAP:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/14] ac/surface: set structure size and handle errors for AddrComputeBaseSwizzle

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_surface.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 68700f4..1eff4e5 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -707,26 +707,33 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
surf->is_linear = surf->u.legacy.level[0].mode == 
RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Work out tile swizzle. */
if (config->info.surf_index &&
surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
!(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) 
&&
(config->info.samples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) 
{
ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
 
+   AddrBaseSwizzleIn.size = 
sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
+   AddrBaseSwizzleOut.size = 
sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
+
AddrBaseSwizzleIn.surfIndex = 
p_atomic_inc_return(config->info.surf_index) - 1;
AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
AddrBaseSwizzleIn.macroModeIndex = 
AddrSurfInfoOut.macroModeIndex;
AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
-   AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, 
&AddrBaseSwizzleOut);
+
+   r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
+  &AddrBaseSwizzleOut);
+   if (r != ADDR_OK)
+   return r;
 
assert(AddrBaseSwizzleOut.tileSwizzle <=
   u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
}
return 0;
 }
 
 /* This is only called when expecting a tiled layout. */
 static int
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/14] AMD GCN tile swizzle

2017-07-31 Thread Marek Olšák
Hi,

This might slightly increase MRT performance. You need the amdgpu
kernel driver if you want it for SI and CI.

I've fixed a few issues with the original code, enabled it for
mipmapped textures, and added GFX9 support. I wasn't able to make it
work with mipmapping on SI. There is also a hack to make mipmapping
work with DCC on VI - my solution is to overallocate DCC to make
corruption go away with tile swizzling.

I've tested SI, CI, VI.

I haven't tested GFX9. The GFX9 patch is just the first version for
testing. With current GB_ADDR_CONFIG, only Vega10 can do tile
swizzling. The code doesn't do anything for Raven due to its (possibly
incorrect) GB_ADDR_CONFIG.

Please review.

Thanks,
Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/14] ac/surface: align DCC size for surfaces that use tile swizzle

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

Note that dcc_alignment = pipe_interleave_bytes * num_pipes * num_banks,
which is greater than the previous open-coded alignment.
---
 src/amd/common/ac_surface.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 3716d3d..823a65d 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -727,23 +727,30 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
}
}
}
}
 
/* Recalculate the whole DCC miptree size including disabled levels.
 * This is what addrlib does, but calling addrlib would be a lot more
 * complicated.
 */
if (surf->dcc_size && config->info.levels > 1) {
+   /* The smallest miplevels that are never compressed by DCC
+* still read the DCC buffer via TC if the base level uses DCC,
+* and for some reason the DCC buffer needs to be larger if
+* the miptree uses non-zero tile_swizzle. Otherwise there are
+* VM faults.
+*
+* "dcc_alignment * 4" was determined by trial and error.
+*/
surf->dcc_size = align64(surf->surf_size >> 8,
-info->pipe_interleave_bytes *
-info->num_tile_pipes);
+surf->dcc_alignment * 4);
}
 
/* Make sure HTILE covers the whole miptree, because the shader reads
 * TC-compatible HTILE even for levels where it's disabled by DB.
 */
if (surf->htile_size && config->info.levels > 1)
surf->htile_size *= 2;
 
surf->is_linear = surf->u.legacy.level[0].mode == 
RADEON_SURF_MODE_LINEAR_ALIGNED;
return 0;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/14] ac/surface: move tile_swizzle to ac_surface and document it

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

Gfx9 will use it too.
---
 src/amd/common/ac_surface.c  |  5 -
 src/amd/common/ac_surface.h  | 16 +++-
 src/amd/vulkan/radv_device.c |  6 +++---
 src/amd/vulkan/radv_image.c  |  6 +++---
 4 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 26f3729..4647ce4 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -709,21 +709,24 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
 
AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
AddrBaseSwizzleIn.macroModeIndex = 
AddrSurfInfoOut.macroModeIndex;
AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, 
&AddrBaseSwizzleOut);
-   surf->u.legacy.tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+
+   assert(AddrBaseSwizzleOut.tileSwizzle <=
+  u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
+   surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
}
return 0;
 }
 
 /* This is only called when expecting a tiled layout. */
 static int
 gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,
ADDR2_COMPUTE_SURFACE_INFO_INPUT *in,
bool is_fmask, AddrSwizzleMode *swizzle_mode)
 {
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 3eaef63..ee96003 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -90,21 +90,20 @@ struct legacy_surf_layout {
 unsignedmacro_tile_index:4; /* max 15 */
 
 /* Whether the depth miptree or stencil miptree as used by the DB are
  * adjusted from their TC compatible form to ensure depth/stencil
  * compatibility. If either is true, the corresponding plane cannot be
  * sampled from.
  */
 unsigneddepth_adjusted:1;
 unsignedstencil_adjusted:1;
 
-uint8_t tile_swizzle;
 struct legacy_surf_levellevel[RADEON_SURF_MAX_LEVELS];
 struct legacy_surf_levelstencil_level[RADEON_SURF_MAX_LEVELS];
 uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
 uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
 };
 
 /* Same as addrlib - AddrResourceType. */
 enum gfx9_resource_type {
 RADEON_RESOURCE_1D = 0,
 RADEON_RESOURCE_2D,
@@ -161,20 +160,35 @@ struct radeon_surf {
 unsignednum_dcc_levels:4;
 unsignedis_linear:1;
 /* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
 unsignedmicro_tile_mode:3;
 uint32_tflags;
 
 /* These are return values. Some of them can be set by the caller, but
  * they will be treated as hints (e.g. bankw, bankh) and might be
  * changed by the calculator.
  */
+
+/* Tile swizzle can be OR'd with low bits of the BASE_256B address.
+ * The value is the same for all mipmap levels. Supported tile modes:
+ * - GFX6: Only macro tiling.
+ * - GFX9: Only *_X swizzle modes. Level 0 must not be in the mip tail.
+ *
+ * Only these surfaces are allowed to set it:
+ * - color (if it doesn't have to be displayable)
+ * - DCC (same tile swizzle as color)
+ * - FMASK
+ * - CMASK if it's TC-compatible or if the gen is GFX9
+ * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
+ */
+uint8_t tile_swizzle;
+
 uint64_tsurf_size;
 uint64_tdcc_size;
 uint64_thtile_size;
 
 uint32_thtile_slice_size;
 
 uint32_tsurf_alignment;
 uint32_tdcc_alignment;
 uint32_thtile_alignment;
 
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 19f1e10..57081e4 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2998,48 +2998,48 @@ radv_initialise_color_surface(struct radv_device 
*device,
/* This must be set for fast clear to work without 
FMASK. */
if (device->physical_device->rad_info.chip_class >= CIK)
cb->cb_color_pitch |= 
S_028C64_FMASK_TILE_MAX(pitch_tile_max);
cb->cb_color_attrib |= 
S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);

[Mesa-dev] [PATCH] Revert "st_glsl_to_tgsi: rewrite rename registers to use array fully."

2017-07-31 Thread Dave Airlie
From: Dave Airlie 

This reverts commit 3008161d28e38336ba39aba4769a2deaf9732f55,
which caused a regression for VMWare.

The initial code had some recursion in it that I removed by accident.
Trying to add back the recursion broke lots of things, so take the high
road and revert for now.

Fixes: 3008161d (st_glsl_to_tgsi: rewrite rename registers to use array fully.)
Reviewed-by: Brian Paul 
Tested-by: Brian Paul 
Signed-off-by: Dave Airlie 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 55 --
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3983fe7..d496fff 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -399,7 +399,7 @@ find_array_type(struct inout_decl *decls, unsigned count, 
unsigned array_id)
 }
 
 struct rename_reg_pair {
-   bool valid;
+   int old_reg;
int new_reg;
 };
 
@@ -568,7 +568,7 @@ public:
 
void simplify_cmp(void);
 
-   void rename_temp_registers(struct rename_reg_pair *renames);
+   void rename_temp_registers(int num_renames, struct rename_reg_pair 
*renames);
void get_first_temp_read(int *first_reads);
void get_first_temp_write(int *first_writes);
void get_last_temp_read_first_temp_write(int *last_reads, int 
*first_writes);
@@ -4835,37 +4835,36 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
 
 /* Replaces all references to a temporary register index with another index. */
 void
-glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames)
+glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct 
rename_reg_pair *renames)
 {
foreach_in_list(glsl_to_tgsi_instruction, inst, >instructions) {
   unsigned j;
+  int k;
   for (j = 0; j < num_inst_src_regs(inst); j++) {
- if (inst->src[j].file == PROGRAM_TEMPORARY) {
-int old_idx = inst->src[j].index;
-if (renames[old_idx].valid)
-   inst->src[j].index = renames[old_idx].new_reg;
- }
+ if (inst->src[j].file == PROGRAM_TEMPORARY)
+for (k = 0; k < num_renames; k++)
+   if (inst->src[j].index == renames[k].old_reg)
+  inst->src[j].index = renames[k].new_reg;
   }
 
   for (j = 0; j < inst->tex_offset_num_offset; j++) {
- if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
-int old_idx = inst->tex_offsets[j].index;
-if (renames[old_idx].valid)
-   inst->tex_offsets[j].index = renames[old_idx].new_reg;
- }
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
+for (k = 0; k < num_renames; k++)
+   if (inst->tex_offsets[j].index == renames[k].old_reg)
+  inst->tex_offsets[j].index = renames[k].new_reg;
   }
 
   if (inst->resource.file == PROGRAM_TEMPORARY) {
- int old_idx = inst->resource.index;
- if (renames[old_idx].valid)
-inst->resource.index = renames[old_idx].new_reg;
+ for (k = 0; k < num_renames; k++)
+if (inst->resource.index == renames[k].old_reg)
+   inst->resource.index = renames[k].new_reg;
   }
 
   for (j = 0; j < num_inst_dst_regs(inst); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY) {
-int old_idx = inst->dst[j].index;
-if (renames[old_idx].valid)
-   inst->dst[j].index = renames[old_idx].new_reg;}
+ if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ for (k = 0; k < num_renames; k++)
+if (inst->dst[j].index == renames[k].old_reg)
+   inst->dst[j].index = renames[k].new_reg;
   }
}
 }
@@ -5446,6 +5445,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct 
rename_reg_pair, this->next_temp);
int i, j;
+   int num_renames = 0;
 
/* Read the indices of the last read and first write to each temp register
 * into an array so that we don't have to traverse the instruction list as
@@ -5472,8 +5472,9 @@ glsl_to_tgsi_visitor::merge_registers(void)
   * as the register at index j. */
  if (first_writes[i] <= first_writes[j] &&
  last_reads[i] <= first_writes[j]) {
-renames[j].new_reg = i;
-renames[j].valid = true;
+renames[num_renames].old_reg = j;
+renames[num_renames].new_reg = i;
+num_renames++;
 
 /* Update the first_writes and last_reads arrays with the new
  * values for the merged register index, and mark the newly unused
@@ -5486,7 +5487,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
   }
}
 
-   rename_temp_registers(renames);
+   

Re: [Mesa-dev] [PATCH] docs: Add Vulkan to features.txt

2017-07-31 Thread Jordan Justen
On 2017-07-31 16:08:51, Bas Nieuwenhuizen wrote:
> On Tue, Aug 1, 2017 at 12:32 AM, Jordan Justen
> > +Vulkan 1.0 -- all DONE: anv
> 
> So while we don't have conformance, we have several times had local
> conformance suite runs pass all tests, so I think we can write up radv
> as all done here too?

Ok, I can add radv here if you recommend it.

> > +
> > +Khronos and EXT extensions that are not part of any Vulkan version:
> > +  VK_EXT_acquire_xlib_display   not started
> > +  VK_EXT_blend_operation_advanced   not started
> > +  VK_EXT_debug_marker   not started
> 
> Do we even want to implement this as a driver, or let this be for the layers?

I just used grep to find the extensions. If the consensus is to drop
this, I can do that.

> > +  VK_KHX_multiview  DONE (anv)
> 
> I started this one for radv too, not sure if we put those in if one
> driver already has them though.

Based on GL, I don't think we have a way to indicate that it is
started for another driver after one has completed it. Do you have a
recommendation?

Side note: Looking at the header of the file, it looks like I should
use 'in progress' rather than 'started'.

> btw, no vendor extensions?

Looking at the 'Khronos, ARB, and OES extensions that are not part of
any OpenGL or OpenGL ES version' section, it doesn't seem to include
vendor extensions. I did see a few 'NV' extensions in other sections
of the file, but I think in those cases they were rolled into the core
versions.

Should we add another section for extensions that at least 1 driver
has implemented or started to implement?

-Jordan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs: Add Vulkan to features.txt

2017-07-31 Thread Bas Nieuwenhuizen
On Tue, Aug 1, 2017 at 12:32 AM, Jordan Justen
 wrote:
> To get the extension list:
>
> $ git grep -hE "extension name=\"VK_(EXT|KHR|KHX)" src/vulkan/registry/vk.xml 
> | \
>   grep -v disabled | awk '{print $2}' | sed -E 's/(name=)?"//g' | sort
>
> To find anv(il) and radv supported extensions:
>
> $ git grep -hE "'VK_([A-Z]+)_[a-z]" src/intel/
>
> $ git grep -hE "'VK_([A-Z]+)_[a-z]" src/amd/
>
> Signed-off-by: Jordan Justen 
> Cc: Jason Ekstrand 
> Cc: Dave Airlie 
> ---
>  docs/features.txt | 58 
> +++
>  1 file changed, 58 insertions(+)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index 1f628e1c030..5ec1591540a 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -333,6 +333,64 @@ we DO NOT WANT implementations of these extensions for 
> Mesa.
>GL_ARB_shadow_ambient Superseded by 
> GL_ARB_fragment_program
>GL_ARB_vertex_blend   Superseded by 
> GL_ARB_vertex_program
>
> +Vulkan 1.0 -- all DONE: anv

So while we don't have conformance, we have several times had local
conformance suite runs pass all tests, so I think we can write up radv
as all done here too?
> +
> +Khronos and EXT extensions that are not part of any Vulkan version:
> +  VK_EXT_acquire_xlib_display   not started
> +  VK_EXT_blend_operation_advanced   not started
> +  VK_EXT_debug_marker   not started

Do we even want to implement this as a driver, or let this be for the layers?

> +  VK_EXT_debug_report   not started
> +  VK_EXT_direct_mode_displaynot started
> +  VK_EXT_discard_rectangles not started
> +  VK_EXT_display_controlnot started
> +  VK_EXT_display_surface_counternot started
> +  VK_EXT_hdr_metadata   not started
> +  VK_EXT_sampler_filter_minmax  not started
> +  VK_EXT_shader_subgroup_ballot not started
> +  VK_EXT_shader_subgroup_vote   not started
> +  VK_EXT_swapchain_colorspace   not started
> +  VK_EXT_validation_flags   not started
> +  VK_KHR_16bit_storage  started (Alejandro)
> +  VK_KHR_android_surfacenot started
> +  VK_KHR_dedicated_allocation   DONE (anv, radv)
> +  VK_KHR_descriptor_update_template DONE (anv, radv)
> +  VK_KHR_displaynot started
> +  VK_KHR_display_swapchain  not started
> +  VK_KHR_external_fence not started
> +  VK_KHR_external_fence_capabilitiesnot started
> +  VK_KHR_external_fence_fd  not started
> +  VK_KHR_external_fence_win32   not started
> +  VK_KHR_external_memoryDONE (anv, radv)
> +  VK_KHR_external_memory_capabilities   DONE (anv, radv)
> +  VK_KHR_external_memory_fd DONE (anv, radv)
> +  VK_KHR_external_memory_win32  not started
> +  VK_KHR_external_semaphore DONE (radv)
> +  VK_KHR_external_semaphore_capabilitiesDONE (radv)
> +  VK_KHR_external_semaphore_fd  DONE (radv)
> +  VK_KHR_external_semaphore_win32   not started
> +  VK_KHR_get_memory_requirements2   DONE (anv, radv)
> +  VK_KHR_get_physical_device_properties2DONE (anv, radv)
> +  VK_KHR_get_surface_capabilities2  DONE (anv)
> +  VK_KHR_incremental_presentDONE (anv, radv)
> +  VK_KHR_maintenance1   DONE (anv, radv)
> +  VK_KHR_mir_surfacenot started
> +  VK_KHR_push_descriptorDONE (anv, radv)
> +  VK_KHR_sampler_mirror_clamp_to_edge   DONE (anv, radv)
> +  VK_KHR_shader_draw_parameters DONE (anv, radv)
> +  VK_KHR_shared_presentable_image   not started
> +  VK_KHR_storage_buffer_storage_class   DONE (anv, radv)
> +  VK_KHR_surfaceDONE (anv, radv)
> +  VK_KHR_swapchain  DONE (anv, radv)
> +  VK_KHR_variable_pointers  DONE (anv, radv)
> +  VK_KHR_wayland_surfaceDONE (anv, radv)
> +  VK_KHR_win32_keyed_mutex  not started
> +  

Re: [Mesa-dev] [PATCH] st/dri: don't set PIPE_BIND_SHARED for privately-allocated renderbuffers

2017-07-31 Thread Eric Anholt
Marek Olšák  writes:

> From: Marek Olšák 
>
> which are MSAA and depth/stencil buffers.

Reviewed-by: Eric Anholt 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: fix bad cast conversions in viewport()

2017-07-31 Thread Roland Scheidegger
Am 31.07.2017 um 21:43 schrieb Samuel Pitoiset:
> Fixes: ddc32537d6 ("mesa: clamp viewport values only once when using 
> glViewport()")

Reviewed-by: Roland Scheidegger 

FWIW there are quite a few bogus function comments where the indicated
callers no longer match (e.g. "Usually called from _mesa_Viewport()."
when it's never called by that, or similarly "calls
_mesa_set_viewport()" when it doesn't do that).

Roland

> Signed-off-by: Samuel Pitoiset 
> ---
>  src/mesa/main/viewport.c | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c
> index 3dce320d1d..fc384909e6 100644
> --- a/src/mesa/main/viewport.c
> +++ b/src/mesa/main/viewport.c
> @@ -94,9 +94,10 @@ static void
>  viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei width,
>   GLsizei height)
>  {
> +   struct gl_viewport_inputs input = { x, y, width, height };
> +
> /* Clamp the viewport to the implementation dependent values. */
> -   clamp_viewport(ctx, (GLfloat *)&x, (GLfloat *)&y,
> -  (GLfloat *)&width, (GLfloat *)&height);
> +   clamp_viewport(ctx, &input.X, &input.Y, &input.Width, &input.Height);
>  
> /* The GL_ARB_viewport_array spec says:
>  *
> @@ -110,7 +111,7 @@ viewport(struct gl_context *ctx, GLint x, GLint y, 
> GLsizei width,
>  * signal the driver once at the end.
>  */
> for (unsigned i = 0; i < ctx->Const.MaxViewports; i++)
> -  set_viewport_no_notify(ctx, i, x, y, width, height);
> +  set_viewport_no_notify(ctx, i, input.X, input.Y, input.Width, 
> input.Height);
>  
> if (ctx->Driver.Viewport)
>ctx->Driver.Viewport(ctx);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [AppVeyor] mesa master #5074 completed

2017-07-31 Thread Roland Scheidegger
Am 01.08.2017 um 00:29 schrieb Nicolai Hähnle:
> On 01.08.2017 00:25, Roland Scheidegger wrote:
>> FWIW while this works with the windows build, it still does not with a
>> scons linux build here:
>>Compiling src/mesa/drivers/dri/common/dri_util.c ...
>> In file included from src/mesa/drivers/dri/common/dri_util.c:45:0:
>> src/util/xmlpool.h:103:29: fatal error: xmlpool/options.h: No such file
>> or directory
>> compilation terminated.
>>
>> Looks to me like the include path should be provided by
>> xmlpool_options.dir.dir but I'm not quite sure how that's supposed to
>> work or why it's failing... But the file is definitely generated
>> (./build/linux-x86_64-debug/util/xmlpool/options.h)
> 
> Hmm, that seems to be a separate issue. I did try to build on Linux with
> SCons, but without any arguments, and that works fine. What's the
> command line to reproduce that build failure?

I did not add any arguments I just compiled it like so:
LLVM= scons -j8

Maybe it'll depend on scons or python versions then...

Roland


> Thanks,
> Nicolai
> 
> 
>>
>>
>> Roland
>>
>> Am 31.07.2017 um 17:25 schrieb AppVeyor:
>>>
>>>Build mesa 5074 completed
>>>   
>>> >> >
>>>
>>> Commit 90c8f17cf8 by Nicolai Hähnle  on
>>> 7/31/2017 3:17 PM:
>>> Attempt to fix AppVeyor build, round 2
>>>
>>> Configure your notification preferences
>>> >> >
>>>
>>>
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev=DwID-g=uilaK90D4TOVoH58JNXRgQ=_QIjpv-UJ77xEQY8fIYoQtr5qv8wKrPJc7v7_-CYAb0=g7S2IPCa5AOkGzjpV0YRlmb5Sw4Y967O0lJRijMg9jM=A3czOZqmZEe7ssFeOjWBijBixT1CY6iaMZ0vligU86s=
>>>
>>
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] radeonsi: print CE IBs into ddebug reports

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/r600/r600_hw_context.c|  2 +-
 src/gallium/drivers/radeon/r600_pipe_common.c |  7 +--
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
 src/gallium/drivers/radeonsi/si_debug.c   | 14 --
 src/gallium/drivers/radeonsi/si_hw_context.c  | 10 ++
 src/gallium/drivers/radeonsi/si_pipe.h|  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c  | 15 +++
 7 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index ca7f41d..a821c35 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -277,21 +277,21 @@ void r600_context_gfx_flush(void *context, unsigned flags,
if (ctx->trace_buf)
eg_trace_emit(ctx);
/* old kernels and userspace don't set SX_MISC, so we must reset it to 
0 here */
if (ctx->b.chip_class == R600) {
radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
}
 
if (ctx->is_debug) {
/* Save the IB for debug contexts. */
radeon_clear_saved_cs(>last_gfx);
-   radeon_save_cs(ws, cs, >last_gfx);
+   radeon_save_cs(ws, cs, >last_gfx, true);
r600_resource_reference(>last_trace_buf, ctx->trace_buf);
r600_resource_reference(>trace_buf, NULL);
}
/* Flush the CS. */
ws->cs_flush(cs, flags, >b.last_gfx_fence);
if (fence)
ws->fence_reference(fence, ctx->b.last_gfx_fence);
ctx->b.num_gfx_cs_flushes++;
 
if (ctx->is_debug) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 8c66cc3..c58048f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -435,21 +435,21 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
(rctx->screen->debug_flags & DBG_CHECK_VM) &&
rctx->check_vm_faults;
 
if (!radeon_emitted(cs, 0)) {
if (fence)
rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
return;
}
 
if (check_vm)
-   radeon_save_cs(rctx->ws, cs, );
+   radeon_save_cs(rctx->ws, cs, , true);
 
rctx->ws->cs_flush(cs, flags, >last_sdma_fence);
if (fence)
rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
 
if (check_vm) {
/* Use conservative timeout 800ms, after which we won't wait any
 * longer and assume the GPU is hung.
 */
rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 
800*1000*1000);
@@ -457,38 +457,41 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
rctx->check_vm_faults(rctx, , RING_DMA);
radeon_clear_saved_cs();
}
 }
 
 /**
  * Store a linearized copy of all chunks of \p cs together with the buffer
  * list in \p saved.
  */
 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
-   struct radeon_saved_cs *saved)
+   struct radeon_saved_cs *saved, bool get_buffer_list)
 {
void *buf;
unsigned i;
 
/* Save the IB chunks. */
saved->num_dw = cs->prev_dw + cs->current.cdw;
saved->ib = MALLOC(4 * saved->num_dw);
if (!saved->ib)
goto oom;
 
buf = saved->ib;
for (i = 0; i < cs->num_prev; ++i) {
memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
buf += cs->prev[i].cdw;
}
memcpy(buf, cs->current.buf, cs->current.cdw * 4);
 
+   if (!get_buffer_list)
+   return;
+
/* Save the buffer list. */
saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
saved->bo_list = CALLOC(saved->bo_count,
sizeof(saved->bo_list[0]));
if (!saved->bo_list) {
FREE(saved->ib);
goto oom;
}
ws->cs_get_buffer_list(cs, saved->bo_list);
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 4839c76..b391cbb 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -760,21 +760,21 @@ bool r600_can_dump_shader(struct r600_common_screen 
*rscreen,
 bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
  unsigned processor);
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct 
pipe_resource *dst,
  uint64_t offset, uint64_t size, unsigned value);
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,

[Mesa-dev] [PATCH] st/dri: don't set PIPE_BIND_SHARED for privately-allocated renderbuffers

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

which are MSAA and depth/stencil buffers.
---
 src/gallium/state_trackers/dri/dri2.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/dri/dri2.c 
b/src/gallium/state_trackers/dri/dri2.c
index 0cbc76f..e4e2a53 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -725,21 +725,22 @@ dri2_allocate_textures(struct dri_context *ctx,
/* Allocate private MSAA colorbuffers. */
if (drawable->stvis.samples > 1) {
   for (i = 0; i < statts_count; i++) {
  enum st_attachment_type statt = statts[i];
 
  if (statt == ST_ATTACHMENT_DEPTH_STENCIL)
 continue;
 
  if (drawable->textures[statt]) {
 templ.format = drawable->textures[statt]->format;
-templ.bind = drawable->textures[statt]->bind & ~PIPE_BIND_SCANOUT;
+templ.bind = drawable->textures[statt]->bind &
+ ~(PIPE_BIND_SCANOUT | PIPE_BIND_SHARED);
 templ.nr_samples = drawable->stvis.samples;
 
 /* Try to reuse the resource.
  * (the other resource parameters should be constant)
  */
 if (!drawable->msaa_textures[statt] ||
 drawable->msaa_textures[statt]->width0 != templ.width0 ||
 drawable->msaa_textures[statt]->height0 != templ.height0) {
/* Allocate a new one. */
pipe_resource_reference(>msaa_textures[statt], NULL);
@@ -774,21 +775,21 @@ dri2_allocate_textures(struct dri_context *ctx,
if (alloc_depthstencil) {
   enum st_attachment_type statt = ST_ATTACHMENT_DEPTH_STENCIL;
   struct pipe_resource **zsbuf;
   enum pipe_format format;
   unsigned bind;
 
   dri_drawable_get_format(drawable, statt, , );
 
   if (format) {
  templ.format = format;
- templ.bind = bind;
+ templ.bind = bind & ~PIPE_BIND_SHARED;
 
  if (drawable->stvis.samples > 1) {
 templ.nr_samples = drawable->stvis.samples;
 zsbuf = >msaa_textures[statt];
  }
  else {
 templ.nr_samples = 0;
 zsbuf = >textures[statt];
  }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] radeonsi: fix printing vertex buffer descriptors into ddebug reports

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_debug.c   | 3 +++
 src/gallium/drivers/radeonsi/si_descriptors.c | 7 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index 06dea61..7c8a0fe 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -385,20 +385,23 @@ typedef unsigned (*slot_remap_func)(unsigned);
 static void si_dump_descriptor_list(struct si_descriptors *desc,
const char *shader_name,
const char *elem_name,
unsigned element_dw_size,
unsigned num_elements,
slot_remap_func slot_remap,
FILE *f)
 {
unsigned i, j;
 
+   if (!desc->list)
+   return;
+
for (i = 0; i < num_elements; i++) {
unsigned dw_offset = slot_remap(i) * element_dw_size;
uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : 
desc->list;
const char *list_note = desc->gpu_list ? "GPU list" : "CPU 
list";
uint32_t *cpu_list = desc->list + dw_offset;
uint32_t *gpu_list = gpu_ptr + dw_offset;
 
fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
shader_name, elem_name, i, list_note);
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index b080562..4de6086 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1094,20 +1094,21 @@ bool si_upload_vertex_buffer_descriptors(struct 
si_context *sctx)
 * the fine-grained upload path.
 */
u_upload_alloc(sctx->b.b.const_uploader, 0,
   desc_list_byte_size,
   si_optimal_tcc_alignment(sctx, desc_list_byte_size),
   (unsigned*)>buffer_offset,
   (struct pipe_resource**)>buffer, (void**));
if (!desc->buffer)
return false;
 
+   desc->list = ptr;
radeon_add_to_buffer_list(>b, >b.gfx,
  desc->buffer, RADEON_USAGE_READ,
  RADEON_PRIO_DESCRIPTORS);
 
assert(count <= SI_MAX_ATTRIBS);
 
for (i = 0; i < count; i++) {
struct pipe_vertex_buffer *vb;
struct r600_resource *rbuffer;
unsigned offset;
@@ -2827,20 +2828,22 @@ void si_init_all_descriptors(struct si_context *sctx)
 SI_SGPR_RW_BUFFERS,
 /* The second set of usage/priority is used by
  * const buffers in RW buffer slots. */
 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
 RADEON_PRIO_SHADER_RINGS, 
RADEON_PRIO_CONST_BUFFER,
 _offset);
sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = 
SI_NUM_RW_BUFFERS;
 
si_init_descriptors(sctx, >vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
4, SI_NUM_VERTEX_BUFFERS, 0, 0, NULL);
+   FREE(sctx->vertex_buffers.list); /* not used */
+   sctx->vertex_buffers.list = NULL;
 
sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
sctx->total_ce_ram_allocated = ce_offset;
 
if (sctx->b.chip_class >= GFX9)
assert(ce_offset <= 4096);
else
assert(ce_offset <= 32768);
 
/* Set pipe_context functions. */
@@ -2940,21 +2943,23 @@ void si_release_all_descriptors(struct si_context *sctx)
si_release_sampler_views(>samplers[i].views);
si_release_image_views(>images[i]);
}
si_release_buffer_resources(>rw_buffers,
>descriptors[SI_DESCS_RW_BUFFERS]);
for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
pipe_vertex_buffer_unreference(>vertex_buffer[i]);
 
for (i = 0; i < SI_NUM_DESCS; ++i)
si_release_descriptors(>descriptors[i]);
-   si_release_descriptors(>vertex_buffers);
+
+   /* Only one member of si_descriptors needs to be freed: */
+   r600_resource_reference(>vertex_buffers.buffer, NULL);
 }
 
 void si_all_descriptors_begin_new_cs(struct si_context *sctx)
 {
int i;
 
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_buffer_resources_begin_new_cs(sctx, 
>const_and_shader_buffers[i]);
si_sampler_views_begin_new_cs(sctx, >samplers[i].views);
si_image_views_begin_new_cs(sctx, >images[i]);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH 1/6] radeonsi: fix various CLEAR_STATE issues

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 7dadc4a..c151a98 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4542,24 +4542,46 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
   raster_config);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, 
R_028354_PA_SC_RASTER_CONFIG_1,
   raster_config_1);
} else {
si_write_harvested_raster_configs(sctx, pm4, 
raster_config, raster_config_1);
}
}
 
+   /* CLEAR_STATE doesn't clear these correctly on certain generations.
+* I don't know why. Deduced by trial and error.
+*/
+   if (sctx->b.chip_class <= CIK) {
+   si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+   si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, 
S_028204_WINDOW_OFFSET_DISABLE(1));
+   si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, 
S_028240_WINDOW_OFFSET_DISABLE(1));
+   si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
+  S_028244_BR_X(16384) | S_028244_BR_Y(16384));
+   si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
+   si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
+  S_028034_BR_X(16384) | S_028034_BR_Y(16384));
+   }
+
if (sctx->b.chip_class >= GFX9) {
si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
+   } else {
+   /* These registers, when written, also overwrite the CLEAR_STATE
+* context, so we can't rely on CLEAR_STATE setting them.
+* It would be an issue if there was another UMD changing them.
+*/
+   si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
+   si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
+   si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
}
 
if (sctx->b.chip_class >= CIK) {
if (sctx->b.chip_class >= GFX9) {
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 
S_00B41C_CU_EN(0x));
} else {
si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0x));
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 
0);
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0x));
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] radeonsi: don't flush sL1 conditionally in WAIT_ON_CE_COUNTER

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

I don't know the condition for the flush, but we had better turn this off.
The sL1 flush is used when CE dumps stuff into a ring buffer and the ring
buffer wraps.
---
 src/gallium/drivers/radeonsi/si_state_draw.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index a5f5b7f..dfe4236 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1138,32 +1138,32 @@ static void si_get_draw_start_count(struct si_context 
*sctx,
} else {
*start = info->start;
*count = info->count;
}
 }
 
 void si_ce_pre_draw_synchronization(struct si_context *sctx)
 {
if (sctx->ce_need_synchronization) {
radeon_emit(sctx->ce_ib, PKT3(PKT3_INCREMENT_CE_COUNTER, 0, 0));
-   radeon_emit(sctx->ce_ib, 1);
+   radeon_emit(sctx->ce_ib, 1); /* 1 = increment CE counter */
 
radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_WAIT_ON_CE_COUNTER, 0, 
0));
-   radeon_emit(sctx->b.gfx.cs, 1);
+   radeon_emit(sctx->b.gfx.cs, 0); /* 0 = don't flush sL1 
conditionally */
}
 }
 
 void si_ce_post_draw_synchronization(struct si_context *sctx)
 {
if (sctx->ce_need_synchronization) {
radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_INCREMENT_DE_COUNTER, 0, 
0));
-   radeon_emit(sctx->b.gfx.cs, 0);
+   radeon_emit(sctx->b.gfx.cs, 0); /* unused */
 
sctx->ce_need_synchronization = false;
}
 }
 
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
struct si_context *sctx = (struct si_context *)ctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct pipe_resource *indexbuf = info->index.resource;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs: Add Vulkan to features.txt

2017-07-31 Thread Connor Abbott
On Mon, Jul 31, 2017 at 3:32 PM, Jordan Justen
 wrote:
> To get the extension list:
>
> $ git grep -hE "extension name=\"VK_(EXT|KHR|KHX)" src/vulkan/registry/vk.xml 
> | \
>   grep -v disabled | awk '{print $2}' | sed -E 's/(name=)?"//g' | sort
>
> To find anv(il) and radv supported extensions:
>
> $ git grep -hE "'VK_([A-Z]+)_[a-z]" src/intel/
>
> $ git grep -hE "'VK_([A-Z]+)_[a-z]" src/amd/
>
> Signed-off-by: Jordan Justen 
> Cc: Jason Ekstrand 
> Cc: Dave Airlie 
> ---
>  docs/features.txt | 58 
> +++
>  1 file changed, 58 insertions(+)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index 1f628e1c030..5ec1591540a 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -333,6 +333,64 @@ we DO NOT WANT implementations of these extensions for 
> Mesa.
>GL_ARB_shadow_ambient Superseded by 
> GL_ARB_fragment_program
>GL_ARB_vertex_blend   Superseded by 
> GL_ARB_vertex_program
>
> +Vulkan 1.0 -- all DONE: anv
> +
> +Khronos and EXT extensions that are not part of any Vulkan version:
> +  VK_EXT_acquire_xlib_display   not started
> +  VK_EXT_blend_operation_advanced   not started
> +  VK_EXT_debug_marker   not started
> +  VK_EXT_debug_report   not started
> +  VK_EXT_direct_mode_displaynot started
> +  VK_EXT_discard_rectangles not started
> +  VK_EXT_display_controlnot started
> +  VK_EXT_display_surface_counternot started
> +  VK_EXT_hdr_metadata   not started
> +  VK_EXT_sampler_filter_minmax  not started
> +  VK_EXT_shader_subgroup_ballot not started
> +  VK_EXT_shader_subgroup_vote   not started

I'm about to send out an updated series for these two, so you can mark
them as started. My series only enables them for radv, but they should
be trivial to enable for anv after my series too.

> +  VK_EXT_swapchain_colorspace   not started
> +  VK_EXT_validation_flags   not started
> +  VK_KHR_16bit_storage  started (Alejandro)
> +  VK_KHR_android_surfacenot started
> +  VK_KHR_dedicated_allocation   DONE (anv, radv)
> +  VK_KHR_descriptor_update_template DONE (anv, radv)
> +  VK_KHR_displaynot started
> +  VK_KHR_display_swapchain  not started
> +  VK_KHR_external_fence not started
> +  VK_KHR_external_fence_capabilitiesnot started
> +  VK_KHR_external_fence_fd  not started
> +  VK_KHR_external_fence_win32   not started
> +  VK_KHR_external_memoryDONE (anv, radv)
> +  VK_KHR_external_memory_capabilities   DONE (anv, radv)
> +  VK_KHR_external_memory_fd DONE (anv, radv)
> +  VK_KHR_external_memory_win32  not started
> +  VK_KHR_external_semaphore DONE (radv)
> +  VK_KHR_external_semaphore_capabilitiesDONE (radv)
> +  VK_KHR_external_semaphore_fd  DONE (radv)
> +  VK_KHR_external_semaphore_win32   not started
> +  VK_KHR_get_memory_requirements2   DONE (anv, radv)
> +  VK_KHR_get_physical_device_properties2DONE (anv, radv)
> +  VK_KHR_get_surface_capabilities2  DONE (anv)
> +  VK_KHR_incremental_presentDONE (anv, radv)
> +  VK_KHR_maintenance1   DONE (anv, radv)
> +  VK_KHR_mir_surfacenot started
> +  VK_KHR_push_descriptorDONE (anv, radv)
> +  VK_KHR_sampler_mirror_clamp_to_edge   DONE (anv, radv)
> +  VK_KHR_shader_draw_parameters DONE (anv, radv)
> +  VK_KHR_shared_presentable_image   not started
> +  VK_KHR_storage_buffer_storage_class   DONE (anv, radv)
> +  VK_KHR_surfaceDONE (anv, radv)
> +  VK_KHR_swapchain  DONE (anv, radv)
> +  VK_KHR_variable_pointers  DONE (anv, radv)
> +  VK_KHR_wayland_surfaceDONE (anv, radv)
> +  VK_KHR_win32_keyed_mutex  not started
> +  VK_KHR_win32_surface  not 

[Mesa-dev] [PATCH 2/6] radeonsi: set up HTILE in descriptors only when level 0 is accessible

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

Compression isn't enabled with non-zero levels.
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 18b070b..b080562 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -425,21 +425,21 @@ void si_set_mutable_tex_desc_fields(struct si_screen 
*sscreen,
if (sscreen->b.chip_class >= VI) {
state[6] &= C_008F28_COMPRESSION_EN;
state[7] = 0;
 
if (vi_dcc_enabled(tex, first_level)) {
meta_va = (!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) +
  tex->dcc_offset;
 
if (sscreen->b.chip_class <= VI)
meta_va += base_level_info->dcc_offset;
-   } else if (tex->tc_compatible_htile) {
+   } else if (tex->tc_compatible_htile && first_level == 0) {
meta_va = tex->resource.gpu_address + tex->htile_offset;
}
 
if (meta_va) {
state[6] |= S_008F28_COMPRESSION_EN(1);
state[7] = meta_va >> 8;
}
}
 
if (sscreen->b.chip_class >= GFX9) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] radeonsi: don't print AMD twice in the renderer string with the marketing name

2017-07-31 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index c58048f..e9402f8 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -1338,21 +1338,22 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
 struct radeon_winsys *ws, unsigned flags)
 {
char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = 
{};
struct utsname uname_data;
const char *chip_name;
 
ws->query_info(ws, >info);
rscreen->ws = ws;
 
if ((chip_name = r600_get_marketing_name(ws)))
-   snprintf(family_name, sizeof(family_name), "%s / ", 
r600_get_family_name(rscreen));
+   snprintf(family_name, sizeof(family_name), "%s / ",
+r600_get_family_name(rscreen) + 4);
else
chip_name = r600_get_family_name(rscreen);
 
if (uname(_data) == 0)
snprintf(kernel_version, sizeof(kernel_version),
 " / %s", uname_data.release);
 
if (HAVE_LLVM > 0) {
snprintf(llvm_string, sizeof(llvm_string),
 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [AppVeyor] mesa master #5074 completed

2017-07-31 Thread Nicolai Hähnle

On 01.08.2017 00:25, Roland Scheidegger wrote:

FWIW while this works with the windows build, it still does not with a
scons linux build here:
   Compiling src/mesa/drivers/dri/common/dri_util.c ...
In file included from src/mesa/drivers/dri/common/dri_util.c:45:0:
src/util/xmlpool.h:103:29: fatal error: xmlpool/options.h: No such file
or directory
compilation terminated.

Looks to me like the include path should be provided by
xmlpool_options.dir.dir but I'm not quite sure how that's supposed to
work or why it's failing... But the file is definitely generated
(./build/linux-x86_64-debug/util/xmlpool/options.h)


Hmm, that seems to be a separate issue. I did try to build on Linux with 
SCons, but without any arguments, and that works fine. What's the 
command line to reproduce that build failure?


Thanks,
Nicolai





Roland

Am 31.07.2017 um 17:25 schrieb AppVeyor:


   Build mesa 5074 completed
   

Commit 90c8f17cf8 by Nicolai Hähnle  on
7/31/2017 3:17 PM:
Attempt to fix AppVeyor build, round 2

Configure your notification preferences




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] gallium: add PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE and corresponding cap

2017-07-31 Thread Roland Scheidegger
Am 31.07.2017 um 18:36 schrieb Nicolai Hähnle:
> From: Nicolai Hähnle 
> 
> v2: rename cap to PIPE_CAP_QUERY_SO_OVERFLOW and be a bit more explicit
> in the documentation
> ---
> 
> I decided to keep the query names as they were, to avoid any possibility
> of regression. Perhaps the non-ANY query could separately be renamed to
> PIPE_QUERY_SO_OVERFLOW_STREAM_PREDICATE, but that seemed like excessive
> churn to me.
> 
> I did take most of the proposed doc update, though, and renamed the cap.
> 
> Cheers,
> Nicolai
> ---
>  src/gallium/auxiliary/util/u_dump_defines.c  |  1 +
>  src/gallium/auxiliary/util/u_inlines.h   |  1 +
>  src/gallium/docs/source/context.rst  | 12 ++--
>  src/gallium/docs/source/screen.rst   |  6 ++
>  src/gallium/drivers/etnaviv/etnaviv_screen.c |  1 +
>  src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
>  src/gallium/drivers/i915/i915_screen.c   |  1 +
>  src/gallium/drivers/llvmpipe/lp_screen.c |  1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  1 +
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
>  src/gallium/drivers/r300/r300_screen.c   |  1 +
>  src/gallium/drivers/r600/r600_pipe.c |  1 +
>  src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
>  src/gallium/drivers/softpipe/sp_screen.c |  1 +
>  src/gallium/drivers/svga/svga_screen.c   |  1 +
>  src/gallium/drivers/swr/swr_screen.cpp   |  1 +
>  src/gallium/drivers/trace/tr_dump_state.c|  1 +
>  src/gallium/drivers/vc4/vc4_screen.c |  1 +
>  src/gallium/drivers/virgl/virgl_screen.c |  1 +
>  src/gallium/include/pipe/p_defines.h |  3 +++
>  21 files changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/util/u_dump_defines.c 
> b/src/gallium/auxiliary/util/u_dump_defines.c
> index 9d831ef..9126feb 100644
> --- a/src/gallium/auxiliary/util/u_dump_defines.c
> +++ b/src/gallium/auxiliary/util/u_dump_defines.c
> @@ -372,6 +372,7 @@ util_dump_query_type_names[] = {
> "PIPE_QUERY_PRIMITIVES_EMITTED",
> "PIPE_QUERY_SO_STATISTICS",
> "PIPE_QUERY_SO_OVERFLOW_PREDICATE",
> +   "PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE",
> "PIPE_QUERY_GPU_FINISHED",
> "PIPE_QUERY_PIPELINE_STATISTICS",
>  };
> diff --git a/src/gallium/auxiliary/util/u_inlines.h 
> b/src/gallium/auxiliary/util/u_inlines.h
> index d57f61e..e0ed594 100644
> --- a/src/gallium/auxiliary/util/u_inlines.h
> +++ b/src/gallium/auxiliary/util/u_inlines.h
> @@ -537,6 +537,7 @@ util_query_clear_result(union pipe_query_result *result, 
> unsigned type)
> switch (type) {
> case PIPE_QUERY_OCCLUSION_PREDICATE:
> case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
> +   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
> case PIPE_QUERY_GPU_FINISHED:
>result->b = FALSE;
>break;
> diff --git a/src/gallium/docs/source/context.rst 
> b/src/gallium/docs/source/context.rst
> index a46131c..7002802 100644
> --- a/src/gallium/docs/source/context.rst
> +++ b/src/gallium/docs/source/context.rst
> @@ -428,9 +428,17 @@ XXX the 2nd value is equivalent to 
> ``PIPE_QUERY_PRIMITIVES_GENERATED`` but it is
>  unclear if it should be increased if stream output is not active.
>  
>  ``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` returns a boolean value indicating
> -whether the stream output targets have overflowed as a result of the
> +whether a selected stream output target has overflowed as a result of the
>  commands issued between ``begin_query`` and ``end_query``.
> -This query can be used with ``render_condition``.
> +This query can be used with ``render_condition``. The output stream is
> +selected by the stream number passed to ``create_query``.
> +
> +``PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE`` returns a boolean value indicating
> +whether any stream output target has overflowed as a result of the commands
> +issued between ``begin_query`` and ``end_query``. This query can be used
> +with ``render_condition``, and its result is the logical OR of multiple
> +``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` queries, one for each stream output
> +target.
>  
>  ``PIPE_QUERY_GPU_FINISHED`` returns a boolean value indicating whether
>  all commands issued before ``end_query`` have completed. However, this
> diff --git a/src/gallium/docs/source/screen.rst 
> b/src/gallium/docs/source/screen.rst
> index ee7accb..88d27c2 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -398,6 +398,12 @@ The integer capabilities:
>supported.
>  * ``PIPE_CAP_NIR_SAMPLERS_AS_DEREF``: Whether NIR tex instructions should
>reference texture and sampler as NIR derefs instead of by indices.
> +* ``PIPE_CAP_QUERY_SO_OVERFLOW``: Whether the
> +  ``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` and
> +  ``PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE`` query types are supported. Note 
> that
> +  for a 

[Mesa-dev] [PATCH] docs: Add Vulkan to features.txt

2017-07-31 Thread Jordan Justen
To get the extension list:

$ git grep -hE "extension name=\"VK_(EXT|KHR|KHX)" src/vulkan/registry/vk.xml | 
\
  grep -v disabled | awk '{print $2}' | sed -E 's/(name=)?"//g' | sort

To find anv(il) and radv supported extensions:

$ git grep -hE "'VK_([A-Z]+)_[a-z]" src/intel/

$ git grep -hE "'VK_([A-Z]+)_[a-z]" src/amd/

Signed-off-by: Jordan Justen 
Cc: Jason Ekstrand 
Cc: Dave Airlie 
---
 docs/features.txt | 58 +++
 1 file changed, 58 insertions(+)

diff --git a/docs/features.txt b/docs/features.txt
index 1f628e1c030..5ec1591540a 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -333,6 +333,64 @@ we DO NOT WANT implementations of these extensions for 
Mesa.
   GL_ARB_shadow_ambient Superseded by 
GL_ARB_fragment_program
   GL_ARB_vertex_blend   Superseded by 
GL_ARB_vertex_program
 
+Vulkan 1.0 -- all DONE: anv
+
+Khronos and EXT extensions that are not part of any Vulkan version:
+  VK_EXT_acquire_xlib_display   not started
+  VK_EXT_blend_operation_advanced   not started
+  VK_EXT_debug_marker   not started
+  VK_EXT_debug_report   not started
+  VK_EXT_direct_mode_displaynot started
+  VK_EXT_discard_rectangles not started
+  VK_EXT_display_controlnot started
+  VK_EXT_display_surface_counternot started
+  VK_EXT_hdr_metadata   not started
+  VK_EXT_sampler_filter_minmax  not started
+  VK_EXT_shader_subgroup_ballot not started
+  VK_EXT_shader_subgroup_vote   not started
+  VK_EXT_swapchain_colorspace   not started
+  VK_EXT_validation_flags   not started
+  VK_KHR_16bit_storage  started (Alejandro)
+  VK_KHR_android_surfacenot started
+  VK_KHR_dedicated_allocation   DONE (anv, radv)
+  VK_KHR_descriptor_update_template DONE (anv, radv)
+  VK_KHR_displaynot started
+  VK_KHR_display_swapchain  not started
+  VK_KHR_external_fence not started
+  VK_KHR_external_fence_capabilitiesnot started
+  VK_KHR_external_fence_fd  not started
+  VK_KHR_external_fence_win32   not started
+  VK_KHR_external_memoryDONE (anv, radv)
+  VK_KHR_external_memory_capabilities   DONE (anv, radv)
+  VK_KHR_external_memory_fd DONE (anv, radv)
+  VK_KHR_external_memory_win32  not started
+  VK_KHR_external_semaphore DONE (radv)
+  VK_KHR_external_semaphore_capabilitiesDONE (radv)
+  VK_KHR_external_semaphore_fd  DONE (radv)
+  VK_KHR_external_semaphore_win32   not started
+  VK_KHR_get_memory_requirements2   DONE (anv, radv)
+  VK_KHR_get_physical_device_properties2DONE (anv, radv)
+  VK_KHR_get_surface_capabilities2  DONE (anv)
+  VK_KHR_incremental_presentDONE (anv, radv)
+  VK_KHR_maintenance1   DONE (anv, radv)
+  VK_KHR_mir_surfacenot started
+  VK_KHR_push_descriptorDONE (anv, radv)
+  VK_KHR_sampler_mirror_clamp_to_edge   DONE (anv, radv)
+  VK_KHR_shader_draw_parameters DONE (anv, radv)
+  VK_KHR_shared_presentable_image   not started
+  VK_KHR_storage_buffer_storage_class   DONE (anv, radv)
+  VK_KHR_surfaceDONE (anv, radv)
+  VK_KHR_swapchain  DONE (anv, radv)
+  VK_KHR_variable_pointers  DONE (anv, radv)
+  VK_KHR_wayland_surfaceDONE (anv, radv)
+  VK_KHR_win32_keyed_mutex  not started
+  VK_KHR_win32_surface  not started
+  VK_KHR_xcb_surfaceDONE (anv, radv)
+  VK_KHR_xlib_surface   DONE (anv, radv)
+  VK_KHX_device_group   not started
+  VK_KHX_device_group_creation  not started
+  VK_KHX_multiview  DONE (anv)
+
 
 A graphical representation of this information can be found at
 

Re: [Mesa-dev] [AppVeyor] mesa master #5074 completed

2017-07-31 Thread Roland Scheidegger
FWIW while this works with the windows build, it still does not with a
scons linux build here:
  Compiling src/mesa/drivers/dri/common/dri_util.c ...
In file included from src/mesa/drivers/dri/common/dri_util.c:45:0:
src/util/xmlpool.h:103:29: fatal error: xmlpool/options.h: No such file
or directory
compilation terminated.

Looks to me like the include path should be provided by
xmlpool_options.dir.dir but I'm not quite sure how that's supposed to
work or why it's failing... But the file is definitely generated
(./build/linux-x86_64-debug/util/xmlpool/options.h)


Roland

Am 31.07.2017 um 17:25 schrieb AppVeyor:
> 
>   Build mesa 5074 completed
>   
> 
> Commit 90c8f17cf8 by Nicolai Hähnle  on
> 7/31/2017 3:17 PM:
> Attempt to fix AppVeyor build, round 2
> 
> Configure your notification preferences
> 
> 
> 
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] gallium: add PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE and corresponding cap

2017-07-31 Thread Nicolai Hähnle

On 31.07.2017 21:02, Marek Olšák wrote:

Does the rest of the series need to be rebased? I wonder if this is
the time to review it.


There are only trivial changes where the cap was renamed. It's ready as 
far as I'm concerned -- I've pushed the latest version to 
https://cgit.freedesktop.org/~nh/mesa/log/?h=arb_transform_feedback_overflow_query 
right now.


There's the issue of the firmware bug, but the series has a workaround, 
and if/when the bug gets fixed we can make the workaround conditional on 
the firmware version in a separate patch.


Cheers,
Nicolai




Marek

On Mon, Jul 31, 2017 at 6:36 PM, Nicolai Hähnle  wrote:

From: Nicolai Hähnle 

v2: rename cap to PIPE_CAP_QUERY_SO_OVERFLOW and be a bit more explicit
 in the documentation
---

I decided to keep the query names as they were, to avoid any possibility
of regression. Perhaps the non-ANY query could separately be renamed to
PIPE_QUERY_SO_OVERFLOW_STREAM_PREDICATE, but that seemed like excessive
churn to me.

I did take most of the proposed doc update, though, and renamed the cap.

Cheers,
Nicolai
---
  src/gallium/auxiliary/util/u_dump_defines.c  |  1 +
  src/gallium/auxiliary/util/u_inlines.h   |  1 +
  src/gallium/docs/source/context.rst  | 12 ++--
  src/gallium/docs/source/screen.rst   |  6 ++
  src/gallium/drivers/etnaviv/etnaviv_screen.c |  1 +
  src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
  src/gallium/drivers/i915/i915_screen.c   |  1 +
  src/gallium/drivers/llvmpipe/lp_screen.c |  1 +
  src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  1 +
  src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
  src/gallium/drivers/r300/r300_screen.c   |  1 +
  src/gallium/drivers/r600/r600_pipe.c |  1 +
  src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
  src/gallium/drivers/softpipe/sp_screen.c |  1 +
  src/gallium/drivers/svga/svga_screen.c   |  1 +
  src/gallium/drivers/swr/swr_screen.cpp   |  1 +
  src/gallium/drivers/trace/tr_dump_state.c|  1 +
  src/gallium/drivers/vc4/vc4_screen.c |  1 +
  src/gallium/drivers/virgl/virgl_screen.c |  1 +
  src/gallium/include/pipe/p_defines.h |  3 +++
  21 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_dump_defines.c 
b/src/gallium/auxiliary/util/u_dump_defines.c
index 9d831ef..9126feb 100644
--- a/src/gallium/auxiliary/util/u_dump_defines.c
+++ b/src/gallium/auxiliary/util/u_dump_defines.c
@@ -372,6 +372,7 @@ util_dump_query_type_names[] = {
 "PIPE_QUERY_PRIMITIVES_EMITTED",
 "PIPE_QUERY_SO_STATISTICS",
 "PIPE_QUERY_SO_OVERFLOW_PREDICATE",
+   "PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE",
 "PIPE_QUERY_GPU_FINISHED",
 "PIPE_QUERY_PIPELINE_STATISTICS",
  };
diff --git a/src/gallium/auxiliary/util/u_inlines.h 
b/src/gallium/auxiliary/util/u_inlines.h
index d57f61e..e0ed594 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -537,6 +537,7 @@ util_query_clear_result(union pipe_query_result *result, 
unsigned type)
 switch (type) {
 case PIPE_QUERY_OCCLUSION_PREDICATE:
 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
 case PIPE_QUERY_GPU_FINISHED:
result->b = FALSE;
break;
diff --git a/src/gallium/docs/source/context.rst 
b/src/gallium/docs/source/context.rst
index a46131c..7002802 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -428,9 +428,17 @@ XXX the 2nd value is equivalent to 
``PIPE_QUERY_PRIMITIVES_GENERATED`` but it is
  unclear if it should be increased if stream output is not active.

  ``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` returns a boolean value indicating
-whether the stream output targets have overflowed as a result of the
+whether a selected stream output target has overflowed as a result of the
  commands issued between ``begin_query`` and ``end_query``.
-This query can be used with ``render_condition``.
+This query can be used with ``render_condition``. The output stream is
+selected by the stream number passed to ``create_query``.
+
+``PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE`` returns a boolean value indicating
+whether any stream output target has overflowed as a result of the commands
+issued between ``begin_query`` and ``end_query``. This query can be used
+with ``render_condition``, and its result is the logical OR of multiple
+``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` queries, one for each stream output
+target.

  ``PIPE_QUERY_GPU_FINISHED`` returns a boolean value indicating whether
  all commands issued before ``end_query`` have completed. However, this
diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index ee7accb..88d27c2 100644
--- 

Re: [Mesa-dev] [PATCH v2] mesa: Fix swizzling for luminance/intensity in _mesa_readpixels

2017-07-31 Thread Chris Wilson
Quoting Chris Wilson (2017-07-31 22:51:25)
> Luminance/Intensity when converted to RGB should be replicated to fill
> the RGB channels, but they differ on how the alpha channel is filled, as
> luminance is set to 1 (unless alpha is supplied) and intensity is
> replicated into alpha as well.
> 
> https://www.khronos.org/opengl/wiki/Image_Format:
> 
> Legacy Image Formats
> 
> Warning: This section describes legacy OpenGL APIs that have been
> removed from core OpenGL 3.1 and above (they are only deprecated in
> OpenGL 3.0). It is recommended that you not use this functionality in
> your programs.
> 
> As with other deprecated functionality, it is advised that you not rely
> on these features.
> 
> Luminance and intensity formats are color formats. They are one or two
> channel formats like RED or RG, but they specify particular behavior.
> 
> When a GL_RED format is sampled in a shader, the resulting vec4 is (Red,
> 0, 0, 1). When a GL_INTENSITY format is sampled, the resulting vec4 is
> (I, I, I, I). The single intensity value is read into all four
> components. For GL_LUMINANCE, the result is (L, L, L, 1). There is also
> a two-channel GL_LUMINANCE_ALPHA format, which gives (L, L, L, A).
> 
> v2: luminance -> xxx1, intensity -> xxxx, luminance_alpha -> xxxw

If that quote is the expected behaviour for glReadPixels and
glGetTexSubImage, there's a similar stanza to fix in texgetimage.c
(As well as the piglit tests to update.)

With a quick grep through VK-GL-CTS I didn't find anything to refer to.
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] mesa: Fix swizzling for luminance/intensity in _mesa_readpixels

2017-07-31 Thread Chris Wilson
Luminance/Intensity when converted to RGB should be replicated to fill
the RGB channels, but they differ on how the alpha channel is filled, as
luminance is set to 1 (unless alpha is supplied) and intensity is
replicated into alpha as well.

https://www.khronos.org/opengl/wiki/Image_Format:

Legacy Image Formats

Warning: This section describes legacy OpenGL APIs that have been
removed from core OpenGL 3.1 and above (they are only deprecated in
OpenGL 3.0). It is recommended that you not use this functionality in
your programs.

As with other deprecated functionality, it is advised that you not rely
on these features.

Luminance and intensity formats are color formats. They are one or two
channel formats like RED or RG, but they specify particular behavior.

When a GL_RED format is sampled in a shader, the resulting vec4 is (Red,
0, 0, 1). When a GL_INTENSITY format is sampled, the resulting vec4 is
(I, I, I, I). The single intensity value is read into all four
components. For GL_LUMINANCE, the result is (L, L, L, 1). There is also
a two-channel GL_LUMINANCE_ALPHA format, which gives (L, L, L, A).

v2: luminance -> xxx1, intensity -> xxxx, luminance_alpha -> xxxw

Fixes: 5038d839b8e4 ("mesa: use _mesa_format_convert to implement glReadPixels.")
Cc: Iago Toral Quiroga 
Cc: Jason Ekstrand 
---
 src/mesa/main/readpix.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 6ce340ddf9..894b87d826 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -471,17 +471,23 @@ read_rgba_pixels( struct gl_context *ctx,
 * Depending on the base formats involved in the conversion we might need to
 * rebase some values, so for these formats we compute a rebase swizzle.
 */
-   if (rb->_BaseFormat == GL_LUMINANCE || rb->_BaseFormat == GL_INTENSITY) {
+   if (rb->_BaseFormat == GL_LUMINANCE) {
   needs_rebase = true;
   rebase_swizzle[0] = MESA_FORMAT_SWIZZLE_X;
-  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_X;
+  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_X;
   rebase_swizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
+   } else if (rb->_BaseFormat == GL_INTENSITY) {
+  needs_rebase = true;
+  rebase_swizzle[0] = MESA_FORMAT_SWIZZLE_X;
+  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_X;
+  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_X;
+  rebase_swizzle[3] = MESA_FORMAT_SWIZZLE_X;
} else if (rb->_BaseFormat == GL_LUMINANCE_ALPHA) {
   needs_rebase = true;
   rebase_swizzle[0] = MESA_FORMAT_SWIZZLE_X;
-  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_X;
+  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_X;
   rebase_swizzle[3] = MESA_FORMAT_SWIZZLE_W;
} else if (_mesa_get_format_base_format(rb_format) != rb->_BaseFormat) {
   needs_rebase =
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Prefer using streaming reads from WC mmaps

2017-07-31 Thread Matt Turner
Yep, seems like a good idea.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Fix swizzling for luminance/intensity in _mesa_readpixels

2017-07-31 Thread Chris Wilson
Luminance/Intensity when converted to RGBA should be replicated to fill
the RGB channels.

Fixes: 5038d839b8e4 ("mesa: use _mesa_format_convert to implement glReadPixels.")
Cc: Iago Toral Quiroga 
Cc: Jason Ekstrand 
---
 src/mesa/main/readpix.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 6ce340ddf9..2d6c7521ef 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -474,14 +474,14 @@ read_rgba_pixels( struct gl_context *ctx,
if (rb->_BaseFormat == GL_LUMINANCE || rb->_BaseFormat == GL_INTENSITY) {
   needs_rebase = true;
   rebase_swizzle[0] = MESA_FORMAT_SWIZZLE_X;
-  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_X;
+  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_X;
   rebase_swizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
} else if (rb->_BaseFormat == GL_LUMINANCE_ALPHA) {
   needs_rebase = true;
   rebase_swizzle[0] = MESA_FORMAT_SWIZZLE_X;
-  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
-  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+  rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_X;
+  rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_X;
   rebase_swizzle[3] = MESA_FORMAT_SWIZZLE_W;
} else if (_mesa_get_format_base_format(rb_format) != rb->_BaseFormat) {
   needs_rebase =
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] android: link libmesa_intel_common with zlib and expat

2017-07-31 Thread Rob Herring
On Mon, Jul 31, 2017 at 3:45 PM, Emil Velikov  wrote:
> On 31 July 2017 at 09:32, Tapani Pälli  wrote:
>> Makes it possible to build Mesa on Android with -DDEBUG with
>> the next patch that reverts 4f695731.
>>
>> Signed-off-by: Tapani Pälli 
>> ---
>>  src/intel/Android.common.mk | 5 +
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk
>> index f056f0a..12cea6e 100644
>> --- a/src/intel/Android.common.mk
>> +++ b/src/intel/Android.common.mk
>> @@ -32,10 +32,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
>>  LOCAL_SRC_FILES := $(COMMON_FILES)
>>
>>  LOCAL_C_INCLUDES := \
>> +   external/zlib \
> Ideally this will be part of zlib's LOCAL_EXPORT_C_INCLUDE_DIRS, at
> some point in the future.

FYI, it already is and has been since M. So it depends whether you
care about L support. IMO, with O coming out soon, it's time to drop
L. 3 years/versions of Android support in master seems like plenty to
me.

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: simplify intel_image_format_lookup()

2017-07-31 Thread Matt Turner
Both are

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Revert "mesa: stop assigning unused storage for non-bindless opaque types"

2017-07-31 Thread Samuel Pitoiset
This reverts commit fcbb93e860246375d03f280f927f79d3645a8988 and
also commit 7c5b204e38d8cae70f5bf26e7223da5bc448bb5c to avoid
compilation errors.

Basically, the parameter indexes look wrong when a non-bindless
sampler is declared inside a nested struct (because it is skipped).
I think it's safer to just restore the previous behaviour, which has
been there for ages, especially since the initial attempt was only a
small performance improvement.

This fixes a regression with
ES2-CTS.functional.shaders.struct.uniform.sampler_nested*.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101983
Cc: 17.2 
Signed-off-by: Samuel Pitoiset 
---
 src/mesa/program/ir_to_mesa.cpp | 56 +
 1 file changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index ac12b59d07..775211cefb 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2409,8 +2409,10 @@ namespace {
 class add_uniform_to_shader : public program_resource_visitor {
 public:
add_uniform_to_shader(struct gl_shader_program *shader_program,
-struct gl_program_parameter_list *params)
-  : shader_program(shader_program), params(params), idx(-1)
+struct gl_program_parameter_list *params,
+ gl_shader_stage shader_type)
+  : shader_program(shader_program), params(params), idx(-1),
+shader_type(shader_type)
{
   /* empty */
}
@@ -2433,6 +2435,7 @@ private:
struct gl_program_parameter_list *params;
int idx;
ir_variable *var;
+   gl_shader_stage shader_type;
 };
 
 } /* anonymous namespace */
@@ -2444,18 +2447,49 @@ add_uniform_to_shader::visit_field(const glsl_type 
*type, const char *name,
const enum glsl_interface_packing,
bool /* last_field */)
 {
-   /* opaque types don't use storage in the param list unless they are
-* bindless samplers or images.
-*/
-   if (type->contains_opaque() && !var->data.bindless)
+   /* atomics don't get real storage */
+   if (type->contains_atomic())
   return;
 
-   assert(_mesa_lookup_parameter_index(params, name) < 0);
+   gl_register_file file;
+   if (type->without_array()->is_sampler() && !var->data.bindless) {
+  file = PROGRAM_SAMPLER;
+   } else {
+  file = PROGRAM_UNIFORM;
+   }
+
+   int index = _mesa_lookup_parameter_index(params, name);
+   if (index < 0) {
+  unsigned size = type_size(type) * 4;
+
+  index = _mesa_add_parameter(params, file, name, size, type->gl_type,
+ NULL, NULL);
 
-   unsigned size = type_size(type) * 4;
+  /* Sampler uniform values are stored in prog->SamplerUnits,
+   * and the entry in that array is selected by this index we
+   * store in ParameterValues[].
+   */
+  if (file == PROGRAM_SAMPLER) {
+unsigned location;
+const bool found =
+   this->shader_program->UniformHash->get(location,
+  
params->Parameters[index].Name);
+assert(found);
+
+if (!found)
+   return;
+
+struct gl_uniform_storage *storage =
+>shader_program->data->UniformStorage[location];
 
-   int index = _mesa_add_parameter(params, PROGRAM_UNIFORM, name, size,
-   type->gl_type, NULL, NULL);
+ assert(storage->type->is_sampler() &&
+storage->opaque[shader_type].active);
+
+for (unsigned int j = 0; j < size / 4; j++)
+params->ParameterValues[index + j][0].f =
+   storage->opaque[shader_type].index + j;
+  }
+   }
 
/* The first part of the uniform that's processed determines the base
 * location of the whole uniform (for structures).
@@ -2479,7 +2513,7 @@ _mesa_generate_parameters_list_for_uniforms(struct 
gl_shader_program
struct gl_program_parameter_list
*params)
 {
-   add_uniform_to_shader add(shader_program, params);
+   add_uniform_to_shader add(shader_program, params, sh->Stage);
 
foreach_in_list(ir_instruction, node, sh->ir) {
   ir_variable *var = node->as_variable();
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/13] swr/rast: fixes for 32-bit builds

2017-07-31 Thread Emil Velikov
Hi Tim,

Some of the inline functions seem unused.
Very quick search showed the following:

InterpolateComponent
_simd128_abs_ps
_simd_abs_ps

Might be worth cleaning things up, first?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/isl: Tighten up restrictions for CCS on gen7

2017-07-31 Thread Jason Ekstrand
On Mon, Jul 31, 2017 at 1:28 PM, Nanley Chery  wrote:

> On Mon, Jul 31, 2017 at 01:03:38PM -0700, Jason Ekstrand wrote:
> > On Mon, Jul 31, 2017 at 12:11 PM, Nanley Chery 
> > wrote:
> >
> > > On Sat, Jul 22, 2017 at 04:54:24PM -0700, Jason Ekstrand wrote:
> > > > It may technically be possible to enable some sort of fast-clear
> support
> > > > for at least the base slice of a 2D array texture on gen7.  However,
> > > > it's not documented to work, we've never tried to do it in GL, and we
> > > > have no idea what the hardware does if you turn on CCS_D with arrayed
> > > > rendering.  Let's just play it safe and disallow it for now.  If
> someone
> > > > really cares that much about gen7 performance, they can come along
> and
> > > > try to get it working later.
> > > > ---
> > > >  src/intel/isl/isl.c | 34 --
> > > >  1 file changed, 24 insertions(+), 10 deletions(-)
> > > >
> > > > diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
> > > > index 9cf5821..5465496 100644
> > > > --- a/src/intel/isl/isl.c
> > > > +++ b/src/intel/isl/isl.c
> > > > @@ -1749,9 +1749,30 @@ isl_surf_get_ccs_surf(const struct isl_device
> > > *dev,
> > > > if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
> > > >return false;
> > > >
> > > > +   /* The PRM doesn't say this explicitly, but fast-clears don't
> appear
> > > to
> > > > +* work for 3D textures until gen9 where the layout of 3D
> textures
> > > changes
> > > > +* to match 2D array textures.
> > > > +*/
> > > > if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
> > > >return false;
> > > >
> > > > +   /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color
> Clear
> > > of
> > > > +* Non-MultiSampler Render Target Restrictions):
> > > > +*
> > > > +*"Support is for non-mip-mapped and non-array surface types
> > > only."
> > > > +*
> > > > +* This restriction is lifted on gen8+.  Technically, it may be
> > > possible to
> > > > +* create a CCS for an arrayed or mipmapped image and only enable
> > > CCS_D
> > > > +* when rendering to the base slice.  However, there is no
> > > documentation
> > > > +* tell us what the hardware would do in that case or what it
> does
> > > if you
> > > > +* walk off the bases slice.  (Does it ignore CCS or does it
> start
> > > > +* scribbling over random memory?)  We play it safe and just
> follow
> > > the
> > > > +* docs and don't allow CCS_D for arrayed or mip-mapped surfaces.
> > > > +*/
> > > > +   if (ISL_DEV_GEN(dev) <= 7 &&
> > > > +   (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
> > > > +  return false;
> > > > +
> > >
> > > Why are mipmapped surfaces unsafe? A user is restricted to rendering
> > > into one miplevel at a time.
> > >
> >
> > Same two reasons:
> >
> >  1) The docs say it doesn't work.
> >  2) The miptree layout may be complete nonsense when it comes to the CCS.
> > On gen8+, the surface is re-laid-out for the auxiliary surface but, on
> > gen7, they try to do a direct translation of x/y coordinates in the main
> > surface to x/y in the CCS.
> >
> > --Jason
> >
> >
>
> Sorry, my question wasn't clear. I meant to ask the following: why
> disable CCS completely for mipmapped surfaces instead of enabling it
> only for the base miplevel?
>

Right.  Yes, we could probably enable it in that case.  I'm reasonably
happy to turn it on and just restrict it like you did before.  I was just
trying to get something that I knew worked so we could land the anv patches
in time.

--Jason


> > > -Nanley
> > >
> > > > if (isl_format_is_compressed(surf->format))
> > > >return false;
> > > >
> > > > @@ -1789,21 +1810,14 @@ isl_surf_get_ccs_surf(const struct isl_device
> > > *dev,
> > > >return false;
> > > > }
> > > >
> > > > -   /* Multi-LOD and multi-layer CCS isn't supported on gen7. */
> > > > -   const uint8_t levels = ISL_DEV_GEN(dev) <= 7 ? 1 : surf->levels;
> > > > -   const uint32_t array_len = ISL_DEV_GEN(dev) <= 7 ?
> > > > -  1 : surf->logical_level0_px.array_
> len;
> > > > -   const uint32_t depth = ISL_DEV_GEN(dev) <= 7 ?
> > > > -  1 : surf->logical_level0_px.depth;
> > > > -
> > > > return isl_surf_init(dev, ccs_surf,
> > > >  .dim = surf->dim,
> > > >  .format = ccs_format,
> > > >  .width = surf->logical_level0_px.width,
> > > >  .height = surf->logical_level0_px.height,
> > > > -.depth = depth,
> > > > -.levels = levels,
> > > > -.array_len = array_len,
> > > > +.depth = surf->logical_level0_px.depth,
> > > > +.levels = surf->levels,
> > > > +.array_len = surf->logical_level0_px.array_
> len,
> 

Re: [Mesa-dev] [PATCH] swr: Add arch flags to support Cray and PGI compilers

2017-07-31 Thread Chuck Atkins
Hi Tim,


> If the Cray flags are for wrapper scripts, why do we need specific flags
> for that instead of using the underlying compiler flags?
>

Short answer: It's the "Cray" way of doing things.

Long answer: The target-cpu flag sometimes just controls the -march flags
(or equiv) but it can also add other low level flags.  By using the
target-cpu flag with the cray compiler wrappers, you ensure that you're
using whatever flags for a given architecture are appropriate for the
underlying compiler, even if you don't have that compiler knowledge
specified or encoded anywhere in your configure.  For instance, when using
another compiler backend that ./configure isn't explicitly checking for
(pathscale, actual cray compiler, etc.), then the build will continue to
work because -target-cpu gets translated by the wrapper to whatever is
appropriate.  You'll also get a default set of flags loaded anyways based
on your module environment.  Specifying target-cpu replaces those default
flags whereas adding -xCORE-AVX512 would just append to them, maybe
overriding the default flags, maybe not, depending on how the module
environment is set up.  It's one of the many quirks and oddities of the
Cray Programming Environment.



> I’m guessing you intend this for the 17.2 branch as well?
>

Nope.  I've no pressing customer need for it so keeping it in master but
out of stable is fine with me.


--
Chuck Atkins
Staff R&D Engineer, Scientific Computing
Kitware, Inc.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] intel: move gen_decoder.* back to COMMON_FILES

2017-07-31 Thread Emil Velikov
On 31 July 2017 at 09:32, Tapani Pälli  wrote:
> this change reverts commit 4f695731, we want to be able to build
> with -DDEBUG and gen_decoder on Android.
>
> Signed-off-by: Tapani Pälli 
> ---
>  src/intel/Makefile.common.am | 2 +-
>  src/intel/Makefile.sources   | 6 ++
>  2 files changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/src/intel/Makefile.common.am b/src/intel/Makefile.common.am
> index 1c1c2ee..49e9c6a 100644
> --- a/src/intel/Makefile.common.am
> +++ b/src/intel/Makefile.common.am
> @@ -22,7 +22,7 @@
>  noinst_LTLIBRARIES += common/libintel_common.la
>
>  common_libintel_common_la_CFLAGS = $(AM_CFLAGS) $(LIBDRM_CFLAGS)
> -common_libintel_common_la_SOURCES = $(COMMON_FILES) $(DECODER_FILES)
> +common_libintel_common_la_SOURCES = $(COMMON_FILES)
>
>  if HAVE_PLATFORM_ANDROID
>  common_libintel_common_la_CFLAGS += $(ANDROID_CFLAGS)
> diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
> index 2b3065e..10abef6 100644
> --- a/src/intel/Makefile.sources
> +++ b/src/intel/Makefile.sources
> @@ -11,6 +11,8 @@ COMMON_FILES = \
> common/gen_clflush.h \
> common/gen_debug.c \
> common/gen_debug.h \
> +   common/gen_decoder.c \
> +   common/gen_decoder.h \
> common/gen_device_info.c \
> common/gen_device_info.h \
> common/gen_l3_config.c \
> @@ -18,10 +20,6 @@ COMMON_FILES = \
> common/gen_urb_config.c \
> common/gen_sample_positions.h
>
> -DECODER_FILES = \
> -   common/gen_decoder.h \
> -   common/gen_decoder.c
> -
Personally I would keep things as-is and add $(DECODER_FILES) to the
Android build.

Regardless if you opt for that route or not:
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] android: link libmesa_intel_common with zlib and expat

2017-07-31 Thread Emil Velikov
On 31 July 2017 at 09:32, Tapani Pälli  wrote:
> Makes it possible to build Mesa on Android with -DDEBUG with
> the next patch that reverts 4f695731.
>
> Signed-off-by: Tapani Pälli 
> ---
>  src/intel/Android.common.mk | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk
> index f056f0a..12cea6e 100644
> --- a/src/intel/Android.common.mk
> +++ b/src/intel/Android.common.mk
> @@ -32,10 +32,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
>  LOCAL_SRC_FILES := $(COMMON_FILES)
>
>  LOCAL_C_INCLUDES := \
> +   external/zlib \
Ideally this will be part of zlib's LOCAL_EXPORT_C_INCLUDE_DIRS, at
some point in the future.

Regardless, patch looks good:
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Android: fix xmlconfig build

2017-07-31 Thread Emil Velikov
On 31 July 2017 at 16:46, Rob Herring  wrote:
> Commit 601093f95ddf ("xmlconfig: move into src/util") broke the Android
> build due to missing libexpat dependency:
>
> external/mesa3d/src/util/xmlconfig.c:34:10: fatal error: 'expat.h' file not 
> found
>
> Fixes: 601093f95ddf ("xmlconfig: move into src/util")
> Cc: Nicolai Hähnle 
> Signed-off-by: Rob Herring 
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs: Update feature list for GL 4.6

2017-07-31 Thread Adam Jackson
On Mon, 2017-07-31 at 13:05 -0700, Matt Turner wrote:

> > @@ -221,6 +221,20 @@ GL 4.5, GLSL 4.50 -- all DONE: nvc0, radeonsi
> >GL_KHR_robustness DONE (i965)
> >GL_EXT_shader_integer_mix DONE (all drivers 
> > that support GLSL)
> > 
> > +GL 4.6, GLSL 4.60
> > +
> > +  GL_ARB_gl_spirv   not started
> 
> Nicolai and Ian are working on this.
> 
> > +  GL_ARB_gl_spirv_extensionsnot started
> 
> And presumably this one too.

Noted.

> > +  GL_ARB_indirect_parametersDONE (nvc0, 
> > radeonsi)
> > +  GL_ARB_pipeline_statistics_query  DONE (i965, nvc0, 
> > radeonsi, softpipe, swr)
> > +  GL_ARB_polygon_offset_clamp   not started
> > +  GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
> > nvc0, radeonsi, softpipe)
> > +  GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
> > radeonsi)
> > +  GL_ARB_shader_group_vote  DONE (nvc0, 
> > radeonsi)
> 
> I implemented this one recently on i965, but I didn't realize it was
> listed below. Doesn't matter if it's fixed in this patch.

My apologies for missing that, I'd written this patch against a stale
checkout.

Fixed both the above issues and merged, thanks.

- ajax
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/isl: Tighten up restrictions for CCS on gen7

2017-07-31 Thread Nanley Chery
On Mon, Jul 31, 2017 at 01:03:38PM -0700, Jason Ekstrand wrote:
> On Mon, Jul 31, 2017 at 12:11 PM, Nanley Chery 
> wrote:
> 
> > On Sat, Jul 22, 2017 at 04:54:24PM -0700, Jason Ekstrand wrote:
> > > It may technically be possible to enable some sort of fast-clear support
> > > for at least the base slice of a 2D array texture on gen7.  However,
> > > it's not documented to work, we've never tried to do it in GL, and we
> > > have no idea what the hardware does if you turn on CCS_D with arrayed
> > > rendering.  Let's just play it safe and disallow it for now.  If someone
> > > really cares that much about gen7 performance, they can come along and
> > > try to get it working later.
> > > ---
> > >  src/intel/isl/isl.c | 34 --
> > >  1 file changed, 24 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
> > > index 9cf5821..5465496 100644
> > > --- a/src/intel/isl/isl.c
> > > +++ b/src/intel/isl/isl.c
> > > @@ -1749,9 +1749,30 @@ isl_surf_get_ccs_surf(const struct isl_device
> > *dev,
> > > if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
> > >return false;
> > >
> > > +   /* The PRM doesn't say this explicitly, but fast-clears don't appear
> > to
> > > +* work for 3D textures until gen9 where the layout of 3D textures
> > changes
> > > +* to match 2D array textures.
> > > +*/
> > > if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
> > >return false;
> > >
> > > +   /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear
> > of
> > > +* Non-MultiSampler Render Target Restrictions):
> > > +*
> > > +*"Support is for non-mip-mapped and non-array surface types
> > only."
> > > +*
> > > +* This restriction is lifted on gen8+.  Technically, it may be
> > possible to
> > > +* create a CCS for an arrayed or mipmapped image and only enable
> > CCS_D
> > > +* when rendering to the base slice.  However, there is no
> > documentation
> > > +* tell us what the hardware would do in that case or what it does
> > if you
> > > +* walk off the bases slice.  (Does it ignore CCS or does it start
> > > +* scribbling over random memory?)  We play it safe and just follow
> > the
> > > +* docs and don't allow CCS_D for arrayed or mip-mapped surfaces.
> > > +*/
> > > +   if (ISL_DEV_GEN(dev) <= 7 &&
> > > +   (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
> > > +  return false;
> > > +
> >
> > Why are mipmapped surfaces unsafe? A user is restricted to rendering
> > into one miplevel at a time.
> >
> 
> Same two reasons:
> 
>  1) The docs say it doesn't work.
>  2) The miptree layout may be complete nonsense when it comes to the CCS.
> On gen8+, the surface is re-laid-out for the auxiliary surface but, on
> gen7, they try to do a direct translation of x/y coordinates in the main
> surface to x/y in the CCS.
> 
> --Jason
> 
> 

Sorry, my question wasn't clear. I meant to ask the following: why
disable CCS completely for mipmapped surfaces instead of enabling it
only for the base miplevel?

> > -Nanley
> >
> > > if (isl_format_is_compressed(surf->format))
> > >return false;
> > >
> > > @@ -1789,21 +1810,14 @@ isl_surf_get_ccs_surf(const struct isl_device
> > *dev,
> > >return false;
> > > }
> > >
> > > -   /* Multi-LOD and multi-layer CCS isn't supported on gen7. */
> > > -   const uint8_t levels = ISL_DEV_GEN(dev) <= 7 ? 1 : surf->levels;
> > > -   const uint32_t array_len = ISL_DEV_GEN(dev) <= 7 ?
> > > -  1 : surf->logical_level0_px.array_len;
> > > -   const uint32_t depth = ISL_DEV_GEN(dev) <= 7 ?
> > > -  1 : surf->logical_level0_px.depth;
> > > -
> > > return isl_surf_init(dev, ccs_surf,
> > >  .dim = surf->dim,
> > >  .format = ccs_format,
> > >  .width = surf->logical_level0_px.width,
> > >  .height = surf->logical_level0_px.height,
> > > -.depth = depth,
> > > -.levels = levels,
> > > -.array_len = array_len,
> > > +.depth = surf->logical_level0_px.depth,
> > > +.levels = surf->levels,
> > > +.array_len = surf->logical_level0_px.array_len,
> > >  .samples = 1,
> > >  .row_pitch = row_pitch,
> > >  .usage = ISL_SURF_USAGE_CCS_BIT,
> > > --
> > > 2.5.0.400.gff86faf
> > >
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/targets: Fix d3dadapter9 build after xmlconfig move.

2017-07-31 Thread Emil Velikov
On 31 July 2017 at 20:57, Bas Nieuwenhuizen  wrote:
> Signed-off-by: Bas Nieuwenhuizen 
> Fixes: 601093f95dd "xmlconfig: move into src/util"
> Cc: Nicolai Hähnle 

I forgot that nine is a bit different than the rest. Thanks for catching this.

Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/13] swr/rast: split gen_knobs template into .cpp and .h files

2017-07-31 Thread Emil Velikov
Hi Tim,

What's the goal behind the split? Please add a couple of words in the
commit message.

On 31 July 2017 at 20:40, Tim Rowley  wrote:
> ---
>  src/gallium/drivers/swr/Makefile.am|   3 +-
>  src/gallium/drivers/swr/SConscript |   4 +-
>  .../drivers/swr/rasterizer/codegen/gen_knobs.py|  14 +-
>  .../swr/rasterizer/codegen/templates/gen_knobs.cpp | 112 +---
>  .../swr/rasterizer/codegen/templates/gen_knobs.h   | 147 
> +
>  .../drivers/swr/rasterizer/core/knobs_init.h   |  12 +-
>  6 files changed, 166 insertions(+), 126 deletions(-)
>  create mode 100644 
> src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
>
> diff --git a/src/gallium/drivers/swr/Makefile.am 
> b/src/gallium/drivers/swr/Makefile.am
> index 73fe904..b20f128 100644
> --- a/src/gallium/drivers/swr/Makefile.am
> +++ b/src/gallium/drivers/swr/Makefile.am
> @@ -115,7 +115,7 @@ rasterizer/codegen/gen_knobs.cpp: 
> rasterizer/codegen/gen_knobs.py rasterizer/cod
> --output rasterizer/codegen/gen_knobs.cpp \
> --gen_cpp
>
> -rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py 
> rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp 
> rasterizer/codegen/gen_common.py
> +rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py 
> rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.h 
> rasterizer/codegen/gen_common.py
> $(MKDIR_GEN)
> $(PYTHON_GEN) \
> $(srcdir)/rasterizer/codegen/gen_knobs.py \
> @@ -347,5 +347,6 @@ EXTRA_DIST = \
> rasterizer/codegen/templates/gen_builder.hpp \
> rasterizer/codegen/templates/gen_header_init.hpp \
> rasterizer/codegen/templates/gen_knobs.cpp \
> +   rasterizer/codegen/templates/gen_knobs.h \
> rasterizer/codegen/templates/gen_llvm.hpp \
> rasterizer/codegen/templates/gen_rasterizer.cpp
> diff --git a/src/gallium/drivers/swr/SConscript 
> b/src/gallium/drivers/swr/SConscript
> index a32807d..b394cbc 100644
> --- a/src/gallium/drivers/swr/SConscript
> +++ b/src/gallium/drivers/swr/SConscript
> @@ -53,8 +53,8 @@ env.CodeGenerate(
>  source = '',
>  command = python_cmd + ' $SCRIPT --output $TARGET --gen_h'
>  )
> -Depends('rasterizer/codegen/gen_knobs.cpp',
Seems like this should have been gen_knobs.h in the first place - oops :-)

> -swrroot + 'rasterizer/codegen/templates/gen_knobs.cpp')
> +Depends('rasterizer/codegen/gen_knobs.h',
> +swrroot + 'rasterizer/codegen/templates/gen_knobs.h')
>

The build bits are
Reviewed-by: Emil Velikov 

> --- /dev/null
> +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
> @@ -0,0 +1,147 @@
> +/**
> +*
> +* Copyright 2015-2017
> +* Intel Corporation
> +*
> +* Licensed under the Apache License, Version 2.0 (the "License");
> +* you may not use this file except in compliance with the License.
> +* You may obtain a copy of the License at
> +*
> +* http ://www.apache.org/licenses/LICENSE-2.0
> +*
I'm not a lawyer so I'm not sure if having Apache licensed code is
fine with rest of Mesa.

Considering that the rest of SWR (barring the original gen_knobs.cpp where
this comes from) uses MIT X11/Expat, I'd stay consistent and
re-license this/these files.
If possible, of course.


> --- a/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
> @@ -91,16 +91,18 @@ static inline void ConvertEnvToKnob(const char* 
> pOverride, std::string& knobValu
>  template 
>  static inline void InitKnob(T& knob)
>  {
> -
> -// TODO, read registry first
> -
> -// Second, read environment variables
> +// Read environment variables
>  const char* pOverride = getenv(knob.Name());
>
>  if (pOverride)
>  {
> -auto knobValue = knob.Value();
> +auto knobValue = knob.DefaultValue();
>  ConvertEnvToKnob(pOverride, knobValue);
>  knob.Value(knobValue);
>  }
> +else
> +{
> +// Set default value
> +knob.Value(knob.DefaultValue());
This and the underlying code seems to have changed a bit.

Would be nice to keep "dummy split" and functionality changes as
separate patches.
Then again: it's not my code, so please don't read too much into my suggestion.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: Add arch flags to support Cray and PGI compilers

2017-07-31 Thread Rowley, Timothy O
If the Cray flags are for wrapper scripts, why do we need specific flags for 
that instead of using the underlying compiler flags?

I’m guessing you intend this for the 17.2 branch as well?

-Tim

> On Jul 31, 2017, at 2:53 PM, Chuck Atkins  wrote:
> 
> Note that the Cray flags (-target-cpu=) need to come first since the
> cray programming environment uses wrappers around other compilers.  By
> checking the wrapper flags first, you can be sure to match the wrapper
> flag instead of the underlying compiler (gcc, intel, pgi, etc.) flags.
> 
> Signed-off-by: Chuck Atkins 
> Cc: Tim Rowley 
> ---
> configure.ac | 8 
> 1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 6302aa2b0c..3b45baf6d0 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -2511,7 +2511,7 @@ if test -n "$with_gallium_drivers"; then
> AC_SUBST([SWR_CXX11_CXXFLAGS])
> 
> swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
> -",-mavx,-march=core-avx" \
> +
> ",-target-cpu=sandybridge,-mavx,-march=core-avx,-tp=sandybridge" \
> SWR_AVX_CXXFLAGS
> AC_SUBST([SWR_AVX_CXXFLAGS])
> 
> @@ -2523,21 +2523,21 @@ if test -n "$with_gallium_drivers"; then
> ;;
> xavx2)
> swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \
> -",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \
> +",-target-cpu=haswell,-mavx2 -mfma -mbmi2 
> -mf16c,-march=core-avx2,-tp=haswell" \
> SWR_AVX2_CXXFLAGS
> AC_SUBST([SWR_AVX2_CXXFLAGS])
> HAVE_SWR_AVX2=yes
> ;;
> xknl)
> swr_require_cxx_feature_flags "KNL" "defined(__AVX512F__) 
> && defined(__AVX512ER__)" \
> -",-march=knl,-xMIC-AVX512" \
> +",-target-cpu=mic-knl,-march=knl,-xMIC-AVX512" \
> SWR_KNL_CXXFLAGS
> AC_SUBST([SWR_KNL_CXXFLAGS])
> HAVE_SWR_KNL=yes
> ;;
> xskx)
> swr_require_cxx_feature_flags "SKX" "defined(__AVX512F__) 
> && defined(__AVX512BW__)" \
> -",-march=skylake-avx512,-xCORE-AVX512" \
> +
> ",-target-cpu=x86-skylake,-march=skylake-avx512,-xCORE-AVX512" \
> SWR_SKX_CXXFLAGS
> AC_SUBST([SWR_SKX_CXXFLAGS])
> HAVE_SWR_SKX=yes
> -- 
> 2.13.3
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs: Update feature list for GL 4.6

2017-07-31 Thread Matt Turner
On Mon, Jul 31, 2017 at 9:43 AM, Adam Jackson  wrote:
> ARB_polygon_offset_clamp and ARB_texture_filter_anisotropic look like
> they'd be pretty trivial to wire up.
>
> Signed-off-by: Adam Jackson 
> ---
>  docs/features.txt | 22 ++
>  1 file changed, 14 insertions(+), 8 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index 79b71de543..3db22e47e2 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -221,6 +221,20 @@ GL 4.5, GLSL 4.50 -- all DONE: nvc0, radeonsi
>GL_KHR_robustness DONE (i965)
>GL_EXT_shader_integer_mix DONE (all drivers 
> that support GLSL)
>
> +GL 4.6, GLSL 4.60
> +
> +  GL_ARB_gl_spirv   not started

Nicolai and Ian are working on this.

> +  GL_ARB_gl_spirv_extensionsnot started

And presumably this one too.

> +  GL_ARB_indirect_parametersDONE (nvc0, radeonsi)
> +  GL_ARB_pipeline_statistics_query  DONE (i965, nvc0, 
> radeonsi, softpipe, swr)
> +  GL_ARB_polygon_offset_clamp   not started
> +  GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
> nvc0, radeonsi, softpipe)
> +  GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
> radeonsi)
> +  GL_ARB_shader_group_vote  DONE (nvc0, radeonsi)

I implemented this one recently on i965, but I didn't realize it was
listed below. Doesn't matter if it's fixed in this patch.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/isl: Tighten up restrictions for CCS on gen7

2017-07-31 Thread Jason Ekstrand
On Mon, Jul 31, 2017 at 12:11 PM, Nanley Chery 
wrote:

> On Sat, Jul 22, 2017 at 04:54:24PM -0700, Jason Ekstrand wrote:
> > It may technically be possible to enable some sort of fast-clear support
> > for at least the base slice of a 2D array texture on gen7.  However,
> > it's not documented to work, we've never tried to do it in GL, and we
> > have no idea what the hardware does if you turn on CCS_D with arrayed
> > rendering.  Let's just play it safe and disallow it for now.  If someone
> > really cares that much about gen7 performance, they can come along and
> > try to get it working later.
> > ---
> >  src/intel/isl/isl.c | 34 --
> >  1 file changed, 24 insertions(+), 10 deletions(-)
> >
> > diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
> > index 9cf5821..5465496 100644
> > --- a/src/intel/isl/isl.c
> > +++ b/src/intel/isl/isl.c
> > @@ -1749,9 +1749,30 @@ isl_surf_get_ccs_surf(const struct isl_device
> *dev,
> > if (surf->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)
> >return false;
> >
> > +   /* The PRM doesn't say this explicitly, but fast-clears don't appear
> to
> > +* work for 3D textures until gen9 where the layout of 3D textures
> changes
> > +* to match 2D array textures.
> > +*/
> > if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
> >return false;
> >
> > +   /* From the HSW PRM Volume 7: 3D-Media-GPGPU, page 652 (Color Clear
> of
> > +* Non-MultiSampler Render Target Restrictions):
> > +*
> > +*"Support is for non-mip-mapped and non-array surface types
> only."
> > +*
> > +* This restriction is lifted on gen8+.  Technically, it may be
> possible to
> > +* create a CCS for an arrayed or mipmapped image and only enable
> CCS_D
> > +* when rendering to the base slice.  However, there is no
> documentation
> > +* tell us what the hardware would do in that case or what it does
> if you
> > +* walk off the bases slice.  (Does it ignore CCS or does it start
> > +* scribbling over random memory?)  We play it safe and just follow
> the
> > +* docs and don't allow CCS_D for arrayed or mip-mapped surfaces.
> > +*/
> > +   if (ISL_DEV_GEN(dev) <= 7 &&
> > +   (surf->levels > 1 || surf->logical_level0_px.array_len > 1))
> > +  return false;
> > +
>
> Why are mipmapped surfaces unsafe? A user is restricted to rendering
> into one miplevel at a time.
>

Same two reasons:

 1) The docs say it doesn't work.
 2) The miptree layout may be complete nonsense when it comes to the CCS.
On gen8+, the surface is re-laid-out for the auxiliary surface but, on
gen7, they try to do a direct translation of x/y coordinates in the main
surface to x/y in the CCS.

--Jason


> -Nanley
>
> > if (isl_format_is_compressed(surf->format))
> >return false;
> >
> > @@ -1789,21 +1810,14 @@ isl_surf_get_ccs_surf(const struct isl_device
> *dev,
> >return false;
> > }
> >
> > -   /* Multi-LOD and multi-layer CCS isn't supported on gen7. */
> > -   const uint8_t levels = ISL_DEV_GEN(dev) <= 7 ? 1 : surf->levels;
> > -   const uint32_t array_len = ISL_DEV_GEN(dev) <= 7 ?
> > -  1 : surf->logical_level0_px.array_len;
> > -   const uint32_t depth = ISL_DEV_GEN(dev) <= 7 ?
> > -  1 : surf->logical_level0_px.depth;
> > -
> > return isl_surf_init(dev, ccs_surf,
> >  .dim = surf->dim,
> >  .format = ccs_format,
> >  .width = surf->logical_level0_px.width,
> >  .height = surf->logical_level0_px.height,
> > -.depth = depth,
> > -.levels = levels,
> > -.array_len = array_len,
> > +.depth = surf->logical_level0_px.depth,
> > +.levels = surf->levels,
> > +.array_len = surf->logical_level0_px.array_len,
> >  .samples = 1,
> >  .row_pitch = row_pitch,
> >  .usage = ISL_SURF_USAGE_CCS_BIT,
> > --
> > 2.5.0.400.gff86faf
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Add arch flags to support Cray and PGI compilers

2017-07-31 Thread Chuck Atkins
Note that the Cray flags (-target-cpu=) need to come first since the
cray programming environment uses wrappers around other compilers.  By
checking the wrapper flags first, you can be sure to match the wrapper
flag instead of the underlying compiler (gcc, intel, pgi, etc.) flags.

Signed-off-by: Chuck Atkins 
Cc: Tim Rowley 
---
 configure.ac | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/configure.ac b/configure.ac
index 6302aa2b0c..3b45baf6d0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2511,7 +2511,7 @@ if test -n "$with_gallium_drivers"; then
 AC_SUBST([SWR_CXX11_CXXFLAGS])
 
 swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
-",-mavx,-march=core-avx" \
+
",-target-cpu=sandybridge,-mavx,-march=core-avx,-tp=sandybridge" \
 SWR_AVX_CXXFLAGS
 AC_SUBST([SWR_AVX_CXXFLAGS])
 
@@ -2523,21 +2523,21 @@ if test -n "$with_gallium_drivers"; then
 ;;
 xavx2)
 swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \
-",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \
+",-target-cpu=haswell,-mavx2 -mfma -mbmi2 
-mf16c,-march=core-avx2,-tp=haswell" \
 SWR_AVX2_CXXFLAGS
 AC_SUBST([SWR_AVX2_CXXFLAGS])
 HAVE_SWR_AVX2=yes
 ;;
 xknl)
 swr_require_cxx_feature_flags "KNL" "defined(__AVX512F__) 
&& defined(__AVX512ER__)" \
-",-march=knl,-xMIC-AVX512" \
+",-target-cpu=mic-knl,-march=knl,-xMIC-AVX512" \
 SWR_KNL_CXXFLAGS
 AC_SUBST([SWR_KNL_CXXFLAGS])
 HAVE_SWR_KNL=yes
 ;;
 xskx)
 swr_require_cxx_feature_flags "SKX" "defined(__AVX512F__) 
&& defined(__AVX512BW__)" \
-",-march=skylake-avx512,-xCORE-AVX512" \
+
",-target-cpu=x86-skylake,-march=skylake-avx512,-xCORE-AVX512" \
 SWR_SKX_CXXFLAGS
 AC_SUBST([SWR_SKX_CXXFLAGS])
 HAVE_SWR_SKX=yes
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/targets: Fix d3dadapter9 build after xmlconfig move.

2017-07-31 Thread Bas Nieuwenhuizen
Signed-off-by: Bas Nieuwenhuizen 
Fixes: 601093f95dd "xmlconfig: move into src/util"
Cc: Nicolai Hähnle 
---
 src/gallium/targets/d3dadapter9/Makefile.am | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/targets/d3dadapter9/Makefile.am 
b/src/gallium/targets/d3dadapter9/Makefile.am
index 824e8d6b932..9357d30332a 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -27,6 +27,7 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/loader \
-I$(top_srcdir)/src/mapi/ \
-I$(top_srcdir)/src/mesa/ \
+   -I$(top_builddir)/src/util \
-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
-I$(top_srcdir)/src/gallium/winsys \
-I$(top_srcdir)/src/gallium/state_trackers/nine \
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa v2] util/ra: fix memory leak

2017-07-31 Thread Eric Anholt
Eric Engestrom  writes:

> From: Eric Engestrom 
>
> CID: 1415909
> Fixes: 7a34a0e8903249c41fae "ra: Add a callback for selecting a register
>  from what's available."
> Signed-off-by: Eric Engestrom 

Reviewed and pushed.  Thanks!


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs: Update feature list for GL 4.6

2017-07-31 Thread Samuel Pitoiset



On 07/31/2017 06:43 PM, Adam Jackson wrote:

ARB_polygon_offset_clamp and ARB_texture_filter_anisotropic look like
they'd be pretty trivial to wire up.


And KHR_no_error is mostly done as well. :)

Reviewed-by: Samuel Pitoiset 



Signed-off-by: Adam Jackson 
---
  docs/features.txt | 22 ++
  1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 79b71de543..3db22e47e2 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -221,6 +221,20 @@ GL 4.5, GLSL 4.50 -- all DONE: nvc0, radeonsi
GL_KHR_robustness DONE (i965)
GL_EXT_shader_integer_mix DONE (all drivers 
that support GLSL)
  
+GL 4.6, GLSL 4.60

+
+  GL_ARB_gl_spirv   not started
+  GL_ARB_gl_spirv_extensionsnot started
+  GL_ARB_indirect_parametersDONE (nvc0, radeonsi)
+  GL_ARB_pipeline_statistics_query  DONE (i965, nvc0, 
radeonsi, softpipe, swr)
+  GL_ARB_polygon_offset_clamp   not started
+  GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
nvc0, radeonsi, softpipe)
+  GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
radeonsi)
+  GL_ARB_shader_group_vote  DONE (nvc0, radeonsi)
+  GL_ARB_texture_filter_anisotropic not started
+  GL_ARB_transform_feedback_overflow_query  DONE (i965/gen6+)
+  GL_KHR_no_error   started (Timothy 
Arceri)
+
  These are the extensions cherry-picked to make GLES 3.1
  GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
  
@@ -282,20 +296,14 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve

GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
GL_ARB_fragment_shader_interlock  not started
-  GL_ARB_gl_spirv   not started
GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
nvc0, radeonsi, softpipe, llvmpipe)
-  GL_ARB_indirect_parametersDONE (nvc0, radeonsi)
GL_ARB_parallel_shader_compilenot started, but 
Chia-I Wu did some related work in 2014
-  GL_ARB_pipeline_statistics_query  DONE (i965, nvc0, 
radeonsi, softpipe, swr)
GL_ARB_post_depth_coverageDONE (i965)
GL_ARB_robustness_isolation   not started
GL_ARB_sample_locations   not started
GL_ARB_seamless_cubemap_per_texture   DONE (i965, nvc0, 
radeonsi, r600, softpipe, swr)
-  GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
nvc0, radeonsi, softpipe)
GL_ARB_shader_ballot  DONE (nvc0, radeonsi)
GL_ARB_shader_clock   DONE (i965/gen7+, 
nv50, nvc0, radeonsi)
-  GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
radeonsi)
-  GL_ARB_shader_group_vote  DONE (nvc0, radeonsi)
GL_ARB_shader_stencil_export  DONE (i965/gen9+, 
radeonsi, softpipe, llvmpipe, swr)
GL_ARB_shader_viewport_layer_arrayDONE (i965/gen6+, 
nvc0, radeonsi)
GL_ARB_sparse_buffer  DONE (radeonsi/CIK+)
@@ -303,9 +311,7 @@ Khronos, ARB, and OES extensions that are not part of any 
OpenGL or OpenGL ES ve
GL_ARB_sparse_texture2not started
GL_ARB_sparse_texture_clamp   not started
GL_ARB_texture_filter_minmax  not started
-  GL_ARB_transform_feedback_overflow_query  DONE (i965/gen6+)
GL_KHR_blend_equation_advanced_coherent   DONE (i965/gen9+)
-  GL_KHR_no_error   started (Timothy 
Arceri)
GL_KHR_texture_compression_astc_hdr   DONE (i965/bxt)
GL_KHR_texture_compression_astc_sliced_3d DONE (i965/gen9+)
GL_OES_depth_texture_cube_map DONE (all drivers 
that support GLSL 1.30+)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101981] Commit ddc32537d6db69198e88ef0dfe19770bf9daa536 breaks rendering in multiple applications

2017-07-31 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101981

--- Comment #3 from Samuel Pitoiset  ---
Yeah, I have just sent a fix
https://lists.freedesktop.org/archives/mesa-dev/2017-July/164829.html

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: fix bad cast conversions in viewport()

2017-07-31 Thread Samuel Pitoiset
Fixes: ddc32537d6 ("mesa: clamp viewport values only once when using 
glViewport()")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101981
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101989
Signed-off-by: Samuel Pitoiset 
---
 src/mesa/main/viewport.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c
index 3dce320d1d..fc384909e6 100644
--- a/src/mesa/main/viewport.c
+++ b/src/mesa/main/viewport.c
@@ -94,9 +94,10 @@ static void
 viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei width,
  GLsizei height)
 {
+   struct gl_viewport_inputs input = { x, y, width, height };
+
/* Clamp the viewport to the implementation dependent values. */
-   clamp_viewport(ctx, (GLfloat *)&x, (GLfloat *)&y,
-  (GLfloat *)&width, (GLfloat *)&height);
+   clamp_viewport(ctx, &input.X, &input.Y, &input.Width, &input.Height);
 
/* The GL_ARB_viewport_array spec says:
 *
@@ -110,7 +111,7 @@ viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei 
width,
 * signal the driver once at the end.
 */
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++)
-  set_viewport_no_notify(ctx, i, x, y, width, height);
+  set_viewport_no_notify(ctx, i, input.X, input.Y, input.Width, 
input.Height);
 
if (ctx->Driver.Viewport)
   ctx->Driver.Viewport(ctx);
-- 
2.13.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/13] swr/rast: fix movemask_ps / movemask_pd on AVX512

2017-07-31 Thread Tim Rowley
---
 src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl 
b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
index 1001417..1dbfff8 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -554,15 +554,20 @@ static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer 
a)
 
 static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a)
 {
-__mmask8 m = _mm512_test_epi64_mask(castpd_si(a), set1_epi32(-1));
+__mmask8 m = _mm512_test_epi64_mask(castpd_si(a), 
set1_epi64(0x8000LL));
 return static_cast(m);
 }
 static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a)
 {
-__mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(-1));
+__mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(0x800));
 return static_cast(m);
 }
 
+static SIMDINLINE Integer SIMDCALL set1_epi64(long long i) // return i (all 
elements are same value)
+{
+return _mm512_set1_epi64(i);
+}
+
 static SIMDINLINE Integer SIMDCALL set1_epi32(int i) // return i (all elements 
are same value)
 {
 return _mm512_set1_epi32(i);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/13] swr/rast: fix core / knights split of AVX512 intrinsics

2017-07-31 Thread Tim Rowley
Move AVX512BW specific intrinsics to be Core-only.

Move some AVX512F intrinsics back to common implementation file.
---
 .../drivers/swr/rasterizer/common/simdlib.hpp  |  2 +
 .../swr/rasterizer/common/simdlib_512_avx512.inl   | 53 +
 .../rasterizer/common/simdlib_512_avx512_core.inl  | 54 ++
 .../common/simdlib_512_avx512_knights.inl  | 15 --
 4 files changed, 69 insertions(+), 55 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp 
b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
index 22d7da4..500cf8a 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
@@ -214,6 +214,8 @@ struct SIMDBase : Traits::IsaImpl
 using Vec4  = typename Traits::Vec4;
 using Mask  = typename Traits::Mask;
 
+static const size_t VECTOR_BYTES = sizeof(Float);
+
 // Populates a SIMD Vec4 from a non-simd vector. So p = xyzw becomes  
  .
 static SIMDINLINE
 void vec4_load1_ps(Vec4& r, const float *p)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl 
b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
index 1dbfff8..95e4c31 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -158,6 +158,11 @@ private:
 return _mm512_maskz_set1_epi32(m, -1);
 }
 
+static SIMDINLINE Integer vmask(__mmask8 m)
+{
+return _mm512_maskz_set1_epi64(m, -1LL);
+}
+
 public:
 //---
 // Single precision floating point arithmetic operations
@@ -187,8 +192,8 @@ static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return 
round_ps 0xff) ? 0xff : (a + b) 
(uint8) 
+//SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
+//SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) 
(uint8) 
 SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
 SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
 SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
@@ -202,7 +207,7 @@ SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32)
 SIMD_IWRAPPER_2(mullo_epi32);
 SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32)
 SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+//SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
 
 //---
 // Logical operations
@@ -276,7 +281,7 @@ static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // 
return (float)a(i
 return _mm512_cvtepi32_ps(a);
 }
 
-SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a(uint8 --> int16)
+//SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a(uint8 --> int16)
 SIMD_IWRAPPER_1_4(cvtepu8_epi32); // return (int32)a(uint8 --> int32)
 SIMD_IWRAPPER_1_8(cvtepu16_epi32);// return (int32)a(uint16 --> int32)
 SIMD_IWRAPPER_1_4(cvtepu16_epi64);// return (int64)a(uint16 --> int64)
@@ -317,20 +322,6 @@ static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float 
b) { return cmp_ps(a, b); }
 
 template
-static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b)
-{
-// Legacy vector mask generator
-__mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast(CmpTypeT));
-return vmask(result);
-}
-template
-static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b)
-{
-// Legacy vector mask generator
-__mmask32 result = _mm512_cmp_epi16_mask(a, b, static_cast(CmpTypeT));
-return vmask(result);
-}
-template
 static SIMDINLINE Integer SIMDCALL cmp_epi32(Integer a, Integer b)
 {
 // Legacy vector mask generator
@@ -345,12 +336,12 @@ static SIMDINLINE Integer SIMDCALL cmp_epi64(Integer a, 
Integer b)
 return vmask(result);
 }
 
-SIMD_IWRAPPER_2_CMP(cmpeq_epi8,  cmp_epi8);// return a 
== b (int8)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16);   // return a 
== b (int16)
+//SIMD_IWRAPPER_2_CMP(cmpeq_epi8,  cmp_epi8);// return 
a == b (int8)
+//SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16);   // return 
a == b (int16)
 SIMD_IWRAPPER_2_CMP(cmpeq_epi32, cmp_epi32);   // return a 
== b (int32)
 SIMD_IWRAPPER_2_CMP(cmpeq_epi64, cmp_epi64);   // return a 
== b (int64)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi8,  cmp_epi8);// return a 
> b (int8)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16);   // return a 
> b (int16)
+//SIMD_IWRAPPER_2_CMP(cmpgt_epi8,  cmp_epi8);// return 
a > b (int8)
+//SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16);   // return 
a > b (int16)
 SIMD_IWRAPPER_2_CMP(cmpgt_epi32, cmp_epi32);   // return a 
> b (int32)
 SIMD_IWRAPPER_2_CMP(cmpgt_epi64, cmp_epi64);   // return a 
> b (int64)
 SIMD_IWRAPPER_2_CMP(cmplt_epi32, cmp_epi32);   // return a 
< b (int32)
@@ -458,7 +449,7 @@ 

[Mesa-dev] [PATCH 08/13] swr/rast: rename frontend pVertexStore

2017-07-31 Thread Tim Rowley
Rename to reflect global nature.
---
 src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index f9eda83..e51f967 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1332,7 +1332,7 @@ static void TessellationStages(
 TSDestroyCtx(tsCtx);
 }
 
-THREAD PA_STATE::SIMDVERTEX *pVertexStore = nullptr;
+THREAD PA_STATE::SIMDVERTEX *gpVertexStore = nullptr;
 THREAD uint32_t gVertexStoreSize = 0;
 
 //
@@ -1459,19 +1459,22 @@ void ProcessDraw(
 // grow the vertex store for the PA as necessary
 if (gVertexStoreSize < vertexStoreSize)
 {
-if (pVertexStore != nullptr)
+if (gpVertexStore != nullptr)
 {
-AlignedFree(pVertexStore);
+AlignedFree(gpVertexStore);
+gpVertexStore = nullptr;
 }
 
-pVertexStore = reinterpret_cast(AlignedMalloc(vertexStoreSize, 64));
+SWR_ASSERT(gpVertexStore == nullptr);
+
+gpVertexStore = reinterpret_cast(AlignedMalloc(vertexStoreSize, 64));
 gVertexStoreSize = vertexStoreSize;
 
-SWR_ASSERT(pVertexStore != nullptr);
+SWR_ASSERT(gpVertexStore != nullptr);
 }
 
 // choose primitive assembler
-PA_FACTORY paFactory(pDC, state.topology, 
work.numVerts, pVertexStore, numVerts, state.frontendState.vsVertexSize);
+PA_FACTORY paFactory(pDC, state.topology, 
work.numVerts, gpVertexStore, numVerts, state.frontendState.vsVertexSize);
 PA_STATE& pa = paFactory.GetPA();
 
 #if USE_SIMD16_FRONTEND
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/13] swr/rast: simdlib better separation of core vs. knights avx512

2017-07-31 Thread Tim Rowley
---
 src/gallium/drivers/swr/Makefile.am|   2 +-
 src/gallium/drivers/swr/Makefile.sources   |   8 +
 .../drivers/swr/rasterizer/common/simdlib.hpp  |  21 ++-
 .../swr/rasterizer/common/simdlib_128_avx512.inl   | 108 +++-
 .../rasterizer/common/simdlib_128_avx512_core.inl  | 193 +
 .../common/simdlib_128_avx512_knights.inl  |  35 
 .../swr/rasterizer/common/simdlib_256_avx512.inl   | 128 +++---
 .../rasterizer/common/simdlib_256_avx512_core.inl  | 127 ++
 .../common/simdlib_256_avx512_knights.inl  |  35 
 .../swr/rasterizer/common/simdlib_512_avx512.inl   |  79 +++--
 .../rasterizer/common/simdlib_512_avx512_core.inl  | 181 +++
 .../common/simdlib_512_avx512_knights.inl  | 183 +++
 .../common/simdlib_512_avx512_masks_core.inl   |  27 +++
 .../common/simdlib_512_avx512_masks_knights.inl|  27 +++
 .../swr/rasterizer/common/simdlib_types.hpp|   2 +-
 15 files changed, 911 insertions(+), 245 deletions(-)
 create mode 100644 
src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl
 create mode 100644 
src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl
 create mode 100644 
src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl
 create mode 100644 
src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl
 create mode 100644 
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
 create mode 100644 
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
 create mode 100644 
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl
 create mode 100644 
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 05fc3b3..73fe904 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -285,7 +285,7 @@ lib_LTLIBRARIES += libswrKNL.la
 libswrKNL_la_CXXFLAGS = \
$(PTHREAD_CFLAGS) \
$(SWR_KNL_CXXFLAGS) \
-   -DKNOB_ARCH=KNOB_ARCH_AVX512 -DAVX512F_STRICT \
+   -DKNOB_ARCH=KNOB_ARCH_AVX512 -DSIMD_ARCH_KNIGHTS \
$(COMMON_CXXFLAGS)
 
 libswrKNL_la_SOURCES = \
diff --git a/src/gallium/drivers/swr/Makefile.sources 
b/src/gallium/drivers/swr/Makefile.sources
index 3c1118b..53f8bf0 100644
--- a/src/gallium/drivers/swr/Makefile.sources
+++ b/src/gallium/drivers/swr/Makefile.sources
@@ -69,11 +69,19 @@ COMMON_CXX_SOURCES := \
rasterizer/common/simdlib_128_avx.inl \
rasterizer/common/simdlib_128_avx2.inl \
rasterizer/common/simdlib_128_avx512.inl \
+   rasterizer/common/simdlib_128_avx512_core.inl \
+   rasterizer/common/simdlib_128_avx512_knights.inl \
rasterizer/common/simdlib_256_avx.inl \
rasterizer/common/simdlib_256_avx2.inl \
rasterizer/common/simdlib_256_avx512.inl \
+   rasterizer/common/simdlib_256_avx512_core.inl \
+   rasterizer/common/simdlib_256_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512.inl \
+   rasterizer/common/simdlib_512_avx512_core.inl \
+   rasterizer/common/simdlib_512_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512_masks.inl \
+   rasterizer/common/simdlib_512_avx512_masks_core.inl \
+   rasterizer/common/simdlib_512_avx512_masks_knights.inl \
rasterizer/common/simdlib_512_emu.inl \
rasterizer/common/simdlib_512_emu_masks.inl \
rasterizer/common/simdlib_interface.hpp \
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp 
b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
index fb11132..0c79cdd 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
@@ -55,6 +55,11 @@ namespace SIMDImpl
 {
 #define __SIMD_LIB_AVX512_HPP__
 #include "simdlib_128_avx512.inl"
+#if defined(SIMD_ARCH_KNIGHTS)
+#include "simdlib_128_avx512_knights.inl"
+#else // optimize for core
+#include "simdlib_128_avx512_core.inl"
+#endif // defined(SIMD_ARCH_KNIGHTS)
 #undef __SIMD_LIB_AVX512_HPP__
 }; // struct AVX2Impl
 #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
@@ -105,6 +110,11 @@ namespace SIMDImpl
 {
 #define __SIMD_LIB_AVX512_HPP__
 #include "simdlib_256_avx512.inl"
+#if defined(SIMD_ARCH_KNIGHTS)
+#include "simdlib_256_avx512_knights.inl"
+#else // optimize for core
+#include "simdlib_256_avx512_core.inl"
+#endif // defined(SIMD_ARCH_KNIGHTS)
 #undef __SIMD_LIB_AVX512_HPP__
 }; // struct AVX2Impl
 #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
@@ -150,13 +160,20 @@ namespace SIMDImpl
 
 
 #if SIMD_ARCH >= SIMD_ARCH_AVX512
-struct AVX512Impl
+struct AVX512Impl : AVXImplBase
 {
 #define __SIMD_LIB_AVX512_HPP__
 #include "simdlib_512_avx512.inl"
 #include "simdlib_512_avx512_masks.inl"
+#if 

[Mesa-dev] [PATCH 10/13] swr/rast: SIMD16 shaders - widen fetch and vertex shaders

2017-07-31 Thread Tim Rowley
Work in progress, disabled by default.
---
 .../drivers/swr/rasterizer/core/frontend.cpp   |  33 
 src/gallium/drivers/swr/rasterizer/core/knobs.h|   1 +
 src/gallium/drivers/swr/rasterizer/core/state.h|  10 ++
 .../drivers/swr/rasterizer/jitter/JitManager.cpp   |  16 ++
 .../drivers/swr/rasterizer/jitter/JitManager.h |   8 +
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 175 -
 6 files changed, 238 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index e51f967..daea088 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1478,13 +1478,22 @@ void ProcessDraw(
 PA_STATE& pa = paFactory.GetPA();
 
 #if USE_SIMD16_FRONTEND
+#if USE_SIMD16_SHADERS
+simd16vertexvin;
+#else
 simdvertex  vin_lo;
 simdvertex  vin_hi;
+#endif
 SWR_VS_CONTEXT  vsContext_lo;
 SWR_VS_CONTEXT  vsContext_hi;
 
+#if USE_SIMD16_SHADERS
+vsContext_lo.pVin = reinterpret_cast();
+vsContext_hi.pVin = reinterpret_cast();
+#else
 vsContext_lo.pVin = _lo;
 vsContext_hi.pVin = _hi;
+#endif
 vsContext_lo.AlternateOffset = 0;
 vsContext_hi.AlternateOffset = 1;
 
@@ -1565,17 +1574,31 @@ void ProcessDraw(
 {
 // 1. Execute FS/VS for a single SIMD.
 AR_BEGIN(FEFetchShader, pDC->drawId);
+#if USE_SIMD16_SHADERS
+state.pfnFetchFunc(fetchInfo_lo, vin);
+#else
 state.pfnFetchFunc(fetchInfo_lo, vin_lo);
 
 if ((i + KNOB_SIMD_WIDTH) < endVertex)  // 1/2 of 
KNOB_SIMD16_WIDTH
 {
 state.pfnFetchFunc(fetchInfo_hi, vin_hi);
 }
+#endif
 AR_END(FEFetchShader, 0);
 
 // forward fetch generated vertex IDs to the vertex shader
+#if USE_SIMD16_SHADERS
+#if 0
+vsContext_lo.VertexID = _simd16_extract(fetchInfo_lo.VertexID, 
0);
+vsContext_hi.VertexID = _simd16_extract(fetchInfo_lo.VertexID, 
1);
+#else
+vsContext_lo.VertexID = fetchInfo_lo.VertexID;
+vsContext_hi.VertexID = fetchInfo_lo.VertexID2;
+#endif
+#else
 vsContext_lo.VertexID = fetchInfo_lo.VertexID;
 vsContext_hi.VertexID = fetchInfo_hi.VertexID;
+#endif
 
 // Setup active mask for vertex shader.
 vsContext_lo.mask = GenerateMask(endVertex - i);
@@ -1584,8 +1607,18 @@ void ProcessDraw(
 // forward cut mask to the PA
 if (IsIndexedT::value)
 {
+#if USE_SIMD16_SHADERS
+#if 0
+*pvCutIndices_lo = 
_simd_movemask_ps(_simd_castsi_ps(_simd16_extract(fetchInfo_lo.CutMask, 0)));
+*pvCutIndices_hi = 
_simd_movemask_ps(_simd_castsi_ps(_simd16_extract(fetchInfo_lo.CutMask, 1)));
+#else
+*pvCutIndices_lo = 
_simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
+*pvCutIndices_hi = 
_simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask2));
+#endif
+#else
 *pvCutIndices_lo = 
_simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
 *pvCutIndices_hi = 
_simd_movemask_ps(_simd_castsi_ps(fetchInfo_hi.CutMask));
+#endif
 }
 
 UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex));
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h 
b/src/gallium/drivers/swr/rasterizer/core/knobs.h
index 10bd4a5..fe0a044 100644
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
@@ -41,6 +41,7 @@
 #define ENABLE_AVX512_SIMD161
 #define USE_8x2_TILE_BACKEND1
 #define USE_SIMD16_FRONTEND 1
+#define USE_SIMD16_SHADERS  0   // requires USE_SIMD16_FRONTEND
 
 ///
 // Architecture validation
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h 
b/src/gallium/drivers/swr/rasterizer/core/state.h
index 7af3f82..9e63955 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -577,6 +577,12 @@ struct SWR_FETCH_CONTEXT
 uint32_t StartInstance; // IN: start instance
 simdscalari VertexID;   // OUT: vector of vertex IDs
 simdscalari CutMask;// OUT: vector mask of indices 
which have the cut index value
+#if USE_SIMD16_SHADERS
+//simd16scalari VertexID; // OUT: vector of vertex IDs
+//simd16scalari CutMask;  // OUT: vector mask of 
indices which have the cut index value
+simdscalari VertexID2;  // OUT: vector of vertex IDs
+simdscalari CutMask2;   // OUT: 

  1   2   3   >