Re: [Mesa-dev] [PATCH v4 1/3] dri: Add UYVY as available format

2017-06-21 Thread Lin, Johnson
@Kristian

-Original Message-
From: Lin, Johnson 
Sent: Thursday, June 22, 2017 11:28 AM
To: mesa-dev@lists.freedesktop.org
Cc: Lin, Johnson 
Subject: [PATCH v4 1/3] dri: Add UYVY as available format

UYVY differs from YUYV in byte order.
YUYV is already declared in dri_interface.h; this CL adds the definitions for
UYVY.
Drivers can add UYVY as a supported format.
---
 include/GL/internal/dri_interface.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index fc2d4bbe22ef..6992da16d5f8 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1211,6 +1211,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FOURCC_NV120x3231564e
 #define __DRI_IMAGE_FOURCC_NV160x3631564e
 #define __DRI_IMAGE_FOURCC_YUYV0x56595559
+#define __DRI_IMAGE_FOURCC_UYVY0x59565955
 
 #define __DRI_IMAGE_FOURCC_YVU410  0x39555659
 #define __DRI_IMAGE_FOURCC_YVU411  0x31315659
@@ -1224,7 +1225,7 @@ struct __DRIdri2ExtensionRec {
  * RGB and RGBA are may be usable directly as images but its still
  * recommended to call fromPlanar with plane == 0.
  *
- * Y_U_V, Y_UV and Y_XUXV all requires call to fromPlanar to create
+ * Y_U_V, Y_UV,Y_XUXV and Y_UXVX all requires call to fromPlanar to 
+ create
  * usable sub-images, sampling from images return raw YUV data and
  * color conversion needs to be done in the shader.
  *
@@ -1236,6 +1237,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_COMPONENTS_Y_U_V   0x3003
 #define __DRI_IMAGE_COMPONENTS_Y_UV0x3004
 #define __DRI_IMAGE_COMPONENTS_Y_XUXV  0x3005
+#define __DRI_IMAGE_COMPONENTS_Y_UXVX  0x3008
 #define __DRI_IMAGE_COMPONENTS_R   0x3006
 #define __DRI_IMAGE_COMPONENTS_RG  0x3007
 
--
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 2/3] nir: Add a lowering pass for UYVY textures

2017-06-21 Thread Johnson Lin
Similar to the support for YUYV, but with a different byte order in the sampler.
---
 src/compiler/nir/nir.h   |  1 +
 src/compiler/nir/nir_lower_tex.c | 18 ++
 2 files changed, 19 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ab7ba14303b7..1b4e47058d4d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2449,6 +2449,7 @@ typedef struct nir_lower_tex_options {
unsigned lower_y_uv_external;
unsigned lower_y_u_v_external;
unsigned lower_yx_xuxv_external;
+   unsigned lower_xy_uxvx_external;
 
/**
 * To emulate certain texture wrap modes, this can be used
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 4ef81955513e..65681decb1c0 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -301,6 +301,20 @@ lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
   nir_channel(b, xuxv, 3));
 }
 
+static void
+lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex)
+{
+  b->cursor = nir_after_instr(>instr);
+
+  nir_ssa_def *y = sample_plane(b, tex, 0);
+  nir_ssa_def *uxvx = sample_plane(b, tex, 1);
+
+  convert_yuv_to_rgb(b, tex,
+ nir_channel(b, y, 1),
+ nir_channel(b, uxvx, 0),
+ nir_channel(b, uxvx, 2));
+}
+
 /*
  * Emits a textureLod operation used to replace an existing
  * textureGrad instruction.
@@ -760,6 +774,10 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
  progress = true;
   }
 
+  if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
+ lower_xy_uxvx_external(b, tex);
+ progress = true;
+  }
 
   if (sat_mask) {
  saturate_src(b, tex, sat_mask);
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 1/3] dri: Add UYVY as available format

2017-06-21 Thread Johnson Lin
UYVY differs from YUYV in byte order.
YUYV is already declared in dri_interface.h;
this CL adds the definitions for UYVY.
Drivers can add UYVY as a supported format.
---
 include/GL/internal/dri_interface.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index fc2d4bbe22ef..6992da16d5f8 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1211,6 +1211,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FOURCC_NV120x3231564e
 #define __DRI_IMAGE_FOURCC_NV160x3631564e
 #define __DRI_IMAGE_FOURCC_YUYV0x56595559
+#define __DRI_IMAGE_FOURCC_UYVY0x59565955
 
 #define __DRI_IMAGE_FOURCC_YVU410  0x39555659
 #define __DRI_IMAGE_FOURCC_YVU411  0x31315659
@@ -1224,7 +1225,7 @@ struct __DRIdri2ExtensionRec {
  * RGB and RGBA are may be usable directly as images but its still
  * recommended to call fromPlanar with plane == 0.
  *
- * Y_U_V, Y_UV and Y_XUXV all requires call to fromPlanar to create
+ * Y_U_V, Y_UV,Y_XUXV and Y_UXVX all requires call to fromPlanar to create
  * usable sub-images, sampling from images return raw YUV data and
  * color conversion needs to be done in the shader.
  *
@@ -1236,6 +1237,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_COMPONENTS_Y_U_V   0x3003
 #define __DRI_IMAGE_COMPONENTS_Y_UV0x3004
 #define __DRI_IMAGE_COMPONENTS_Y_XUXV  0x3005
+#define __DRI_IMAGE_COMPONENTS_Y_UXVX  0x3008
 #define __DRI_IMAGE_COMPONENTS_R   0x3006
 #define __DRI_IMAGE_COMPONENTS_RG  0x3007
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 3/3] i965/i915: Add UYVY as the supported format

2017-06-21 Thread Johnson Lin
Trigger the correct sampler options for it, similar to YUYV.
---
 src/intel/compiler/brw_compiler.h|  1 +
 src/intel/compiler/brw_nir.c |  1 +
 src/mesa/drivers/dri/i915/intel_screen.c | 21 -
 src/mesa/drivers/dri/i965/brw_wm.c   |  7 +++
 src/mesa/drivers/dri/i965/intel_screen.c | 21 -
 5 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index 78873744ce5f..3f383403883c 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -168,6 +168,7 @@ struct brw_sampler_prog_key_data {
uint32_t y_u_v_image_mask;
uint32_t y_uv_image_mask;
uint32_t yx_xuxv_image_mask;
+   uint32_t xy_uxvx_image_mask;
 };
 
 /**
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index de8f519b4e10..49d3cf365647 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -770,6 +770,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
tex_options.lower_y_uv_external = key_tex->y_uv_image_mask;
tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask;
tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask;
+   tex_options.lower_xy_uxvx_external = key_tex->xy_uxvx_image_mask;
 
if (nir_lower_tex(nir, _options)) {
   nir_validate_shader(nir);
diff --git a/src/mesa/drivers/dri/i915/intel_screen.c 
b/src/mesa/drivers/dri/i915/intel_screen.c
index cba5434b5e1b..7936d4915e65 100644
--- a/src/mesa/drivers/dri/i915/intel_screen.c
+++ b/src/mesa/drivers/dri/i915/intel_screen.c
@@ -227,17 +227,20 @@ static struct intel_image_format intel_image_formats[] = {
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
 
-   /* For YUYV buffers, we set up two overlapping DRI images and treat
-* them as planar buffers in the compositors.  Plane 0 is GR88 and
-* samples YU or YV pairs and places Y into the R component, while
-* plane 1 is ARGB and samples YUYV clusters and places pairs and
-* places U into the G component and V into A.  This lets the
-* texture sampler interpolate the Y components correctly when
-* sampling from plane 0, and interpolate U and V correctly when
-* sampling from plane 1. */
+   /* For YUYV and UYVY buffers, we set up two overlapping DRI images
+* and treat them as planar buffers in the compositors.
+* Plane 0 is GR88 and samples YU or YV pairs and places Y into
+* the R component, while plane 1 is ARGB/ABGR and samples YUYV/UYVY
+* clusters and places pairs and places U into the G component and
+* V into A.  This lets the texture sampler interpolate the Y
+* components correctly when sampling from plane 0, and interpolate
+* U and V correctly when sampling from plane 1. */
{ __DRI_IMAGE_FOURCC_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2,
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 },
-   { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } }
+   { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } },
+   { __DRI_IMAGE_FOURCC_UYVY, __DRI_IMAGE_COMPONENTS_Y_UXVX, 2,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 },
+   { 0, 1, 0, __DRI_IMAGE_FORMAT_ABGR, 4 } } }
 };
 
 static __DRIimage *
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index a93f4c503792..71118c1ca598 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -270,6 +270,10 @@ brw_debug_recompile_sampler_key(struct brw_context *brw,
found |= key_debug(brw, "yx_xuxv image bound",
   old_key->yx_xuxv_image_mask,
   key->yx_xuxv_image_mask);
+   found |= key_debug(brw, "xy_uxvx image bound",
+  old_key->xy_uxvx_image_mask,
+  key->xy_uxvx_image_mask);
+
 
for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
   found |= key_debug(brw, "textureGather workarounds",
@@ -412,6 +416,9 @@ brw_populate_sampler_prog_key_data(struct gl_context *ctx,
 case __DRI_IMAGE_COMPONENTS_Y_XUXV:
key->yx_xuxv_image_mask |= 1 << s;
break;
+case __DRI_IMAGE_COMPONENTS_Y_UXVX:
+   key->xy_uxvx_image_mask |= 1 << s;
+   break;
 default:
break;
 }
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 83b8a24509a4..8c6f3d81b14a 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -278,17 +278,20 @@ static struct intel_image_format intel_image_formats[] = {
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
 
-   /* For YUYV buffers, we set up two overlapping DRI images and treat
-* them as planar buffers in the compositors.  Plane 0 is GR88 and
-* samples YU or YV pairs and places Y into the R 

[Mesa-dev] [Bug 101552] Make GALLIUM_HUD lower the grid max value if metric stays much lower all the time

2017-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101552

--- Comment #2 from Shmerl  ---
Thanks for the pointer! That helps indeed. I think for something like FPS
metric, it can be a useful default. But then I guess you might want to have a
negative modifier (counterpart to d).

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] radeonsi: ppc64 glamor backtrace

2017-06-21 Thread Michel Dänzer
On 22/06/17 02:56 AM, Grazvydas Ignotas wrote:
> Looks like nobody tested radeonsi on BE for 5 months at least. You can
> try the attached patch, but I suspect there will be other places like
> this...

Yeah, this is just the tip of the iceberg. There are many reasons why
radeonsi currently can't (and never could) work on big endian hosts.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101552] Make GALLIUM_HUD lower the grid max value if metric stays much lower all the time

2017-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101552

Michel Dänzer  changed:

   What|Removed |Added

 Resolution|--- |INVALID
 Status|NEW |RESOLVED

--- Comment #1 from Michel Dänzer  ---
There's already a modifier for that, from GALLIUM_HUD=help:

  'd' activates dynamic Y axis readjustment to set the value of
  the Y axis to match the highest value still visible in the graph.

(Maybe this should be the default though, at least for some graphs)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101552] Make GALLIUM_HUD lower the grid max value if metric stays much lower all the time

2017-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101552

Bug ID: 101552
   Summary: Make GALLIUM_HUD lower the grid max value if metric
stays much lower all the time
   Product: Mesa
   Version: git
  Hardware: All
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: shtetl...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Currently, if metrics shoot to some very high value, grid adjusts to it to
address that spike, but then it stays that way, even if metric later drops and
never grows that much again. Which basically can make the graph appear
practically at zero all the time after that happens.

It makes sense for grid to adjust to something lower as well, once all spikes
are over. This auto-adjustment feature can be optional with some parameter
which enables it.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] Gallium: Passing drirc options to create_screen() and fixing Rocket League

2017-06-21 Thread Rob Clark
On Wed, Jun 21, 2017 at 8:15 PM, Marek Olšák  wrote:
> On Wed, Jun 21, 2017 at 10:37 PM, Rob Clark  wrote:
>> On Tue, Jun 20, 2017 at 6:54 PM, Marek Olšák  wrote:
>>> Hi,
>>>
>>> This series updates pipe loaders so that flags such as drirc options
>>> can be passed to create_screen(). I have compile-tested everything
>>> except clover.
>>>
>>> The first pipe_screen flag is a drirc option to fix incorrect grass
>>> rendering in Rocket League for radeonsi. Rocket League expects DirectX
>>> behavior for partial derivative computations after discard/kill, but
>>> radeonsi implements the more efficient but stricter OpenGL behavior
>>> and that will remain our default behavior. The new screen flag forces
>>> radeonsi to use the DX behavior for that game.
>>>
>>
>> do we really want this to be a *global* option for the screen?
>
> Yes. Shaders are pipe_screen (global) objects in radeonsi, so a
> compiler option also has to be global. We can't look at the context
> during the TGSI->LLVM translation.

well, I didn't really mean per-screen vs per-context, as much as
per-screen vs per-shader (or maybe more per-screen vs
per-instruction?)

>>
>> I'm just thinking, some drivers use lowering passes that internally
>> generate kill's.  I *guess* it would only matter if they also had
>> ddx/ddy instructions, but not sure.
>>
>> not really sure if this would actually be a problem or not..
>
> Whether or not this affects you depends on how your hardware
> implements kill/discard. Not just ddx/ddy, texture instructions
> computing derivatives internally are affected by kill/discard too.
>

I guess most of the lowering passes that introduce kill's are more
like legacy gl things (clip-planes, glBitmap(), maybe some others but
pretty sure it is all legacy type stuff), so maybe it isn't likely to
matter.  I'm not totally sure about the expected interactions between
kill and other instructions, so not totally sure about whether I
should care.. but figured I should point it out.. and if it doesn't
matter, it doesn't matter.

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: perf: minimize the chances to spread queries across batchbuffers

2017-06-21 Thread Lionel Landwerlin
Counters related to timings will be sensitive to any delay introduced
by the software. In particular, if the begin & end of a performance
query end up in different batches, time-related counters will
exhibit bigger values caused by the time it takes for the kernel
driver to load new requests into the hardware.

Signed-off-by: Lionel Landwerlin 
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 06576a54d03..6b874d0bbee 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1063,6 +1063,14 @@ brw_end_perf_query(struct gl_context *ctx,
  obj->oa.begin_report_id + 1);
   }
 
+  /* We flush the batchbuffer here to minimize the chances that MI_RPC
+   * delimiting commands end up in different batchbuffers. If that's the
+   * case, the measurement will include the time it takes for the kernel
+   * scheduler to load a new request into the hardware. This is manifested
+   * in tools like frameretrace by spikes in the "GPU Core Clocks"
+   * counter.
+   */
+  intel_batchbuffer_flush(brw);
   --brw->perfquery.n_active_oa_queries;
 
   /* NB: even though the query has now ended, it can't be accumulated
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen6: Use isl-based miptree also for stencil rbs

2017-06-21 Thread Jason Ekstrand
On Wed, Jun 21, 2017 at 2:48 PM, Jason Ekstrand 
wrote:

> Reviewed-by: Jason Ekstrand 
>
> On Wed, Jun 21, 2017 at 12:35 PM, Topi Pohjolainen <
> topi.pohjolai...@gmail.com> wrote:
>
>> Fixes dEQP-EGL.functional.image.render_multiple_contexts.
>> gles2_renderbuffer_stencil_stencil_buffer
>>
>> CC: Mark Janes 
>> CC: Jason Ekstrand 
>> CC: Kenneth Graunke 
>> Signed-off-by: Topi Pohjolainen 
>>
>
I went ahead and pushed the patch because I wanted to be able to rebase on
top of all your ISL work.


> ---
>>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 ---
>>  1 file changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> index abc7f989db..69b02ead78 100644
>> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> @@ -897,7 +897,22 @@ intel_miptree_create_for_bo(struct brw_context *brw,
>>  {
>> struct intel_mipmap_tree *mt;
>> uint32_t tiling, swizzle;
>> -   GLenum target;
>> +   const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
>> +
>> +   if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8) {
>> +  mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
>> +0, 0, width, height, depth, 1, ISL_TILING_W,
>> +ISL_SURF_USAGE_STENCIL_BIT |
>> +ISL_SURF_USAGE_TEXTURE_BIT,
>> +BO_ALLOC_FOR_RENDER, bo);
>> +  if (!mt)
>> + return NULL;
>> +
>> +  assert(bo->size >= mt->surf.size);
>> +
>> +  brw_bo_reference(bo);
>> +  return mt;
>> +   }
>>
>> brw_bo_get_tiling(bo, , );
>>
>> @@ -912,8 +927,6 @@ intel_miptree_create_for_bo(struct brw_context *brw,
>>  */
>> assert(pitch >= 0);
>>
>> -   target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
>> -
>> /* The BO already has a tiling format and we shouldn't confuse the
>> lower
>>  * layers by making it try to find a tiling format again.
>>  */
>> --
>> 2.11.0
>>
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 13/13] anv/gpu_memcpy: Rename the gpu_memcpy function

2017-06-21 Thread Jason Ekstrand
I never liked the gpu_memcpy name anyway because I knew something like this
would come up.  I left you one trivial comment on patch 1.  Other than
that, the series is

Reviewed-by: Jason Ekstrand

Given that you're changing core blorp, please make sure you run this
through Jenkins on GL. :-)

--Jason

On Wed, Jun 21, 2017 at 5:15 PM, Nanley Chery  wrote:

> A GPU memcpy function could alternatively be implemented using MI_*
> commands. Provide more detail into how this one operates in case another
> memcpy function is created.
>
> v2:
> - Update the commit message.
> v3:
> - Use 'memcpy' instead of 'cpy' (Jason Ekstrand)
> - Shorten 'streamout' to 'so'
>
> Suggested-by: Jason Ekstrand 
> Signed-off-by: Nanley Chery 
> Reviewed-by: Iago Toral Quiroga  (v2)
> ---
>  src/intel/vulkan/anv_genX.h| 8 
>  src/intel/vulkan/genX_cmd_buffer.c | 6 +++---
>  src/intel/vulkan/genX_gpu_memcpy.c | 8 
>  3 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> index 67147b0e92..8da5e075dc 100644
> --- a/src/intel/vulkan/anv_genX.h
> +++ b/src/intel/vulkan/anv_genX.h
> @@ -64,10 +64,10 @@ genX(emit_urb_setup)(struct anv_device *device, struct
> anv_batch *batch,
>   VkShaderStageFlags active_stages,
>   const unsigned entry_size[4]);
>
> -void genX(cmd_buffer_gpu_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> - struct anv_bo *dst, uint32_t dst_offset,
> - struct anv_bo *src, uint32_t src_offset,
> - uint32_t size);
> +void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> +struct anv_bo *dst, uint32_t dst_offset,
> +struct anv_bo *src, uint32_t src_offset,
> +uint32_t size);
>
>  void genX(blorp_exec)(struct blorp_batch *batch,
>const struct blorp_params *params);
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 6a3e525eb3..53c58ca5b3 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -710,9 +710,9 @@ genX(CmdExecuteCommands)(
>   struct anv_state dst_state = secondary->state.render_pass_
> states;
>   assert(src_state.alloc_size == dst_state.alloc_size);
>
> - genX(cmd_buffer_gpu_memcpy)(primary, ss_bo, dst_state.offset,
> - ss_bo, src_state.offset,
> - src_state.alloc_size);
> + genX(cmd_buffer_so_memcpy)(primary, ss_bo, dst_state.offset,
> +ss_bo, src_state.offset,
> +src_state.alloc_size);
>}
>
>anv_cmd_buffer_add_secondary(primary, secondary);
> diff --git a/src/intel/vulkan/genX_gpu_memcpy.c
> b/src/intel/vulkan/genX_gpu_memcpy.c
> index 3cbc7235cf..5ef35e6283 100644
> --- a/src/intel/vulkan/genX_gpu_memcpy.c
> +++ b/src/intel/vulkan/genX_gpu_memcpy.c
> @@ -52,10 +52,10 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
>  }
>
>  void
> -genX(cmd_buffer_gpu_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> -struct anv_bo *dst, uint32_t dst_offset,
> -struct anv_bo *src, uint32_t src_offset,
> -uint32_t size)
> +genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
> +   struct anv_bo *dst, uint32_t dst_offset,
> +   struct anv_bo *src, uint32_t src_offset,
> +   uint32_t size)
>  {
> if (size == 0)
>return;
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 12/13] anv/blorp: Provide surface states for CCS resolves

2017-06-21 Thread Jason Ekstrand
On Wed, Jun 21, 2017 at 5:15 PM, Nanley Chery  wrote:

> In the future, we plan on using this method to resolve images whose
> surface state fast-clear value is dynamically updated during command
> buffer execution. Start using it now for testing and to reduce churn
> later on.
>
> Signed-off-by: Nanley Chery 
> Reviewed-by: Iago Toral Quiroga 
> ---
>  src/intel/vulkan/anv_blorp.c | 29 ++---
>  1 file changed, 10 insertions(+), 19 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index c4c744868c..7f6ed0efe4 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1493,7 +1493,6 @@ anv_image_ccs_clear(struct anv_cmd_buffer
> *cmd_buffer,
>
>  static void
>  ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
> -   struct blorp_batch *batch,
> uint32_t att)
>  {
> struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
> @@ -1589,12 +1588,6 @@ ccs_resolve_attachment(struct anv_cmd_buffer
> *cmd_buffer,
> if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
>return;
>
> -   struct blorp_surf surf;
> -   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> -att_state->aux_usage, );
> -   if (att_state->fast_clear)
> -  surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
> -
> /* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
>  *
>  *"When performing a render target resolve, PIPE_CONTROL with end
> of
> @@ -1610,12 +1603,8 @@ ccs_resolve_attachment(struct anv_cmd_buffer
> *cmd_buffer,
> cmd_buffer->state.pending_pipe_bits |=
>ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
>
> -   for (uint32_t layer = 0; layer < fb->layers; layer++) {
> -  blorp_ccs_resolve(batch, ,
> -iview->isl.base_level,
> -iview->isl.base_array_layer + layer,
> -iview->isl.format, resolve_op);
> -   }
> +   anv_ccs_resolve(cmd_buffer, att_state->color_rt_state, image,
> +   iview->isl.base_level, fb->layers, resolve_op);
>
> cmd_buffer->state.pending_pipe_bits |=
>ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> @@ -1633,16 +1622,13 @@ anv_cmd_buffer_resolve_subpass(struct
> anv_cmd_buffer *cmd_buffer)
> struct anv_subpass *subpass = cmd_buffer->state.subpass;
>
>
> -   struct blorp_batch batch;
> -   blorp_batch_init(_buffer->device->blorp, , cmd_buffer, 0);
> -
> for (uint32_t i = 0; i < subpass->color_count; ++i) {
>const uint32_t att = subpass->color_attachments[i].attachment;
>if (att == VK_ATTACHMENT_UNUSED)
>   continue;
>
>assert(att < cmd_buffer->state.pass->attachment_count);
> -  ccs_resolve_attachment(cmd_buffer, , att);
> +  ccs_resolve_attachment(cmd_buffer, att);
> }
>
> if (subpass->has_resolve) {
> @@ -1681,6 +1667,10 @@ anv_cmd_buffer_resolve_subpass(struct
> anv_cmd_buffer *cmd_buffer)
>   const VkRect2D render_area = cmd_buffer->state.render_area;
>
>   assert(src_iview->aspect_mask == dst_iview->aspect_mask);
> +
> + struct blorp_batch batch;
> + blorp_batch_init(_buffer->device->blorp, ,
> cmd_buffer, 0);
> +
>   resolve_image(, src_iview->image,
> src_iview->isl.base_level,
> src_iview->isl.base_array_layer,
> @@ -1692,11 +1682,12 @@ anv_cmd_buffer_resolve_subpass(struct
> anv_cmd_buffer *cmd_buffer)
> render_area.offset.x, render_area.offset.y,
> render_area.extent.width,
> render_area.extent.height);
>
> - ccs_resolve_attachment(cmd_buffer, , dst_att);
> + blorp_batch_finish();
>

At some point, I'd like to make blorp smarter so that it can avoid
re-emitting all of the state on every blorp op.  When that happens, we'll
want to figure out how to move blorp_batch_init/finish back out so that
they happen outside of the loop.  Not a big deal today though.


> +
> + ccs_resolve_attachment(cmd_buffer, dst_att);
>}
> }
>
> -   blorp_batch_finish();
>  }
>
>  void
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 01/13] intel/blorp: Assert levels and layers are in range

2017-06-21 Thread Jason Ekstrand
On Wed, Jun 21, 2017 at 5:15 PM, Nanley Chery  wrote:

> v2 (Jason Ekstrand):
> - Update commit title
> - Check aux level and layer as well
>
> Signed-off-by: Nanley Chery 
> Reviewed-by: Iago Toral Quiroga  (v1)
> ---
>  src/intel/blorp/blorp.c   | 7 +++
>  src/intel/blorp/blorp_clear.c | 4 
>  2 files changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
> index 9c88658e8a..7e30e20a59 100644
> --- a/src/intel/blorp/blorp.c
> +++ b/src/intel/blorp/blorp.c
> @@ -66,6 +66,8 @@ brw_blorp_surface_info_init(struct blorp_context *blorp,
>  unsigned int level, unsigned int layer,
>  enum isl_format format, bool is_render_target)
>  {
> +   assert(level < surf->surf->levels);
> +
> info->enabled = true;
>
> if (format == ISL_FORMAT_UNSUPPORTED)
> @@ -90,6 +92,9 @@ brw_blorp_surface_info_init(struct blorp_context *blorp,
> if (info->aux_usage != ISL_AUX_USAGE_NONE) {
>info->aux_surf = *surf->aux_surf;
>info->aux_addr = surf->aux_addr;
> +  assert(level < info->aux_surf.levels);
> +  assert(layer < MAX2(info->aux_surf.logical_level0_px.depth >>
> level,
> +  info->aux_surf.logical_level0_px.array_len));
> }
>
> info->clear_color = surf->clear_color;
> @@ -106,6 +111,8 @@ brw_blorp_surface_info_init(struct blorp_context
> *blorp,
> info->view.array_len = MAX2(info->surf.logical_level0_px.depth,
> info->surf.logical_level0_px.array_len);
>
> +   assert(layer < info->view.array_len);
>

Might be more straightforward to move this assert to the top and make it
look like the aux assert.  I don't care too much though.


> +
> if (!is_render_target &&
> (info->surf.dim == ISL_SURF_DIM_3D ||
>  info->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY)) {
> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index efacadfebe..369e18726f 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -707,10 +707,6 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> struct blorp_params params;
> blorp_params_init();
>
> -   /* Layered and mipmapped fast clear is only available from Gen8
> onwards. */
> -   assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 8 ||
> -  (level == 0 && layer == 0));
> -
> brw_blorp_surface_info_init(batch->blorp, , surf,
> level, layer, format, true);
>
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/11] gallium/hud: add glthread counters

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/hud/hud_context.c |  9 
 src/gallium/auxiliary/hud/hud_cpu.c | 74 +
 src/gallium/auxiliary/hud/hud_private.h |  8 
 3 files changed, 91 insertions(+)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index 9ab7822..8172313 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -1146,20 +1146,29 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   }
   else if (strcmp(name, "cpu") == 0) {
  hud_cpu_graph_install(pane, ALL_CPUS);
   }
   else if (sscanf(name, "cpu%u%s", , s) == 1) {
  hud_cpu_graph_install(pane, i);
   }
   else if (strcmp(name, "API-thread-busy") == 0) {
  hud_thread_busy_install(pane, name, false);
   }
+  else if (strcmp(name, "API-thread-offloaded-slots") == 0) {
+ hud_thread_counter_install(pane, name, HUD_COUNTER_OFFLOADED);
+  }
+  else if (strcmp(name, "API-thread-direct-slots") == 0) {
+ hud_thread_counter_install(pane, name, HUD_COUNTER_DIRECT);
+  }
+  else if (strcmp(name, "API-thread-num-syncs") == 0) {
+ hud_thread_counter_install(pane, name, HUD_COUNTER_SYNCS);
+  }
   else if (strcmp(name, "main-thread-busy") == 0) {
  hud_thread_busy_install(pane, name, true);
   }
 #if HAVE_GALLIUM_EXTRA_HUD
   else if (sscanf(name, "nic-rx-%s", arg_name) == 1) {
  hud_nic_graph_install(pane, arg_name, NIC_DIRECTION_RX);
   }
   else if (sscanf(name, "nic-tx-%s", arg_name) == 1) {
  hud_nic_graph_install(pane, arg_name, NIC_DIRECTION_TX);
   }
diff --git a/src/gallium/auxiliary/hud/hud_cpu.c 
b/src/gallium/auxiliary/hud/hud_cpu.c
index 38403f9..4caaab6 100644
--- a/src/gallium/auxiliary/hud/hud_cpu.c
+++ b/src/gallium/auxiliary/hud/hud_cpu.c
@@ -300,10 +300,84 @@ hud_thread_busy_install(struct hud_pane *pane, const char 
*name, bool main)
gr->query_new_value = query_api_thread_busy_status;
 
/* Don't use free() as our callback as that messes up Gallium's
 * memory debugger.  Use simple free_query_data() wrapper.
 */
gr->free_query_data = free_query_data;
 
hud_pane_add_graph(pane, gr);
hud_pane_set_max_value(pane, 100);
 }
+
+struct counter_info {
+   enum hud_counter counter;
+   unsigned last_value;
+   int64_t last_time;
+};
+
+static unsigned get_counter(struct hud_graph *gr, enum hud_counter counter)
+{
+   struct util_queue_monitoring *mon = gr->pane->hud->monitored_queue;
+
+   if (!mon || !mon->queue)
+  return 0;
+
+   switch (counter) {
+   case HUD_COUNTER_OFFLOADED:
+  return mon->num_offloaded_items;
+   case HUD_COUNTER_DIRECT:
+  return mon->num_direct_items;
+   case HUD_COUNTER_SYNCS:
+  return mon->num_syncs;
+   default:
+  assert(0);
+  return 0;
+   }
+}
+
+static void
+query_thread_counter(struct hud_graph *gr)
+{
+   struct counter_info *info = gr->query_data;
+   int64_t now = os_time_get_nano();
+
+   if (info->last_time) {
+  if (info->last_time + gr->pane->period*1000 <= now) {
+ unsigned current_value = get_counter(gr, info->counter);
+
+ hud_graph_add_value(gr, current_value - info->last_value);
+ info->last_value = current_value;
+ info->last_time = now;
+  }
+   } else {
+  /* initialize */
+  info->last_value = get_counter(gr, info->counter);
+  info->last_time = now;
+   }
+}
+
+void hud_thread_counter_install(struct hud_pane *pane, const char *name,
+enum hud_counter counter)
+{
+   struct hud_graph *gr = CALLOC_STRUCT(hud_graph);
+   if (!gr)
+  return;
+
+   strcpy(gr->name, name);
+
+   gr->query_data = CALLOC_STRUCT(counter_info);
+   if (!gr->query_data) {
+  FREE(gr);
+  return;
+   }
+
+   ((struct counter_info*)gr->query_data)->counter = counter;
+   gr->query_new_value = query_thread_counter;
+
+   /* Don't use free() as our callback as that messes up Gallium's
+* memory debugger.  Use simple free_query_data() wrapper.
+*/
+   gr->free_query_data = free_query_data;
+
+   hud_pane_add_graph(pane, gr);
+   hud_pane_set_max_value(pane, 100);
+}
diff --git a/src/gallium/auxiliary/hud/hud_private.h 
b/src/gallium/auxiliary/hud/hud_private.h
index b8726da..2b1717d 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -26,20 +26,26 @@
  **/
 
 #ifndef HUD_PRIVATE_H
 #define HUD_PRIVATE_H
 
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "util/list.h"
 #include "hud/font.h"
 
+enum hud_counter {
+   HUD_COUNTER_OFFLOADED,
+   HUD_COUNTER_DIRECT,
+   HUD_COUNTER_SYNCS,
+};
+
 struct hud_context {
struct pipe_context *pipe;
struct cso_context *cso;
 
struct hud_batch_query_context *batch_query;
  

[Mesa-dev] [PATCH 10/11] mesa/glthread: decrease the batch size for better perf scaling

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

This is the key to better performance.
---
 src/mesa/main/glthread.h | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
index 36692fe..dd65931 100644
--- a/src/mesa/main/glthread.h
+++ b/src/mesa/main/glthread.h
@@ -19,30 +19,38 @@
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
 
 #ifndef _GLTHREAD_H
 #define _GLTHREAD_H
 
 #include "main/mtypes.h"
 
-/* Command size is a number of bytes stored in a short. */
-#define MARSHAL_MAX_CMD_SIZE 65535
+/* The size of one batch and the maximum size of one call.
+ *
+ * This should be as low as possible, so that:
+ * - multiple synchronizations within a frame don't slow us down much
+ * - a smaller number of calls per frame can still get decent parallelism
+ * - the memory footprint of the queue is low, and with that comes a lower
+ *   chance of experiencing CPU cache thrashing
+ * but it should be high enough so that u_queue overhead remains negligible.
+ */
+#define MARSHAL_MAX_CMD_SIZE (8 * 1024)
 
 /* The number of batch slots in memory.
  *
  * One batch is being executed, one batch is being filled, the rest are
  * waiting batches. There must be at least 1 slot for a waiting batch,
  * so the minimum number of batches is 3.
  */
-#define MARSHAL_MAX_BATCHES 4
+#define MARSHAL_MAX_BATCHES 8
 
 #include 
 #include 
 #include 
 #include "util/u_queue.h"
 
 enum marshal_dispatch_cmd_id;
 
 /** A single batch of commands queued up for execution. */
 struct glthread_batch
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/11] gallium/hud: add hud_pane::hud pointer

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

for later use
---
 src/gallium/auxiliary/hud/hud_context.c | 8 +---
 src/gallium/auxiliary/hud/hud_private.h | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index 551cea9..ae2e0fb 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -771,29 +771,31 @@ hud_pane_update_dyn_ceiling(struct hud_graph *gr, struct 
hud_pane *pane)
}
 
/*
 * Mark this adjustment run so we could avoid repeating a full update
 * again needlessly in case the pane has more than one graph.
 */
pane->dyn_ceil_last_ran = gr->index;
 }
 
 static struct hud_pane *
-hud_pane_create(unsigned x1, unsigned y1, unsigned x2, unsigned y2,
+hud_pane_create(struct hud_context *hud,
+unsigned x1, unsigned y1, unsigned x2, unsigned y2,
 unsigned period, uint64_t max_value, uint64_t ceiling,
 boolean dyn_ceiling, boolean sort_items)
 {
struct hud_pane *pane = CALLOC_STRUCT(hud_pane);
 
if (!pane)
   return NULL;
 
+   pane->hud = hud;
pane->x1 = x1;
pane->y1 = y1;
pane->x2 = x2;
pane->y2 = y2;
pane->inner_x1 = x1 + 1;
pane->inner_x2 = x2 - 1;
pane->inner_y1 = y1 + 1;
pane->inner_y2 = y2 - 1;
pane->inner_width = pane->inner_x2 - pane->inner_x1;
pane->inner_height = pane->inner_y2 - pane->inner_y1;
@@ -1116,22 +1118,22 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
 &dyn_ceiling, &reset_colors, &sort_items);
 
  /*
   * Keep track of overall column width to avoid pane overlapping in case
   * later we create a new column while the bottom pane in the current
   * column is less wide than the rest of the panes in it.
   */
  column_width = width > column_width ? width : column_width;
 
   if (!pane) {
- pane = hud_pane_create(x, y, x + width, y + height, period, 10,
- ceiling, dyn_ceiling, sort_items);
+ pane = hud_pane_create(hud, x, y, x + width, y + height, period, 10,
+ceiling, dyn_ceiling, sort_items);
  if (!pane)
 return;
   }
 
   if (reset_colors) {
  pane->next_color = 0;
  reset_colors = false;
   }
 
   /* Add a graph. */
diff --git a/src/gallium/auxiliary/hud/hud_private.h 
b/src/gallium/auxiliary/hud/hud_private.h
index fba919e..580ceb3 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -97,20 +97,21 @@ struct hud_graph {
 
/* mutable variables */
unsigned num_vertices;
unsigned index; /* vertex index being updated */
uint64_t current_value;
FILE *fd;
 };
 
 struct hud_pane {
struct list_head head;
+   struct hud_context *hud;
unsigned x1, y1, x2, y2;
unsigned inner_x1;
unsigned inner_y1;
unsigned inner_x2;
unsigned inner_y2;
unsigned inner_width;
unsigned inner_height;
float yscale;
unsigned max_num_vertices;
unsigned last_line; /* index of the last describing line in the graph */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/11] drirc: whitelist glthread for a few games

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

Performance deltas:
Alien Isolation: +17% (it varies depending on the location)
Borderlands 2: +50% (it varies depending on the location)
BioShock Infinite: +76% (benchmark)
Civilization 6: +20% (benchmark)
---
 src/mesa/drivers/dri/common/drirc | 16 
 1 file changed, 16 insertions(+)

diff --git a/src/mesa/drivers/dri/common/drirc 
b/src/mesa/drivers/dri/common/drirc
index 494d768..69b735c 100644
--- a/src/mesa/drivers/dri/common/drirc
+++ b/src/mesa/drivers/dri/common/drirc
@@ -152,20 +152,36 @@ TODO: document the other workarounds.
 
 
 
 
 
 
 
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 
 
 
 
 

 

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/11] mesa/glthread: add glthread "perf" counters and pass them to gallium HUD

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

for HUD integration in following commits. This valuable profiling data
will allow us to see on the HUD how well glthread is able to utilize
parallelism. This is better than benchmarking, because you can see
exactly what's happening and you don't have to be CPU-bound.

u_threaded_context has the same counters.
---
 src/gallium/auxiliary/hud/hud_context.c |  8 
 src/gallium/auxiliary/hud/hud_context.h |  5 +
 src/gallium/auxiliary/hud/hud_private.h |  2 ++
 src/gallium/include/state_tracker/st_api.h  |  4 +++-
 src/gallium/state_trackers/dri/dri_screen.c |  6 +-
 src/mesa/main/dd.h  |  4 +++-
 src/mesa/main/glthread.c| 21 +++--
 src/mesa/main/glthread.h|  3 +++
 src/mesa/state_tracker/st_context.c |  5 +++--
 src/util/u_queue.h  | 14 ++
 10 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index f32831b..551cea9 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -1687,10 +1687,18 @@ hud_destroy(struct hud_context *hud)
}
 
hud_batch_query_cleanup(&hud->batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
pipe_sampler_view_reference(&hud->font_sampler_view, NULL);
pipe_resource_reference(&hud->font.texture, NULL);
FREE(hud);
 }
+
+void
+hud_add_queue_for_monitoring(struct hud_context *hud,
+ struct util_queue_monitoring *queue_info)
+{
+   assert(!hud->monitored_queue);
+   hud->monitored_queue = queue_info;
+}
diff --git a/src/gallium/auxiliary/hud/hud_context.h 
b/src/gallium/auxiliary/hud/hud_context.h
index abf2ad5..5a7e13b 100644
--- a/src/gallium/auxiliary/hud/hud_context.h
+++ b/src/gallium/auxiliary/hud/hud_context.h
@@ -25,21 +25,26 @@
  *
  **/
 
 #ifndef HUD_CONTEXT_H
 #define HUD_CONTEXT_H
 
 struct hud_context;
 struct cso_context;
 struct pipe_context;
 struct pipe_resource;
+struct util_queue_monitoring;
 
 struct hud_context *
 hud_create(struct pipe_context *pipe, struct cso_context *cso);
 
 void
 hud_destroy(struct hud_context *hud);
 
 void
 hud_draw(struct hud_context *hud, struct pipe_resource *tex);
 
+void
+hud_add_queue_for_monitoring(struct hud_context *hud,
+ struct util_queue_monitoring *queue_info);
+
 #endif
diff --git a/src/gallium/auxiliary/hud/hud_private.h 
b/src/gallium/auxiliary/hud/hud_private.h
index f765bd9..fba919e 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -33,20 +33,22 @@
 #include "util/list.h"
 #include "hud/font.h"
 
 struct hud_context {
struct pipe_context *pipe;
struct cso_context *cso;
 
struct hud_batch_query_context *batch_query;
struct list_head pane_list;
 
+   struct util_queue_monitoring *monitored_queue;
+
/* states */
struct pipe_blend_state no_blend, alpha_blend;
struct pipe_depth_stencil_alpha_state dsa;
void *fs_color, *fs_text;
struct pipe_rasterizer_state rasterizer, rasterizer_aa_lines;
void *vs;
struct pipe_vertex_element velems[2];
 
/* font */
struct util_font font;
diff --git a/src/gallium/include/state_tracker/st_api.h 
b/src/gallium/include/state_tracker/st_api.h
index 47d06c8..d641092 100644
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -172,20 +172,21 @@ enum st_manager_param {
 *
 * For the mesa state tracker that means that it needs to invalidate
 * the framebuffer in glViewport itself.
 */
ST_MANAGER_BROKEN_INVALIDATE
 };
 
 struct pipe_context;
 struct pipe_resource;
 struct pipe_fence_handle;
+struct util_queue_monitoring;
 
 /**
  * Used in st_context_iface->get_resource_for_egl_image.
  */
 struct st_context_resource
 {
/* these fields are filled in by the caller */
enum st_context_resource_type type;
void *resource;
 
@@ -467,21 +468,22 @@ struct st_manager
/**
 * Query an manager param.
 */
int (*get_param)(struct st_manager *smapi,
 enum st_manager_param param);
 
/**
 * Call the loader function setBackgroundContext. Called from the worker
 * thread.
 */
-   void (*set_background_context)(struct st_context_iface *stctxi);
+   void (*set_background_context)(struct st_context_iface *stctxi,
+  struct util_queue_monitoring *queue_info);
 };
 
 /**
  * Represent a rendering API such as OpenGL or OpenVG.
  *
  * Implemented by the state tracker and used by the state tracker manager.
  */
 struct st_api
 {
/**
diff --git a/src/gallium/state_trackers/dri/dri_screen.c 

[Mesa-dev] [PATCH 08/11] gallium/hud: add API-thread-busy for monitoring the thread load

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/hud/hud_context.c |  5 -
 src/gallium/auxiliary/hud/hud_cpu.c | 19 +--
 src/gallium/auxiliary/hud/hud_private.h |  2 +-
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ae2e0fb..9ab7822 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -1143,22 +1143,25 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   /* IF YOU CHANGE THIS, UPDATE print_help! */
   if (strcmp(name, "fps") == 0) {
  hud_fps_graph_install(pane);
   }
   else if (strcmp(name, "cpu") == 0) {
  hud_cpu_graph_install(pane, ALL_CPUS);
   }
   else if (sscanf(name, "cpu%u%s", &i, s) == 1) {
  hud_cpu_graph_install(pane, i);
   }
+  else if (strcmp(name, "API-thread-busy") == 0) {
+ hud_thread_busy_install(pane, name, false);
+  }
   else if (strcmp(name, "main-thread-busy") == 0) {
- hud_main_thread_busy_install(pane, name);
+ hud_thread_busy_install(pane, name, true);
   }
 #if HAVE_GALLIUM_EXTRA_HUD
   else if (sscanf(name, "nic-rx-%s", arg_name) == 1) {
  hud_nic_graph_install(pane, arg_name, NIC_DIRECTION_RX);
   }
   else if (sscanf(name, "nic-tx-%s", arg_name) == 1) {
  hud_nic_graph_install(pane, arg_name, NIC_DIRECTION_TX);
   }
   else if (sscanf(name, "nic-rssi-%s", arg_name) == 1) {
  hud_nic_graph_install(pane, arg_name, NIC_RSSI_DBM);
diff --git a/src/gallium/auxiliary/hud/hud_cpu.c 
b/src/gallium/auxiliary/hud/hud_cpu.c
index 26f9fa7..38403f9 100644
--- a/src/gallium/auxiliary/hud/hud_cpu.c
+++ b/src/gallium/auxiliary/hud/hud_cpu.c
@@ -25,20 +25,21 @@
  *
  **/
 
 /* This file contains code for reading CPU load for displaying on the HUD.
  */
 
 #include "hud/hud_private.h"
 #include "os/os_time.h"
 #include "os/os_thread.h"
 #include "util/u_memory.h"
+#include "util/u_queue.h"
 #include 
 #include 
 #ifdef PIPE_OS_WINDOWS
 #include 
 #endif
 
 
 #ifdef PIPE_OS_WINDOWS
 
 static inline uint64_t
@@ -224,33 +225,46 @@ hud_get_num_cpus(void)
uint64_t busy, total;
int i = 0;
 
while (get_cpu_stats(i, &busy, &total))
   i++;
 
return i;
 }
 
 struct thread_info {
+   bool main_thread;
int64_t last_time;
int64_t last_thread_time;
 };
 
 static void
 query_api_thread_busy_status(struct hud_graph *gr)
 {
struct thread_info *info = gr->query_data;
int64_t now = os_time_get_nano();
 
if (info->last_time) {
   if (info->last_time + gr->pane->period*1000 <= now) {
- int64_t thread_now = pipe_current_thread_get_time_nano();
+ int64_t thread_now;
+
+ if (info->main_thread) {
+thread_now = pipe_current_thread_get_time_nano();
+ } else {
+struct util_queue_monitoring *mon = gr->pane->hud->monitored_queue;
+
+if (mon && mon->queue)
+   thread_now = util_queue_get_thread_time_nano(mon->queue, 0);
+else
+   thread_now = 0;
+ }
+
  unsigned percent = (thread_now - info->last_thread_time) * 100 /
 (now - info->last_time);
 
  /* Check if the context changed a thread, so that we don't show
   * a random value. When a thread is changed, the new thread clock
   * is different, which can result in "percent" being very high.
   */
  if (percent > 100)
 percent = 0;
  hud_graph_add_value(gr, percent);
@@ -259,36 +273,37 @@ query_api_thread_busy_status(struct hud_graph *gr)
  info->last_time = now;
   }
} else {
   /* initialize */
   info->last_time = now;
   info->last_thread_time = pipe_current_thread_get_time_nano();
}
 }
 
 void
-hud_main_thread_busy_install(struct hud_pane *pane, const char *name)
+hud_thread_busy_install(struct hud_pane *pane, const char *name, bool main)
 {
struct hud_graph *gr;
 
gr = CALLOC_STRUCT(hud_graph);
if (!gr)
   return;
 
strcpy(gr->name, name);
 
gr->query_data = CALLOC_STRUCT(thread_info);
if (!gr->query_data) {
   FREE(gr);
   return;
}
 
+   ((struct thread_info*)gr->query_data)->main_thread = main;
gr->query_new_value = query_api_thread_busy_status;
 
/* Don't use free() as our callback as that messes up Gallium's
 * memory debugger.  Use simple free_query_data() wrapper.
 */
gr->free_query_data = free_query_data;
 
hud_pane_add_graph(pane, gr);
hud_pane_set_max_value(pane, 100);
 }
diff --git a/src/gallium/auxiliary/hud/hud_private.h 
b/src/gallium/auxiliary/hud/hud_private.h
index 580ceb3..b8726da 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -137,21 +137,21 @@ 

[Mesa-dev] [PATCH 05/11] gallium/hud: move struct hud_context to hud_private.h

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/hud/hud_context.c | 46 ---
 src/gallium/auxiliary/hud/hud_private.h | 48 +
 2 files changed, 48 insertions(+), 46 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index 922ab96..f32831b 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -32,83 +32,37 @@
  * The HUD is controlled with the GALLIUM_HUD environment variable.
  * Set GALLIUM_HUD=help for more info.
  */
 
 #include 
 #include 
 #include 
 
 #include "hud/hud_context.h"
 #include "hud/hud_private.h"
-#include "hud/font.h"
 
 #include "cso_cache/cso_context.h"
 #include "util/u_draw_quad.h"
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_sampler.h"
 #include "util/u_simple_shaders.h"
 #include "util/u_string.h"
 #include "util/u_upload_mgr.h"
 #include "tgsi/tgsi_text.h"
 #include "tgsi/tgsi_dump.h"
 
 /* Control the visibility of all HUD contexts */
 static boolean huds_visible = TRUE;
 
-struct hud_context {
-   struct pipe_context *pipe;
-   struct cso_context *cso;
-
-   struct hud_batch_query_context *batch_query;
-   struct list_head pane_list;
-
-   /* states */
-   struct pipe_blend_state no_blend, alpha_blend;
-   struct pipe_depth_stencil_alpha_state dsa;
-   void *fs_color, *fs_text;
-   struct pipe_rasterizer_state rasterizer, rasterizer_aa_lines;
-   void *vs;
-   struct pipe_vertex_element velems[2];
-
-   /* font */
-   struct util_font font;
-   struct pipe_sampler_view *font_sampler_view;
-   struct pipe_sampler_state font_sampler_state;
-
-   /* VS constant buffer */
-   struct {
-  float color[4];
-  float two_div_fb_width;
-  float two_div_fb_height;
-  float translate[2];
-  float scale[2];
-  float padding[2];
-   } constants;
-   struct pipe_constant_buffer constbuf;
-
-   unsigned fb_width, fb_height;
-
-   /* vertices for text and background drawing are accumulated here and then
-* drawn all at once */
-   struct vertex_queue {
-  float *vertices;
-  struct pipe_vertex_buffer vbuf;
-  unsigned max_num_vertices;
-  unsigned num_vertices;
-  unsigned buffer_size;
-   } text, bg, whitelines, color_prims;
-
-   bool has_srgb;
-};
 
 #ifdef PIPE_OS_UNIX
 static void
 signal_visible_handler(int sig, siginfo_t *siginfo, void *context)
 {
huds_visible = !huds_visible;
 }
 #endif
 
 static void
diff --git a/src/gallium/auxiliary/hud/hud_private.h 
b/src/gallium/auxiliary/hud/hud_private.h
index bf9962d..f765bd9 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -22,21 +22,69 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **/
 
 #ifndef HUD_PRIVATE_H
 #define HUD_PRIVATE_H
 
 #include "pipe/p_context.h"
+#include "pipe/p_state.h"
 #include "util/list.h"
+#include "hud/font.h"
+
+struct hud_context {
+   struct pipe_context *pipe;
+   struct cso_context *cso;
+
+   struct hud_batch_query_context *batch_query;
+   struct list_head pane_list;
+
+   /* states */
+   struct pipe_blend_state no_blend, alpha_blend;
+   struct pipe_depth_stencil_alpha_state dsa;
+   void *fs_color, *fs_text;
+   struct pipe_rasterizer_state rasterizer, rasterizer_aa_lines;
+   void *vs;
+   struct pipe_vertex_element velems[2];
+
+   /* font */
+   struct util_font font;
+   struct pipe_sampler_view *font_sampler_view;
+   struct pipe_sampler_state font_sampler_state;
+
+   /* VS constant buffer */
+   struct {
+  float color[4];
+  float two_div_fb_width;
+  float two_div_fb_height;
+  float translate[2];
+  float scale[2];
+  float padding[2];
+   } constants;
+   struct pipe_constant_buffer constbuf;
+
+   unsigned fb_width, fb_height;
+
+   /* vertices for text and background drawing are accumulated here and then
+* drawn all at once */
+   struct vertex_queue {
+  float *vertices;
+  struct pipe_vertex_buffer vbuf;
+  unsigned max_num_vertices;
+  unsigned num_vertices;
+  unsigned buffer_size;
+   } text, bg, whitelines, color_prims;
+
+   bool has_srgb;
+};
 
 struct hud_graph {
/* initialized by common code */
struct list_head head;
struct hud_pane *pane;
float color[3];
float *vertices; /* ring buffer of vertices */
 
/* name and query */
char name[128];
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/11] util: move pipe_thread_is_self from gallium to src/util

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/os/os_thread.h | 11 ---
 src/gallium/state_trackers/nine/nine_state.c |  2 +-
 src/util/u_thread.h  | 12 
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/os/os_thread.h 
b/src/gallium/auxiliary/os/os_thread.h
index 0a238e5..10d4695 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -35,31 +35,20 @@
 
 #ifndef OS_THREAD_H_
 #define OS_THREAD_H_
 
 
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h" /* for assert */
 #include "util/u_thread.h"
 
 
-static inline int pipe_thread_is_self( thrd_t thread )
-{
-#if defined(HAVE_PTHREAD)
-#  if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && 
defined(__GLIBC_MINOR__) && \
-  (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12))
-   return pthread_equal(pthread_self(), thread);
-#  endif
-#endif
-   return 0;
-}
-
 #define pipe_mutex_assert_locked(mutex) \
__pipe_mutex_assert_locked(&(mutex))
 
 static inline void
 __pipe_mutex_assert_locked(mtx_t *mutex)
 {
 #ifdef DEBUG
/* NOTE: this would not work for recursive mutexes, but
 * mtx_t doesn't support those
 */
diff --git a/src/gallium/state_trackers/nine/nine_state.c 
b/src/gallium/state_trackers/nine/nine_state.c
index 3093576..a9a41af 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -286,21 +286,21 @@ nine_context_get_pipe( struct NineDevice9 *device )
 }
 
 struct pipe_context *
 nine_context_get_pipe_multithread( struct NineDevice9 *device )
 {
 struct csmt_context *ctx = device->csmt_ctx;
 
 if (!device->csmt_active)
 return device->context.pipe;
 
-if (!pipe_thread_is_self(ctx->worker))
+if (!u_thread_is_self(ctx->worker))
 nine_csmt_process(device);
 
 return device->context.pipe;
 }
 
 struct pipe_context *
 nine_context_get_pipe_acquire( struct NineDevice9 *device )
 {
 nine_csmt_pause(device);
 return device->context.pipe;
diff --git a/src/util/u_thread.h b/src/util/u_thread.h
index 8eab3a5..6b5458a 100644
--- a/src/util/u_thread.h
+++ b/src/util/u_thread.h
@@ -21,20 +21,21 @@
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  **/
 
 #ifndef U_THREAD_H_
 #define U_THREAD_H_
 
 #include 
+#include 
 
 #include "c11/threads.h"
 
 #ifdef HAVE_PTHREAD
 #include 
 #endif
 
 
 static inline thrd_t u_thread_create(int (*routine)(void *), void *param)
 {
@@ -81,11 +82,22 @@ u_thread_get_time_nano(thrd_t thread)
clockid_t cid;
 
pthread_getcpuclockid(thread, &cid);
clock_gettime(cid, &ts);
return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
 #else
return 0;
 #endif
 }
 
+static inline bool u_thread_is_self(thrd_t thread)
+{
+#if defined(HAVE_PTHREAD)
+#  if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && 
defined(__GLIBC_MINOR__) && \
+  (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12))
+   return pthread_equal(pthread_self(), thread);
+#  endif
+#endif
+   return false;
+}
+
 #endif /* U_THREAD_H_ */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/11] mesa/glthread: switch to u_queue and redesign the batch management

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

This mirrors exactly how u_threaded_context works.
If you understand this, you also understand u_threaded_context.
---
 src/mesa/main/glthread.c | 191 ++-
 src/mesa/main/glthread.h |  88 --
 src/mesa/main/marshal.h  |  10 ++-
 3 files changed, 91 insertions(+), 198 deletions(-)

diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
index e90709c..d467298 100644
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -31,156 +31,97 @@
  * quickly logs the GL commands to a buffer to be processed by a worker
  * thread.
  */
 
 #include "main/mtypes.h"
 #include "main/glthread.h"
 #include "main/marshal.h"
 #include "main/marshal_generated.h"
 #include "util/u_thread.h"
 
-static void
-glthread_allocate_batch(struct gl_context *ctx)
-{
-   struct glthread_state *glthread = ctx->GLThread;
-
-   /* TODO: handle memory allocation failure. */
-   glthread->batch = malloc(sizeof(*glthread->batch));
-   if (!glthread->batch)
-  return;
-   memset(glthread->batch, 0, offsetof(struct glthread_batch, buffer));
-}
 
 static void
-glthread_unmarshal_batch(struct gl_context *ctx, struct glthread_batch *batch,
- const bool release_batch)
+glthread_unmarshal_batch(void *job, int thread_index)
 {
+   struct glthread_batch *batch = (struct glthread_batch*)job;
+   struct gl_context *ctx = batch->ctx;
size_t pos = 0;
 
_glapi_set_dispatch(ctx->CurrentServerDispatch);
 
while (pos < batch->used)
   pos += _mesa_unmarshal_dispatch_cmd(ctx, &batch->buffer[pos]);
 
assert(pos == batch->used);
-
-   if (release_batch)
-  free(batch);
-   else
-  batch->used = 0;
+   batch->used = 0;
 }
 
-static void *
-glthread_worker(void *data)
+static void
+glthread_thread_initialization(void *job, int thread_index)
 {
-   struct gl_context *ctx = data;
-   struct glthread_state *glthread = ctx->GLThread;
+   struct gl_context *ctx = (struct gl_context*)job;
 
ctx->Driver.SetBackgroundContext(ctx);
_glapi_set_context(ctx);
-
-   u_thread_setname("mesa_glthread");
-
-   pthread_mutex_lock(>mutex);
-
-   while (true) {
-  struct glthread_batch *batch;
-
-  /* Block (dropping the lock) until new work arrives for us. */
-  while (!glthread->batch_queue && !glthread->shutdown) {
- pthread_cond_broadcast(>work_done);
- pthread_cond_wait(>new_work, >mutex);
-  }
-
-  batch = glthread->batch_queue;
-
-  if (glthread->shutdown && !batch) {
- pthread_cond_broadcast(>work_done);
- pthread_mutex_unlock(>mutex);
- return NULL;
-  }
-  glthread->batch_queue = batch->next;
-  if (glthread->batch_queue_tail == >next)
- glthread->batch_queue_tail = >batch_queue;
-
-  glthread->busy = true;
-  pthread_mutex_unlock(>mutex);
-
-  glthread_unmarshal_batch(ctx, batch, true);
-
-  pthread_mutex_lock(>mutex);
-  glthread->busy = false;
-   }
-
-   /* UNREACHED */
-   return NULL;
 }
 
 void
 _mesa_glthread_init(struct gl_context *ctx)
 {
struct glthread_state *glthread = calloc(1, sizeof(*glthread));
 
if (!glthread)
   return;
 
+   if (!util_queue_init(>queue, "glthread", MARSHAL_MAX_BATCHES - 2,
+1, 0)) {
+  free(glthread);
+  return;
+   }
+
ctx->MarshalExec = _mesa_create_marshal_table(ctx);
if (!ctx->MarshalExec) {
+  util_queue_destroy(>queue);
   free(glthread);
   return;
}
 
-   ctx->CurrentClientDispatch = ctx->MarshalExec;
-
-   pthread_mutex_init(>mutex, NULL);
-   pthread_cond_init(>new_work, NULL);
-   pthread_cond_init(>work_done, NULL);
+   for (unsigned i = 0; i < MARSHAL_MAX_BATCHES; i++) {
+  glthread->batches[i].ctx = ctx;
+  util_queue_fence_init(>batches[i].fence);
+   }
 
-   glthread->batch_queue_tail = >batch_queue;
+   ctx->CurrentClientDispatch = ctx->MarshalExec;
ctx->GLThread = glthread;
 
-   glthread_allocate_batch(ctx);
-
-   pthread_create(>thread, NULL, glthread_worker, ctx);
+   /* Execute the thread initialization function in the thread. */
+   struct util_queue_fence fence;
+   util_queue_fence_init();
+   util_queue_add_job(>queue, ctx, ,
+  glthread_thread_initialization, NULL);
+   util_queue_fence_wait();
+   util_queue_fence_destroy();
 }
 
 void
 _mesa_glthread_destroy(struct gl_context *ctx)
 {
struct glthread_state *glthread = ctx->GLThread;
 
if (!glthread)
   return;
 
-   _mesa_glthread_flush_batch(ctx);
+   _mesa_glthread_finish(ctx);
+   util_queue_destroy(>queue);
 
-   pthread_mutex_lock(>mutex);
-   glthread->shutdown = true;
-   pthread_cond_broadcast(>new_work);
-   pthread_mutex_unlock(>mutex);
-
-   /* Since this waits for the thread to exit, it means that all queued work
-* will have been completed.
-*/
-   pthread_join(glthread->thread, NULL);
-
-   pthread_cond_destroy(>new_work);
-   

[Mesa-dev] [PATCH 00/11] glthread cleanup, 4 whitelisted games!

2017-06-21 Thread Marek Olšák
Hi,

This series:
- cleans up glthread, mainly switches the implementation to u_queue
  and rearranges it so that it looks like gallium/u_threaded_context
- adds "performance" counters for monitoring glthread behavior and
  displays them on the gallium HUD
- decreases the batch size for much better scalability
- whitelists 4 games

Here are the games and the performance deltas:
Alien Isolation: +17% (it varies depending on the location)
Borderlands 2: +50% (it varies depending on the location)
BioShock Infinite: +76% (benchmark)
Civilization 6: +20% (benchmark)

I would add more games, but not all games benefit (some even regress),
and I'm not often CPU-bound.

I'd like to see TF2 and CS:GO on that list eventually, but those are
the only games where glthread gets automatically disabled (as can be
observed on the HUD).

I'd also like to whitelist "Borderlands: The pre-sequel" eventually,
it should benefit too, but we need to test it first.

I personally don't plan to spend more time on glthread again in the near
future. The HUD monitoring can be used for estimating potential
performance improvement for slower CPUs even if you specifically are
not CPU-bound.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/11] gallium/hud: rename API-thread-busy to main-thread-busy

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/hud/hud_context.c | 4 ++--
 src/gallium/auxiliary/hud/hud_cpu.c | 4 ++--
 src/gallium/auxiliary/hud/hud_private.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index cb7ed44..922ab96 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -1187,22 +1187,22 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   /* IF YOU CHANGE THIS, UPDATE print_help! */
   if (strcmp(name, "fps") == 0) {
  hud_fps_graph_install(pane);
   }
   else if (strcmp(name, "cpu") == 0) {
  hud_cpu_graph_install(pane, ALL_CPUS);
   }
   else if (sscanf(name, "cpu%u%s", , s) == 1) {
  hud_cpu_graph_install(pane, i);
   }
-  else if (strcmp(name, "API-thread-busy") == 0) {
- hud_api_thread_busy_install(pane);
+  else if (strcmp(name, "main-thread-busy") == 0) {
+ hud_main_thread_busy_install(pane, name);
   }
 #if HAVE_GALLIUM_EXTRA_HUD
   else if (sscanf(name, "nic-rx-%s", arg_name) == 1) {
  hud_nic_graph_install(pane, arg_name, NIC_DIRECTION_RX);
   }
   else if (sscanf(name, "nic-tx-%s", arg_name) == 1) {
  hud_nic_graph_install(pane, arg_name, NIC_DIRECTION_TX);
   }
   else if (sscanf(name, "nic-rssi-%s", arg_name) == 1) {
  hud_nic_graph_install(pane, arg_name, NIC_RSSI_DBM);
diff --git a/src/gallium/auxiliary/hud/hud_cpu.c 
b/src/gallium/auxiliary/hud/hud_cpu.c
index 302445d..26f9fa7 100644
--- a/src/gallium/auxiliary/hud/hud_cpu.c
+++ b/src/gallium/auxiliary/hud/hud_cpu.c
@@ -259,29 +259,29 @@ query_api_thread_busy_status(struct hud_graph *gr)
  info->last_time = now;
   }
} else {
   /* initialize */
   info->last_time = now;
   info->last_thread_time = pipe_current_thread_get_time_nano();
}
 }
 
 void
-hud_api_thread_busy_install(struct hud_pane *pane)
+hud_main_thread_busy_install(struct hud_pane *pane, const char *name)
 {
struct hud_graph *gr;
 
gr = CALLOC_STRUCT(hud_graph);
if (!gr)
   return;
 
-   strcpy(gr->name, "API-thread-busy");
+   strcpy(gr->name, name);
 
gr->query_data = CALLOC_STRUCT(thread_info);
if (!gr->query_data) {
   FREE(gr);
   return;
}
 
gr->query_new_value = query_api_thread_busy_status;
 
/* Don't use free() as our callback as that messes up Gallium's
diff --git a/src/gallium/auxiliary/hud/hud_private.h 
b/src/gallium/auxiliary/hud/hud_private.h
index bbc5ec7..bf9962d 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -86,21 +86,21 @@ void hud_graph_add_value(struct hud_graph *gr, uint64_t 
value);
 
 /* graphs/queries */
 struct hud_batch_query_context;
 
 #define ALL_CPUS ~0 /* optionally set as cpu_index */
 
 int hud_get_num_cpus(void);
 
 void hud_fps_graph_install(struct hud_pane *pane);
 void hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index);
-void hud_api_thread_busy_install(struct hud_pane *pane);
+void hud_main_thread_busy_install(struct hud_pane *pane, const char *name);
 void hud_pipe_query_install(struct hud_batch_query_context **pbq,
 struct hud_pane *pane, struct pipe_context *pipe,
 const char *name, unsigned query_type,
 unsigned result_index,
 uint64_t max_value,
 enum pipe_driver_query_type type,
 enum pipe_driver_query_result_type result_type,
 unsigned flags);
 boolean hud_driver_query_install(struct hud_batch_query_context **pbq,
  struct hud_pane *pane,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/11] mesa/glthread: remove HAVE_PTHREAD guards

2017-06-21 Thread Marek Olšák
From: Marek Olšák 

we are switching to util_queue.
---
 src/mapi/glapi/gen/gl_marshal.py |  5 +
 src/mesa/main/glthread.c |  4 
 src/mesa/main/glthread.h | 30 --
 src/mesa/main/marshal.c  |  4 
 src/mesa/main/marshal.h  | 27 ---
 5 files changed, 1 insertion(+), 69 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_marshal.py b/src/mapi/glapi/gen/gl_marshal.py
index f52b9b7..062afe5 100644
--- a/src/mapi/glapi/gen/gl_marshal.py
+++ b/src/mapi/glapi/gen/gl_marshal.py
@@ -59,34 +59,31 @@ def indent(delta = 3):
 class PrintCode(gl_XML.gl_print_base):
 def __init__(self):
 super(PrintCode, self).__init__()
 
 self.name = 'gl_marshal.py'
 self.license = license.bsd_license_template % (
 'Copyright (C) 2012 Intel Corporation', 'INTEL CORPORATION')
 
 def printRealHeader(self):
 print header
-print '#ifdef HAVE_PTHREAD'
-print
 print 'static inline int safe_mul(int a, int b)'
 print '{'
 print 'if (a < 0 || b < 0) return -1;'
 print 'if (a == 0 || b == 0) return 0;'
 print 'if (a > INT_MAX / b) return -1;'
 print 'return a * b;'
 print '}'
 print
 
 def printRealFooter(self):
-print
-print '#endif'
+pass
 
 def print_sync_call(self, func):
 call = 'CALL_{0}(ctx->CurrentServerDispatch, ({1}))'.format(
 func.name, func.get_called_parameter_string())
 if func.return_type == 'void':
 out('{0};'.format(call))
 else:
 out('return {0};'.format(call))
 
 def print_sync_dispatch(self, func):
diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
index 455b829..e90709c 100644
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -31,22 +31,20 @@
  * quickly logs the GL commands to a buffer to be processed by a worker
  * thread.
  */
 
 #include "main/mtypes.h"
 #include "main/glthread.h"
 #include "main/marshal.h"
 #include "main/marshal_generated.h"
 #include "util/u_thread.h"
 
-#ifdef HAVE_PTHREAD
-
 static void
 glthread_allocate_batch(struct gl_context *ctx)
 {
struct glthread_state *glthread = ctx->GLThread;
 
/* TODO: handle memory allocation failure. */
glthread->batch = malloc(sizeof(*glthread->batch));
if (!glthread->batch)
   return;
memset(glthread->batch, 0, offsetof(struct glthread_batch, buffer));
@@ -277,12 +275,10 @@ _mesa_glthread_finish(struct gl_context *ctx)
  _glapi_set_dispatch(dispatch);
   }
} else {
   _mesa_glthread_flush_batch_locked(ctx);
   while (glthread->batch_queue || glthread->busy)
  pthread_cond_wait(>work_done, >mutex);
}
 
pthread_mutex_unlock(>mutex);
 }
-
-#endif
diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
index 50c1db2..07bed38 100644
--- a/src/mesa/main/glthread.h
+++ b/src/mesa/main/glthread.h
@@ -22,22 +22,20 @@
  */
 
 #ifndef _GLTHREAD_H
 #define _GLTHREAD_H
 
 #include "main/mtypes.h"
 
 /* Command size is a number of bytes stored in a short. */
 #define MARSHAL_MAX_CMD_SIZE 65535
 
-#ifdef HAVE_PTHREAD
-
 #include 
 #include 
 #include 
 
 enum marshal_dispatch_cmd_id;
 
 struct glthread_state
 {
/** The worker thread that asynchronously processes our GL commands. */
pthread_t thread;
@@ -117,39 +115,11 @@ struct glthread_batch
uint8_t buffer[MARSHAL_MAX_CMD_SIZE];
 };
 
 void _mesa_glthread_init(struct gl_context *ctx);
 void _mesa_glthread_destroy(struct gl_context *ctx);
 
 void _mesa_glthread_restore_dispatch(struct gl_context *ctx);
 void _mesa_glthread_flush_batch(struct gl_context *ctx);
 void _mesa_glthread_finish(struct gl_context *ctx);
 
-#else /* HAVE_PTHREAD */
-
-static inline void
-_mesa_glthread_init(struct gl_context *ctx)
-{
-}
-
-static inline void
-_mesa_glthread_destroy(struct gl_context *ctx)
-{
-}
-
-static inline void
-_mesa_glthread_finish(struct gl_context *ctx)
-{
-}
-
-static inline void
-_mesa_glthread_restore_dispatch(struct gl_context *ctx)
-{
-}
-
-static inline void
-_mesa_glthread_flush_batch(struct gl_context *ctx)
-{
-}
-
-#endif /* !HAVE_PTHREAD */
 #endif /* _GLTHREAD_H*/
diff --git a/src/mesa/main/marshal.c b/src/mesa/main/marshal.c
index ae4efb5..4840f32 100644
--- a/src/mesa/main/marshal.c
+++ b/src/mesa/main/marshal.c
@@ -26,22 +26,20 @@
  * Custom functions for marshalling GL calls from the main thread to a worker
  * thread when automatic code generation isn't appropriate.
  */
 
 #include "main/enums.h"
 #include "main/macros.h"
 #include "marshal.h"
 #include "dispatch.h"
 #include "marshal_generated.h"
 
-#ifdef HAVE_PTHREAD
-
 struct marshal_cmd_Flush
 {
struct marshal_cmd_base cmd_base;
 };
 
 
 void
 _mesa_unmarshal_Flush(struct gl_context *ctx,
   const struct marshal_cmd_Flush *cmd)
 {
@@ -474,12 +472,10 @@ 

Re: [Mesa-dev] [PATCH 00/30] i965: Add support for I915_FORMAT_MOD_Y_TILED_CCS

2017-06-21 Thread Jason Ekstrand
On Fri, Jun 16, 2017 at 3:41 PM, Jason Ekstrand 
wrote:

> This series is a rework of Ben's series to enable the CCS format modifier.
> It started as an attempt to rebase his original patches on top of my
> resolve reworks inside the miptree code.  However, as I started to dive
> deeper, I found a number of subtle issues:
>
>  1) Thanks to the terrible set of INTEL_AUX_DISABLE_* flags that we use to
> choose what aux buffers to use, we were set up to never use CCS_E for
> any external buffers.  Even when Y-tiled on gen9, the most they would
> ever get was CCS_D.
>
>  2) Whether to do a full or partial resolve or not was based on is_scanout
> and not on the actual modifier.  If we use I915_FORMAT_MOD_Y_TILED (not
> the CCS modifier) and choose to use CCS_E with it, it would only get
> partial resolves and not full resolves.  Of course, this wasn't
> actually a problem thanks to problem 1 above.
>
>  3) If a user ever imported an image with I915_FORMAT_MOD_Y_TILED_CCS
> through EGL or GBM and did glClear on it, they would get a fast clear
> with no way to force a resolve before handing it off to the other
> process.  Since the other process doesn't know the clear color, this
> means that any blocks in the clear state in the surface will get
> whatever random clear color process B thinks it has.
>
>  4) There were three different places where we computed the pitch of the
> CCS and they all did so differently.  When we go to create the image,
> we would allocate the CCS with the same pitch as the main surface.  We
> would then calculate the CCS pitch with ISL when we created
> mt->mcs_buf.
> Finally, we had a different mechanism to compute the pitch when we pass
> it back to the user.  Fortunately, the first only caused us to over-
> allocate and I think the last two were equivalent (at least for the
> simple case) so nothing exploded.
>
>  5) Thanks again to our confusing aux enable/disable, we haven't been doing
> multisample fast-clears since cec30a666930ddb8476a9452a89364a24979ff62
> around a year ago.
>
> This series takes a bit more round-about approach to enabling the CCS
> modifier that should fix these issues:
>
>  * Patches 1-5 do a bit of refactoring and then rework the way we choose
>the type of aux compression to use.  They move us away from the crazy
>enable/disable system to a simple choice system.  This fixes (1) and (5)
>above.
>
>  * Patches 6-15 refactor things so that we have only one path for going
>from a __DRIimage to an intel_mipmap_tree.  This was rather painful
>because we have to be careful to take into account the differences
>between window system images and regular images.
>
>  * Patches 16-22 rework image creation and import to use ISL to do their
>surface layout calculations.  Previously, all of the surface layout
>calculations were simply hand-rolled here.  In the particular case of
>images, the hand-rolling was fairly safe because they were only ever
>simple 2D non-array images.  However, with the addition of CCS, things
>were going to get a bit tricky.
>
>  * Patches 23-30 add support for I915_FORMAT_MOD_Y_TILED_CCS.
>
> I've tested this series on our Jenkins system which runs piglit as well as
> the OpenGL and OpenGL ES test suites.  Both piglit and the OpenGL ES suite
> have some number of EGL tests which I hope have tested some of this.  I've
> also tested with kmscube and have verified that I get basically the same
> bandwidth numbers as Ben got on his original series, so I think CCS is
> working properly.
>
> This series can be found here:
>
> https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=review/i965-ccs-mod
>

I've rebased on top of Topi's ISL reworks and force-pushed that branch.
You can see the result of the rebase there.


> Cc: Ben Widawsky 
> Cc: Daniel Stone 
> Cc: Varad Gautam 
> Cc: Chad Versace 
> Cc: Topi Pohjolainen 
>
> Ben Widawsky (6):
>   i965/miptree: Add a return for updating of winsys
>   i965/miptree: Allocate mt earlier in update winsys
>   i965: Support images with aux buffers
>   i965/miptree: Allocate mcs_buf for an image's CCS
>   i965: Pretend that CCS modified images are two planes
>   i965: Advertise the CCS modifier
>
> Jason Ekstrand (24):
>   i965/miptree: Delete the layered rendering resolve
>   i965/miptree: Rename the non_msrt_mcs functions to _ccs
>   i965: Don't bother with HiZ in renderbuffer_move_to_temp
>   i965: Clamp clear colors to the representable range
>   i965/miptree: Rework aux enabling
>   i965: Move the DRIimage -> miptree code to intel_mipmap_tree.c
>   i965/miptree: Pass the offset into create_for_bo in
> create_for_dri_image
>   i965/miptree: Add tile_x/y to total_width/height
>   i965/miptree: Set level_x/h in create_for_dri_image
>   i965: Use 

Re: [Mesa-dev] [PATCH 3/5] st/dri: add a drirc workaround for Rocket League

2017-06-21 Thread Marek Olšák
On Wed, Jun 21, 2017 at 4:32 PM, Roland Scheidegger  wrote:
> Am 21.06.2017 um 00:54 schrieb Marek Olšák:
>> From: Marek Olšák 
>>
>> This needs to be passed to gallium drivers.
>> ---
>>  src/gallium/include/pipe/p_defines.h| 6 ++
>>  src/gallium/state_trackers/dri/dri_screen.c | 5 +
>>  src/mesa/drivers/dri/common/drirc   | 4 
>>  src/mesa/drivers/dri/common/xmlpool/t_options.h | 4 
>>  4 files changed, 19 insertions(+)
>>
>> diff --git a/src/gallium/include/pipe/p_defines.h 
>> b/src/gallium/include/pipe/p_defines.h
>> index b195af4..2ccdf44 100644
>> --- a/src/gallium/include/pipe/p_defines.h
>> +++ b/src/gallium/include/pipe/p_defines.h
>> @@ -383,20 +383,26 @@ enum pipe_flush_flags
>>   */
>>  #define PIPE_CONTEXT_ROBUST_BUFFER_ACCESS (1 << 2)
>>
>>  /**
>>   * Prefer threaded pipe_context. It also implies that video codec functions
>>   * will not be used. (they will be either no-ops or NULL when threading is
>>   * enabled)
>>   */
>>  #define PIPE_CONTEXT_PREFER_THREADED   (1 << 3)
>>
>> +/**
>> + * Implicit and explicit derivatives after KILL behave as if KILL didn't
>> + * happen.
>> + */
>> +#define PIPE_SCREEN_ENABLE_CORRECT_TGSI_DERIVATIVES_AFTER_KILL (1 << 0)
> Nothing against descriptive names, but isn't that a bit excessive?

At least it doesn't need any comments or documentation. :)

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] drirc: Add glsl_correct_derivatives_after_discard for The Witcher 2

2017-06-21 Thread Marek Olšák
Thanks. Applied locally to my branch. If I get an OK to push my
series, I'll push your patch too.

Marek

On Wed, Jun 21, 2017 at 10:40 PM, Edmondo Tommasina
 wrote:
> This fixes the long-standing problem with black transitions in The Witcher 2.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98238
> ---
> This patch depends on Marek's series:
> https://patchwork.freedesktop.org/series/26089/
>
>  src/mesa/drivers/dri/common/drirc | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/common/drirc 
> b/src/mesa/drivers/dri/common/drirc
> index 7d73b1218b..494d768312 100644
> --- a/src/mesa/drivers/dri/common/drirc
> +++ b/src/mesa/drivers/dri/common/drirc
> @@ -155,6 +155,10 @@ TODO: document the other workarounds.
>  
>   value="true"/>
>  
> +
> +
> + value="true"/>
> +
>  
>  
>  
> --
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 08/13] anv/blorp: Remove 3D subresource transition workaround

2017-06-21 Thread Nanley Chery
For 3D image subresources undergoing a layout transition via
PipelineBarrier, we increase the number of fast-cleared layers to match
the intended behaviour of KHR_maintenance1. When such subresources
undergo layout transitions between subpasses, we don't do this to avoid
failing incorrect CTS tests. Instead, unify the behaviour in both
scenarios, and wait for the CTS tests to catch up. See CL  for the
test fix and Vulkan issue #849 for more information.

On SKL+, this causes 3 test failures under:
dEQP-VK.pipeline.render_to_image.3d.*

v2: Add a reference to the Vulkan issue (Iago Toral).

Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga  (v1)
Reviewed-by: Jason Ekstrand  (v1)
---
 src/intel/vulkan/anv_blorp.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 34a591815f..f583b8bd23 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1477,12 +1477,12 @@ anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
 
   /* Blorp likes to treat 2D_ARRAY and 3D the same. */
   uint32_t blorp_base_layer, blorp_layer_count;
-  if (view) {
- blorp_base_layer = view->base_array_layer;
- blorp_layer_count = view->array_len;
-  } else if (image->type == VK_IMAGE_TYPE_3D) {
+  if (image->type == VK_IMAGE_TYPE_3D) {
  blorp_base_layer = 0;
  blorp_layer_count = extent.depth;
+  } else if (view) {
+ blorp_base_layer = view->base_array_layer;
+ blorp_layer_count = view->array_len;
   } else {
  blorp_base_layer = subresourceRange->baseArrayLayer;
  blorp_layer_count = anv_get_layerCount(image, subresourceRange);
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] Gallium: Passing drirc options to create_screen() and fixing Rocket League

2017-06-21 Thread Marek Olšák
On Wed, Jun 21, 2017 at 10:37 PM, Rob Clark  wrote:
> On Tue, Jun 20, 2017 at 6:54 PM, Marek Olšák  wrote:
>> Hi,
>>
>> This series updates pipe loaders so that flags such as drirc options
>> can be passed to create_screen(). I have compile-tested everything
>> except clover.
>>
>> The first pipe_screen flag is a drirc option to fix incorrect grass
>> rendering in Rocket League for radeonsi. Rocket League expects DirectX
>> behavior for partial derivative computations after discard/kill, but
>> radeonsi implements the more efficient but stricter OpenGL behavior
>> and that will remain our default behavior. The new screen flag forces
>> radeonsi to use the DX behavior for that game.
>>
>
> do we really want this to be a *global* option for the screen?

Yes. Shaders are pipe_screen (global) objects in radeonsi, so a
compiler option also has to be global. We can't look at the context
during the TGSI->LLVM translation.

>
> I'm just thinking, some drivers use lowering passes that internally
> generate kill's.  I *guess* it would only matter if they also had
> ddx/ddy instructions, but not sure.
>
> not really sure if this would actually be a problem or not..

Whether or not this affects you depends on how your hardware
implements kill/discard. Not just ddx/ddy, texture instructions
computing derivatives internally are affected by kill/discard too.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 11/13] anv/blorp: Add a surface-state-based CCS resolve function

2017-06-21 Thread Nanley Chery
This will be used in the next patch.

v2:
- Omit BLORP_BATCH_NO_EMIT_DEPTH_STENCIL (Jason Ekstrand)
- Update commit message.

Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga 
---
 src/intel/vulkan/anv_blorp.c   | 38 ++
 src/intel/vulkan/anv_private.h |  6 ++
 2 files changed, 44 insertions(+)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 60ebbd9dec..c4c744868c 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1736,3 +1736,41 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer 
*cmd_buffer,
blorp_hiz_op(, , 0, 0, 1, op);
blorp_batch_finish();
 }
+
+void
+anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
+const struct anv_state surface_state,
+const struct anv_image * const image,
+const uint8_t level, const uint32_t layer_count,
+const enum blorp_fast_clear_op op)
+{
+   assert(cmd_buffer && image);
+
+   /* The resolved subresource range must have a CCS buffer. */
+   assert(level < anv_image_aux_levels(image));
+   assert(layer_count <= anv_image_aux_layers(image, level));
+   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT && image->samples == 1);
+
+   /* Create a binding table for this surface state. */
+   uint32_t binding_table;
+   VkResult result =
+  binding_table_for_surface_state(cmd_buffer, surface_state,
+  _table);
+   if (result != VK_SUCCESS)
+  return;
+
+   struct blorp_batch batch;
+   blorp_batch_init(_buffer->device->blorp, , cmd_buffer, 0);
+
+   struct blorp_surf surf;
+   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
+image->aux_usage == ISL_AUX_USAGE_CCS_E ?
+ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_CCS_D,
+);
+
+   blorp_ccs_resolve_attachment(, binding_table, , level,
+layer_count, image->color_surface.isl.format,
+op);
+
+   blorp_batch_finish();
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index e538507fc2..303fe8e31f 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2116,6 +2116,12 @@ void
 anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
 const struct anv_image *image,
 enum blorp_hiz_op op);
+void
+anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
+const struct anv_state surface_state,
+const struct anv_image * const image,
+const uint8_t level, const uint32_t layer_count,
+const enum blorp_fast_clear_op op);
 
 void
 anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 09/13] anv: Adjust params of color buffer transitioning functions

2017-06-21 Thread Nanley Chery
Splitting out these fields will make the color buffer transitioning
function simpler when it gains more features.

v2: Remove unintended blank line (Iago Toral)

Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga 
---
 src/intel/vulkan/anv_blorp.c   | 33 -
 src/intel/vulkan/anv_private.h |  4 ++--
 src/intel/vulkan/genX_cmd_buffer.c | 38 +-
 3 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index f583b8bd23..60ebbd9dec 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1433,11 +1433,16 @@ void anv_CmdResolveImage(
 void
 anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
 const struct anv_image *image,
-const struct isl_view *view,
-const VkImageSubresourceRange *subresourceRange)
+const uint32_t base_level, const uint32_t level_count,
+const uint32_t base_layer, uint32_t layer_count)
 {
assert(image->type == VK_IMAGE_TYPE_3D || image->extent.depth == 1);
 
+   if (image->type == VK_IMAGE_TYPE_3D) {
+  assert(base_layer == 0);
+  assert(layer_count == anv_minify(image->extent.depth, base_level));
+   }
+
struct blorp_batch batch;
blorp_batch_init(_buffer->device->blorp, , cmd_buffer, 0);
 
@@ -1463,11 +1468,8 @@ anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->state.pending_pipe_bits |=
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
 
-   const uint32_t level_count =
-  view ? view->levels : anv_get_levelCount(image, subresourceRange);
for (uint32_t l = 0; l < level_count; l++) {
-  const uint32_t level =
- (view ? view->base_level : subresourceRange->baseMipLevel) + l;
+  const uint32_t level = base_level + l;
 
   const VkExtent3D extent = {
  .width = anv_minify(image->extent.width, level),
@@ -1475,24 +1477,13 @@ anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
  .depth = anv_minify(image->extent.depth, level),
   };
 
-  /* Blorp likes to treat 2D_ARRAY and 3D the same. */
-  uint32_t blorp_base_layer, blorp_layer_count;
-  if (image->type == VK_IMAGE_TYPE_3D) {
- blorp_base_layer = 0;
- blorp_layer_count = extent.depth;
-  } else if (view) {
- blorp_base_layer = view->base_array_layer;
- blorp_layer_count = view->array_len;
-  } else {
- blorp_base_layer = subresourceRange->baseArrayLayer;
- blorp_layer_count = anv_get_layerCount(image, subresourceRange);
-  }
+  if (image->type == VK_IMAGE_TYPE_3D)
+ layer_count = extent.depth;
 
   assert(level < anv_image_aux_levels(image));
-  assert(blorp_base_layer + blorp_layer_count <=
- anv_image_aux_layers(image, level));
+  assert(base_layer + layer_count <= anv_image_aux_layers(image, level));
   blorp_fast_clear(, , surf.surf->format,
-   level, blorp_base_layer, blorp_layer_count,
+   level, base_layer, layer_count,
0, 0, extent.width, extent.height);
}
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 2b9331b25e..e538507fc2 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2120,8 +2120,8 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
 void
 anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
 const struct anv_image *image,
-const struct isl_view *view,
-const VkImageSubresourceRange *subresourceRange);
+const uint32_t base_level, const uint32_t level_count,
+const uint32_t base_layer, uint32_t layer_count);
 
 enum isl_aux_usage
 anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index c9e5fac3d6..6a3e525eb3 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -387,10 +387,10 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
 static void
 transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
 const struct anv_image *image,
+const uint32_t base_level, uint32_t level_count,
+uint32_t base_layer, uint32_t layer_count,
 VkImageLayout initial_layout,
-VkImageLayout final_layout,
-const struct isl_view *view,
-const VkImageSubresourceRange *subresourceRange)
+VkImageLayout final_layout)
 {
if (image->aux_usage != ISL_AUX_USAGE_CCS_E)
   return;
@@ -399,13 +399,20 @@ 

[Mesa-dev] [PATCH v2 07/13] anv/cmd_buffer: Adjust the image view reloc function

2017-06-21 Thread Nanley Chery
Make the function take in an image instead of an image view. This
enables us to record relocations for surface states created outside of
the anv_CreateImageView path.

v2 (Jason Ekstrand):
- Use image->offset instead of surf_offset in aux_offset calculation.

Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga 
---
 src/intel/vulkan/genX_cmd_buffer.c | 45 +-
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 69b4812185..c9e5fac3d6 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -179,17 +179,20 @@ add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
 }
 
 static void
-add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
-  const struct anv_image_view *iview,
-  enum isl_aux_usage aux_usage,
-  struct anv_state state)
+add_image_relocs(struct anv_cmd_buffer * const cmd_buffer,
+ const struct anv_image * const image,
+ const VkImageAspectFlags aspect_mask,
+ const enum isl_aux_usage aux_usage,
+ const struct anv_state state)
 {
const struct isl_device *isl_dev = _buffer->device->isl_dev;
+   const uint32_t surf_offset = image->offset +
+  anv_image_get_surface_for_aspect_mask(image, aspect_mask)->offset;
 
-   add_surface_state_reloc(cmd_buffer, state, iview->bo, iview->offset);
+   add_surface_state_reloc(cmd_buffer, state, image->bo, surf_offset);
 
if (aux_usage != ISL_AUX_USAGE_NONE) {
-  uint32_t aux_offset = iview->offset + iview->image->aux_surface.offset;
+  uint32_t aux_offset = image->offset + image->aux_surface.offset;
 
   /* On gen7 and prior, the bottom 12 bits of the MCS base address are
* used to store other information.  This should be ok, however, because
@@ -203,7 +206,7 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
  anv_reloc_list_add(_buffer->surface_relocs,
 _buffer->pool->alloc,
 state.offset + isl_dev->ss.aux_addr_offset,
-iview->bo, aux_offset);
+image->bo, aux_offset);
   if (result != VK_SUCCESS)
  anv_batch_set_error(_buffer->batch, result);
}
@@ -542,9 +545,9 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer 
*cmd_buffer,
 .clear_color = clear_color,
 .mocs = cmd_buffer->device->default_mocs);
 
-add_image_view_relocs(cmd_buffer, iview,
-  state->attachments[i].aux_usage,
-  state->attachments[i].color_rt_state);
+add_image_relocs(cmd_buffer, iview->image, iview->aspect_mask,
+ state->attachments[i].aux_usage,
+ state->attachments[i].color_rt_state);
  } else {
 /* This field will be initialized after the first subpass
  * transition.
@@ -566,9 +569,9 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer 
*cmd_buffer,
 .clear_color = clear_color,
 .mocs = cmd_buffer->device->default_mocs);
 
-add_image_view_relocs(cmd_buffer, iview,
-  state->attachments[i].input_aux_usage,
-  state->attachments[i].input_att_state);
+add_image_relocs(cmd_buffer, iview->image, iview->aspect_mask,
+ state->attachments[i].input_aux_usage,
+ state->attachments[i].input_att_state);
  }
   }
 
@@ -1191,8 +1194,9 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 desc->image_view->no_aux_sampler_surface_state :
 desc->image_view->sampler_surface_state;
  assert(surface_state.alloc_size);
- add_image_view_relocs(cmd_buffer, desc->image_view,
-   desc->aux_usage, surface_state);
+ add_image_relocs(cmd_buffer, desc->image_view->image,
+  desc->image_view->aspect_mask,
+  desc->aux_usage, surface_state);
  break;
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
  assert(stage == MESA_SHADER_FRAGMENT);
@@ -1204,8 +1208,9 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
desc->image_view->no_aux_sampler_surface_state :
desc->image_view->sampler_surface_state;
 assert(surface_state.alloc_size);
-add_image_view_relocs(cmd_buffer, desc->image_view,
-  desc->aux_usage, surface_state);
+add_image_relocs(cmd_buffer, desc->image_view->image,
+

[Mesa-dev] [PATCH v2 12/13] anv/blorp: Provide surface states for CCS resolves

2017-06-21 Thread Nanley Chery
In the future, we plan on using this method to resolve images whose
surface state fast-clear value is dynamically updated during command
buffer execution. Start using it now for testing and to reduce churn
later on.

Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga 
---
 src/intel/vulkan/anv_blorp.c | 29 ++---
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index c4c744868c..7f6ed0efe4 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1493,7 +1493,6 @@ anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
 
 static void
 ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
-   struct blorp_batch *batch,
uint32_t att)
 {
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
@@ -1589,12 +1588,6 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
   return;
 
-   struct blorp_surf surf;
-   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
-att_state->aux_usage, );
-   if (att_state->fast_clear)
-  surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
-
/* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
 *
 *"When performing a render target resolve, PIPE_CONTROL with end of
@@ -1610,12 +1603,8 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->state.pending_pipe_bits |=
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
 
-   for (uint32_t layer = 0; layer < fb->layers; layer++) {
-  blorp_ccs_resolve(batch, ,
-iview->isl.base_level,
-iview->isl.base_array_layer + layer,
-iview->isl.format, resolve_op);
-   }
+   anv_ccs_resolve(cmd_buffer, att_state->color_rt_state, image,
+   iview->isl.base_level, fb->layers, resolve_op);
 
cmd_buffer->state.pending_pipe_bits |=
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
@@ -1633,16 +1622,13 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer 
*cmd_buffer)
struct anv_subpass *subpass = cmd_buffer->state.subpass;
 
 
-   struct blorp_batch batch;
-   blorp_batch_init(_buffer->device->blorp, , cmd_buffer, 0);
-
for (uint32_t i = 0; i < subpass->color_count; ++i) {
   const uint32_t att = subpass->color_attachments[i].attachment;
   if (att == VK_ATTACHMENT_UNUSED)
  continue;
 
   assert(att < cmd_buffer->state.pass->attachment_count);
-  ccs_resolve_attachment(cmd_buffer, , att);
+  ccs_resolve_attachment(cmd_buffer, att);
}
 
if (subpass->has_resolve) {
@@ -1681,6 +1667,10 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer 
*cmd_buffer)
  const VkRect2D render_area = cmd_buffer->state.render_area;
 
  assert(src_iview->aspect_mask == dst_iview->aspect_mask);
+
+ struct blorp_batch batch;
+ blorp_batch_init(_buffer->device->blorp, , cmd_buffer, 0);
+
  resolve_image(, src_iview->image,
src_iview->isl.base_level,
src_iview->isl.base_array_layer,
@@ -1692,11 +1682,12 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer 
*cmd_buffer)
render_area.offset.x, render_area.offset.y,
render_area.extent.width, render_area.extent.height);
 
- ccs_resolve_attachment(cmd_buffer, , dst_att);
+ blorp_batch_finish();
+
+ ccs_resolve_attachment(cmd_buffer, dst_att);
   }
}
 
-   blorp_batch_finish();
 }
 
 void
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 10/13] blorp/clear: Add a binding-table-based CCS resolve function

2017-06-21 Thread Nanley Chery
v2:
- Do layered resolves.
(Jason Ekstrand):
- Replace "bt" suffix with "attachment".
- Rename helper function to prepare_ccs_resolve.
- Move blorp_params_init() into helper function.

Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga 
---
 src/intel/blorp/blorp.h   | 11 
 src/intel/blorp/blorp_clear.c | 63 +++
 2 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 744c1b1ea0..d5226c2248 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -191,6 +191,17 @@ blorp_ccs_resolve(struct blorp_batch *batch,
   enum isl_format format,
   enum blorp_fast_clear_op resolve_op);
 
+/* Resolves subresources of the image subresource range specified in the
+ * binding table.
+ */
+void
+blorp_ccs_resolve_attachment(struct blorp_batch *batch,
+ const uint32_t binding_table_offset,
+ struct blorp_surf * const surf,
+ const uint32_t level, const uint32_t num_layers,
+ const enum isl_format format,
+ const enum blorp_fast_clear_op resolve_op);
+
 /**
  * For an overview of the HiZ operations, see the following sections of the
  * Sandy Bridge PRM, Volume 1, Part2:
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index 0e523dfe5b..581cf63c49 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -703,16 +703,16 @@ blorp_clear_attachments(struct blorp_batch *batch,
batch->blorp->exec(batch, );
 }
 
-void
-blorp_ccs_resolve(struct blorp_batch *batch,
-  struct blorp_surf *surf, uint32_t level, uint32_t layer,
-  enum isl_format format,
-  enum blorp_fast_clear_op resolve_op)
+static void
+prepare_ccs_resolve(struct blorp_batch * const batch,
+struct blorp_params * const params,
+const struct blorp_surf * const surf,
+const uint32_t level, const uint32_t layer,
+const enum isl_format format,
+const enum blorp_fast_clear_op resolve_op)
 {
-   struct blorp_params params;
-   blorp_params_init();
-
-   brw_blorp_surface_info_init(batch->blorp, , surf,
+   blorp_params_init(params);
+   brw_blorp_surface_info_init(batch->blorp, >dst, surf,
level, layer, format, true);
 
/* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
@@ -725,7 +725,7 @@ blorp_ccs_resolve(struct blorp_batch *batch,
 * multiply by 8 and 16. On Sky Lake, we multiply by 8.
 */
const struct isl_format_layout *aux_fmtl =
-  isl_format_get_layout(params.dst.aux_surf.format);
+  isl_format_get_layout(params->dst.aux_surf.format);
assert(aux_fmtl->txc == ISL_TXC_CCS);
 
unsigned x_scaledown, y_scaledown;
@@ -739,11 +739,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
   x_scaledown = aux_fmtl->bw / 2;
   y_scaledown = aux_fmtl->bh / 2;
}
-   params.x0 = params.y0 = 0;
-   params.x1 = minify(params.dst.aux_surf.logical_level0_px.width, level);
-   params.y1 = minify(params.dst.aux_surf.logical_level0_px.height, level);
-   params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
-   params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
+   params->x0 = params->y0 = 0;
+   params->x1 = minify(params->dst.aux_surf.logical_level0_px.width, level);
+   params->y1 = minify(params->dst.aux_surf.logical_level0_px.height, level);
+   params->x1 = ALIGN(params->x1, x_scaledown) / x_scaledown;
+   params->y1 = ALIGN(params->y1, y_scaledown) / y_scaledown;
 
if (batch->blorp->isl_dev->info->gen >= 9) {
   assert(resolve_op == BLORP_FAST_CLEAR_OP_RESOLVE_FULL ||
@@ -752,7 +752,7 @@ blorp_ccs_resolve(struct blorp_batch *batch,
   /* Broadwell and earlier do not have a partial resolve */
   assert(resolve_op == BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
}
-   params.fast_clear_op = resolve_op;
+   params->fast_clear_op = resolve_op;
 
/* Note: there is no need to initialize push constants because it doesn't
 * matter what data gets dispatched to the render target.  However, we must
@@ -760,8 +760,37 @@ blorp_ccs_resolve(struct blorp_batch *batch,
 * color" message.
 */
 
-   if (!blorp_params_get_clear_kernel(batch->blorp, , true))
+   if (!blorp_params_get_clear_kernel(batch->blorp, params, true))
   return;
+}
+
+void
+blorp_ccs_resolve(struct blorp_batch *batch,
+  struct blorp_surf *surf, uint32_t level, uint32_t layer,
+  enum isl_format format,
+  enum blorp_fast_clear_op resolve_op)
+{
+   struct blorp_params params;
+
+   prepare_ccs_resolve(batch, , surf, level, layer, format, resolve_op);
+
+   batch->blorp->exec(batch, );
+}
+
+void

[Mesa-dev] [PATCH v2 05/13] anv: Add and use color auxiliary buffer helpers

2017-06-21 Thread Nanley Chery
v2:
- Check for aux levels in layer helper (Jason Ekstrand)
- Don't assert aux is present, return 0 if it isn't.
- Use the helpers.
v3:
- Make the helpers aspect-agnostic (Jason Ekstrand)
- Drop anv_image_has_color_aux()

Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga  (v2)
---
 src/intel/vulkan/anv_blorp.c   |  3 +++
 src/intel/vulkan/anv_private.h | 29 +
 2 files changed, 32 insertions(+)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index a869eebc24..34a591815f 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1488,6 +1488,9 @@ anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
  blorp_layer_count = anv_get_layerCount(image, subresourceRange);
   }
 
+  assert(level < anv_image_aux_levels(image));
+  assert(blorp_base_layer + blorp_layer_count <=
+ anv_image_aux_layers(image, level));
   blorp_fast_clear(, , surf.surf->format,
level, blorp_base_layer, blorp_layer_count,
0, 0, extent.width, extent.height);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index fe6ac3bc1b..2b9331b25e 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2071,6 +2071,35 @@ struct anv_image {
struct anv_surface aux_surface;
 };
 
+/* Returns the number of auxiliary buffer levels attached to an image. */
+static inline uint8_t
+anv_image_aux_levels(const struct anv_image * const image)
+{
+   assert(image);
+   return image->aux_surface.isl.size > 0 ? image->aux_surface.isl.levels : 0;
+}
+
+/* Returns the number of auxiliary buffer layers attached to an image. */
+static inline uint32_t
+anv_image_aux_layers(const struct anv_image * const image,
+ const uint8_t miplevel)
+{
+   assert(image);
+
+   /* The miplevel must exist in the main buffer. */
+   assert(miplevel < image->levels);
+
+   if (miplevel >= anv_image_aux_levels(image)) {
+  /* There are no layers with auxiliary data because the miplevel has no
+   * auxiliary data.
+   */
+  return 0;
+   } else {
+  return MAX2(image->aux_surface.isl.logical_level0_px.array_len,
+  image->aux_surface.isl.logical_level0_px.depth >> miplevel);
+   }
+}
+
 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
 static inline bool
 anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 13/13] anv/gpu_memcpy: Rename the gpu_memcpy function

2017-06-21 Thread Nanley Chery
A GPU memcpy function could alternatively be implemented using MI_*
commands. Provide more detail into how this one operates in case another
memcpy function is created.

v2:
- Update the commit message.
v3:
- Use 'memcpy' instead of 'cpy' (Jason Ekstrand)
- Shorten 'streamout' to 'so'

Suggested-by: Jason Ekstrand 
Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga  (v2)
---
 src/intel/vulkan/anv_genX.h| 8 
 src/intel/vulkan/genX_cmd_buffer.c | 6 +++---
 src/intel/vulkan/genX_gpu_memcpy.c | 8 
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 67147b0e92..8da5e075dc 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -64,10 +64,10 @@ genX(emit_urb_setup)(struct anv_device *device, struct 
anv_batch *batch,
  VkShaderStageFlags active_stages,
  const unsigned entry_size[4]);
 
-void genX(cmd_buffer_gpu_memcpy)(struct anv_cmd_buffer *cmd_buffer,
- struct anv_bo *dst, uint32_t dst_offset,
- struct anv_bo *src, uint32_t src_offset,
- uint32_t size);
+void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
+struct anv_bo *dst, uint32_t dst_offset,
+struct anv_bo *src, uint32_t src_offset,
+uint32_t size);
 
 void genX(blorp_exec)(struct blorp_batch *batch,
   const struct blorp_params *params);
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 6a3e525eb3..53c58ca5b3 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -710,9 +710,9 @@ genX(CmdExecuteCommands)(
  struct anv_state dst_state = secondary->state.render_pass_states;
  assert(src_state.alloc_size == dst_state.alloc_size);
 
- genX(cmd_buffer_gpu_memcpy)(primary, ss_bo, dst_state.offset,
- ss_bo, src_state.offset,
- src_state.alloc_size);
+ genX(cmd_buffer_so_memcpy)(primary, ss_bo, dst_state.offset,
+ss_bo, src_state.offset,
+src_state.alloc_size);
   }
 
   anv_cmd_buffer_add_secondary(primary, secondary);
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c 
b/src/intel/vulkan/genX_gpu_memcpy.c
index 3cbc7235cf..5ef35e6283 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -52,10 +52,10 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
 }
 
 void
-genX(cmd_buffer_gpu_memcpy)(struct anv_cmd_buffer *cmd_buffer,
-struct anv_bo *dst, uint32_t dst_offset,
-struct anv_bo *src, uint32_t src_offset,
-uint32_t size)
+genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
+   struct anv_bo *dst, uint32_t dst_offset,
+   struct anv_bo *src, uint32_t src_offset,
+   uint32_t size)
 {
if (size == 0)
   return;
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 04/13] intel/isl: Only create a CCS buffer if the image supports rendering

2017-06-21 Thread Nanley Chery
v2: Omit the commit message.

Signed-off-by: Nanley Chery 
Reviewed-by: Jason Ekstrand  (v1)
Reviewed-by: Iago Toral Quiroga 
---
 src/intel/isl/isl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index ba56d86c17..bbbdb19df2 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -1699,7 +1699,7 @@ isl_surf_get_ccs_surf(const struct isl_device *dev,
if (ISL_DEV_GEN(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
   return false;
 
-   if (isl_format_is_compressed(surf->format))
+   if (!isl_format_supports_rendering(dev->info, surf->format))
   return false;
 
/* TODO: More conditions where it can fail. */
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 02/13] intel/blorp: Check for layer fast-clear restriction

2017-06-21 Thread Nanley Chery
v2: Update commit title (Jason Ekstrand)

Signed-off-by: Nanley Chery 
Reviewed-by: Iago Toral Quiroga  (v1)
---
 src/intel/blorp/blorp_clear.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index 369e18726f..0e523dfe5b 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -308,6 +308,11 @@ blorp_fast_clear(struct blorp_batch *batch,
  uint32_t level, uint32_t start_layer, uint32_t num_layers,
  uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
 {
+   /* Ensure that all layers undergoing the clear have an auxiliary buffer. */
+   assert(start_layer + num_layers <=
+  MAX2(surf->aux_surf->logical_level0_px.depth >> level,
+   surf->aux_surf->logical_level0_px.array_len));
+
struct blorp_params params;
blorp_params_init();
params.num_layers = num_layers;
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/11] intel/genxml: Add better support for MI_MATH in gen10

2017-06-21 Thread Rafael Antognolli
Reviewed-by: Rafael Antognolli 

On Tue, Jun 13, 2017 at 11:28:20AM -0700, Anuj Phogat wrote:
> Signed-off-by: Anuj Phogat 
> ---
>  src/intel/genxml/gen10.xml | 69 
> +++---
>  1 file changed, 65 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
> index ebeb5da8f..26dba22 100644
> --- a/src/intel/genxml/gen10.xml
> +++ b/src/intel/genxml/gen10.xml
> @@ -968,6 +968,69 @@
>   type="s1.6"/>
>
>  
> +  
> +
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +
> +
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +
> +
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +  
> +
> +  
> +
>
>  
>   default="3"/>
> @@ -3175,10 +3238,8 @@
>  
>   default="26"/>
>  
> -
> -
> -
> -  
> +
> +   type="MI_MATH_ALU_INSTRUCTION"/>
>  
>
>  
> -- 
> 2.9.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/13] anv/blorp: Remove 3D subresource transition workaround

2017-06-21 Thread Nanley Chery
On Wed, Jun 21, 2017 at 09:04:01AM +0200, Iago Toral wrote:
> On Tue, 2017-06-20 at 12:21 -0700, Nanley Chery wrote:
> > On Mon, Jun 19, 2017 at 04:19:36PM -0700, Jason Ekstrand wrote:
> > > On Wed, Jun 14, 2017 at 3:06 PM, Nanley Chery  > > m> wrote:
> > > 
> > > > On Wed, Jun 14, 2017 at 09:32:22AM +0200, Iago Toral wrote:
> > > > > On Tue, 2017-06-13 at 11:41 -0700, Nanley Chery wrote:
> > > > > > For 3D image subresources undergoing a layout transition via
> > > > > > PipelineBarrier, we increase the number of fast-cleared
> > > > > > layers to
> > > > > > match
> > > > > > the intended behaviour of KHR_maintenance1. When such
> > > > > > subresources
> > > > > > undergo layout transitions between subpasses, we don't do
> > > > > > this to
> > > > > > avoid
> > > > > > failing incorrect CTS tests. Instead, unify the behaviour in
> > > > > > both
> > > > > > scenarios, and wait for the CTS tests to catch up. See CL
> > > > > >  for
> > > > > > the
> > > > > > test fix.
> > > > > > 
> > > > > > On SKL+, this causes 3 test failures under:
> > > > > > dEQP-VK.pipeline.render_to_image.3d.*
> > > > > > 
> > > > > > Signed-off-by: Nanley Chery 
> > > > > > ---
> > > > > >  src/intel/vulkan/anv_blorp.c | 8 
> > > > > >  1 file changed, 4 insertions(+), 4 deletions(-)
> > > > > > 
> > > > > > diff --git a/src/intel/vulkan/anv_blorp.c
> > > > > > b/src/intel/vulkan/anv_blorp.c
> > > > > > index 421f860428..ff3d7b126f 100644
> > > > > > --- a/src/intel/vulkan/anv_blorp.c
> > > > > > +++ b/src/intel/vulkan/anv_blorp.c
> > > > > > @@ -1478,12 +1478,12 @@ anv_image_ccs_clear(struct
> > > > > > anv_cmd_buffer
> > > > > > *cmd_buffer,
> > > > > > 
> > > > > >    /* Blorp likes to treat 2D_ARRAY and 3D the same. */
> > > > > >    uint32_t blorp_base_layer, blorp_layer_count;
> > > > > > -  if (view) {
> > > > > > - blorp_base_layer = view->base_array_layer;
> > > > > > - blorp_layer_count = view->array_len;
> > > > > > -  } else if (image->type == VK_IMAGE_TYPE_3D) {
> > > > > 
> > > > > Maybe add a comment referencing the requirement from
> > > > > VK_KHR_maintenance1 so it is clear why we ignore the view for
> > > > > 3D images
> > > > > here?
> > > > > 
> > > > 
> > > > Thank you for suggesting I add a comment. I actually meant to
> > > > double-check this before sending it out, but forgot. In the
> > > > process of
> > > > writing the comment, I discovered that the desired behaviour for
> > > > this
> > > > part of the extension is still being determined (Vulkan issue
> > > > #849).
> > > > 
> > > 
> > > Issue #849 was resolved today.  This patch is correct.
> > > 
> > > Reviewed-by: Jason Ekstrand 
> > > 
> > > --Jason
> > > 
> > > 
> > 
> > Thanks for the update and review! Given how recent the resolution is,
> > I
> > don't have any supporting specification text to reference in a code
> > comment so I plan to leave the patch as is.
> 
> Ok, I think that is fine for now.
> 
> If you think it is worth it, maybe add a comment saying that this is
> the result of an issue filed against VK_KHR_maintenance1, so
> people know where it comes from.
> 
> Iago
> 

I'll update the comment to reference the issue number.

> > -Nanley
> > 
> > > > > > +  if (image->type == VK_IMAGE_TYPE_3D) {
> > > > > >   blorp_base_layer = 0;
> > > > > >   blorp_layer_count = extent.depth;
> > > > > > +  } else if (view) {
> > > > > > + blorp_base_layer = view->base_array_layer;
> > > > > > + blorp_layer_count = view->array_len;
> > > > > >    } else {
> > > > > >   blorp_base_layer = subresourceRange-
> > > > > > >baseArrayLayer;
> > > > > >   blorp_layer_count = anv_get_layerCount(image,
> > > > > > subresourceRange);
> > > > 
> > > > ___
> > > > mesa-dev mailing list
> > > > mesa-dev@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > > > 
> > 
> > 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: use the correct LLVMTargetMachineRef in si_build_shader_variant

2017-06-21 Thread Marek Olšák
On Tue, Jun 20, 2017 at 10:56 PM, Samuel Pitoiset
 wrote:
>
>
> On 06/20/2017 10:53 PM, Samuel Pitoiset wrote:
>>
>> I have just tested this patch with Hero Siege, it still crashes.
>>
>> As you said, it's definitely a multithreading issue because it crashes
>> differently all the time.
>>
>> Let me know if you need more information.
>
>
> FWIW, "Peace, Death!" and "Riptale" [1] are also affected by the issue (all
> three games use the same engine).
>
> [1] https://www.gamingonlinux.com/wiki/Games_broken_on_Mesa

You said "it still crashes". Does that mean there was a point in the
past when it didn't crash?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/CFL: Add PCI Ids for Coffee Lake.

2017-06-21 Thread Srivatsa, Anusha


>-Original Message-
>From: Anuj Phogat [mailto:anuj.pho...@gmail.com]
>Sent: Wednesday, June 21, 2017 12:32 PM
>To: Srivatsa, Anusha 
>Cc: mesa-dev@lists.freedesktop.org; Vivi, Rodrigo ;
>Phogat, Anuj ; Widawsky, Benjamin
>
>Subject: Re: [Mesa-dev] [PATCH] i965/CFL: Add PCI Ids for Coffee Lake.
>
>On Wed, Jun 21, 2017 at 11:19 AM, Anusha Srivatsa 
>wrote:
>> Coffee Lake has a gen9 graphics following KBL.
>> From 3D perspective, CFL is a clone of KBL/SKL features.
>>
>> v2: Change commit message, correct alignment 
>> v3: Update IDs.
>>
>> Cc: Benjamin Widawsky 
>> Cc: Anuj Phogat 
>> Cc: Rodrigo Vivi 
>> Signed-off-by: Anusha Srivatsa 
>> ---
>>  include/pci_ids/i965_pci_ids.h | 11 +++
>>  src/intel/common/gen_device_info.c | 23 +++
>> src/intel/common/gen_device_info.h |  1 +
>>  3 files changed, 35 insertions(+)
>>
>> diff --git a/include/pci_ids/i965_pci_ids.h
>> b/include/pci_ids/i965_pci_ids.h index b296359..5e9566c 100644
>> --- a/include/pci_ids/i965_pci_ids.h
>> +++ b/include/pci_ids/i965_pci_ids.h
>> @@ -165,6 +165,17 @@ CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus
>> Graphics 650 (Kaby Lake GT3)")  CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake
>GT4")
>>  CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)")
>>  CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
>> +CHIPSET(0x3E90, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 6x1)")
>> +CHIPSET(0x3E93, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 4x1)")
>(Coffeelake 2x6 GT1). All CFL GT1 have 2 subslices and 6 EU/subslice.
Oops Thanks for pointing out.
Will send revised patch soon.

>> +CHIPSET(0x3E91, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 4x2)")
>> +CHIPSET(0x3E92, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
>> +CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
>> +CHIPSET(0x3E9B, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
>(Coffeelake 3x8 GT2). All CFL GT2 have 3 subslices and 8 EU/subslice.
>> +CHIPSET(0x3E94, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
>This is GT2 with 24 EUs.
>> +CHIPSET(0x3EA6, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
>> +CHIPSET(0x3EA7, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
>> +CHIPSET(0x3EA8, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
>> +CHIPSET(0x3EA5, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
>(Coffeelake 3x8 GT3). All CFL GT2 have 3 subslices and 8 EU/subslice.
>>  CHIPSET(0x5A49, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8
>> GT0.5)")  CHIPSET(0x5A4A, cnl_2x8, "Intel(R) HD Graphics (Cannonlake
>> 2x8 GT0.5)")  CHIPSET(0x5A41, cnl_3x8, "Intel(R) HD Graphics
>> (Cannonlake 3x8 GT1)") diff --git a/src/intel/common/gen_device_info.c
>> b/src/intel/common/gen_device_info.c
>> index 423748e..d49beaa 100644
>> --- a/src/intel/common/gen_device_info.c
>> +++ b/src/intel/common/gen_device_info.c
>> @@ -607,6 +607,29 @@ static const struct gen_device_info
>gen_device_info_glk_2x6 = {
>> .is_geminilake = true,
>>  };
>>
>> +static const struct gen_device_info gen_device_info_cfl_gt1 = {
>> +   GEN9_FEATURES,
>> +   .is_coffeelake = true,
>> +   .gt = 1,
>> +
>> +   .num_slices = 1,
>Also initialize l3_banks variable.
Sure. Thanks again,

Anusha
>> +};
>> +static const struct gen_device_info gen_device_info_cfl_gt2 = {
>> +   GEN9_FEATURES,
>> +   .is_coffeelake = true,
>> +   .gt = 2,
>> +
>> +   .num_slices = 1,
>Here too.
>> +};
>> +
>> +static const struct gen_device_info gen_device_info_cfl_gt3 = {
>> +   GEN9_FEATURES,
>> +   .is_coffeelake = true,
>> +   .gt = 3,
>> +
>> +   .num_slices = 2,
>and here.
>> +};
>> +
>>  #define GEN10_HW_INFO   \
>> .gen = 10,   \
>> .num_thread_per_eu = 7,  \
>> diff --git a/src/intel/common/gen_device_info.h
>> b/src/intel/common/gen_device_info.h
>> index cc83857..a83251c 100644
>> --- a/src/intel/common/gen_device_info.h
>> +++ b/src/intel/common/gen_device_info.h
>> @@ -46,6 +46,7 @@ struct gen_device_info
>> bool is_broxton;
>> bool is_kabylake;
>> bool is_geminilake;
>> +   bool is_coffeelake;
>> bool is_cannonlake;
>>
>> bool has_hiz_and_separate_stencil;
>> --
>> 2.7.4
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/miptree: Move isl_surf_get_(hiz|mcs)_surf out of the assert

2017-06-21 Thread Mike Lothian
On Thu, 22 Jun 2017 at 00:12 Matt Turner  wrote:

> On Wed, Jun 21, 2017 at 3:50 PM, Mike Lothian  wrote:
> > Do intel run mesa through any of their test boxes like they do with
> kernel
> > patches?
>
> Don't top quote.
>
> Yes, we have a CI system that we use extensively and has massively
> reduced the number of regressions we have. Immense thanks to Mark
> Janes for all of the work he's done on it.
>
> In this instance, the CI didn't catch it because it tests debug builds
> (as you'd expect), so the assert and the function it called were
> executed. In release builds, however, the assert and that necessary
> function call were removed.
>

Sorry, top posting is the default in Inbox and there doesn't appear to be
a way to change it

I wasn't criticising, it's very rare that a serious bug creeps into master,
having said that it would have been nice to have a response on IRC when the
issue was pointed out

Least it's fixed now and I'm back to testing master again

Mike

>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] egl/dri2: add image extension to swrast_core_extensions

2017-06-21 Thread Gurchetan Singh
Emil,

If I understand you correctly, you're proposing to add the ability to use
the kms_swrast driver in platform_x11.c (the host is a standard Ubuntu box
for the emulator use case, not CrOS) alongside swrast.

In that case, we would need to:

1) Have a dri2_initialize_x11_kms_swrast function that's called when some
environment variable is set instead of dri2_initialize_x11_swrast.
2) dri2_initialize_x11_kms_swrast would need access to the host card fd
(dri_kms_init_screen requires this) and call dri2_load_driver instead of
dri2_load_driver_swrast .
3) Use dri2_loader_extensions instead of swrast_loader_extensions,
dri2_x11_display_vtbl instead of dri2_x11_swrast_display_vtbl etc.

I'm having trouble getting this to work, and I was wondering if what I'm
trying to do is what you want.  Attached is the patch I'm trying (it
compiles, but will crash your display).

Regarding the issues with the emulator, I filed a bug based on your comments
and the emulator team has started looking at it (see
https://android-review.googlesource.com/#/c/418541/).


On Tue, Jun 20, 2017 at 1:19 AM, Emil Velikov 
wrote:

> On 19 June 2017 at 20:46, Chad Versace  wrote:
> > On Thu 15 Jun 2017, Gurchetan Singh wrote:
> >> Emil, would you be fine with leaving the image extension in dri2.c but
> still
> >> adding it as a drisw extension?  That solution would look like:
> >>
> >> [1]https://patchwork.freedesktop.org/patch/154807/
> >
> > Observations:
> > - src/gallium/state_trackers/dri/dri2.c:dri2ImageExtension
> advertises v15 of __DRI_IMAGE.
> > - egl_dri2.c requires only v1 of __DRI_IMAGE. Maybe a higher version
> >   is required in practice, but the egl_dri2.c code checks only for
> v1.
> >
> > Questions:
> > 1. All functions implemented in dri2.c:dri2ImageExtensions, do they
> >work under swrast? Honest question, because I'm no expert on
> >gallium.
> >
> > If question #1 is true, then I see no problem with your latest plan. But
> > maybe Emil does.
> >
> > If question #1 is false, it should be straightforward to implement in
> > drisw.c the small subset of __DRI_IMAGE functions required for v1.
>
> While I haven't checked how much [or well] DRI_IMAGE works with
> swrast, there's no need to actually add it there.
> An alternative is to add kms_swrast support for EGL like we already do
> for GBM, as mentioned earlier [1].
>
> Gents, keep in mind that:
>  - one cannot pull DRM specifics (dri2.c) code within drisw.c, and
>  - DRI_IMAGE pulls DRM specifics, hence adding it into drisw.c is
> again a no-go :-\
>
> FWIW the above architectural split applies for classic drivers as
> well. swrast_dri.so simply cannot depend on anything DRM related.
>
> -Emil
>
> [1] https://lists.freedesktop.org/archives/mesa-dev/2017-June/159519.html
>
From cf984192dba114a91630f5d9fb6cf46061e64d68 Mon Sep 17 00:00:00 2001
From: Gurchetan Singh 
Date: Mon, 19 Jun 2017 16:09:09 -0700
Subject: [PATCH] egl/dri2: Add kms_swrast in platform_x11

---
 src/egl/drivers/dri2/platform_x11.c | 100 +++-
 1 file changed, 53 insertions(+), 47 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index 95e560a32a..19e4b0c5ca 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1224,52 +1224,6 @@ disconnect:
return _eglError(EGL_BAD_ALLOC, msg);
 }
 
-static EGLBoolean
-dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
-{
-   struct dri2_egl_display *dri2_dpy;
-
-   dri2_dpy = calloc(1, sizeof *dri2_dpy);
-   if (!dri2_dpy)
-  return _eglError(EGL_BAD_ALLOC, "eglInitialize");
-
-   dri2_dpy->fd = -1;
-   if (!dri2_get_xcb_connection(drv, disp, dri2_dpy))
-  goto cleanup;
-
-   /*
-* Every hardware driver_name is set using strdup. Doing the same in
-* here will allow is to simply free the memory at dri2_terminate().
-*/
-   dri2_dpy->driver_name = strdup("swrast");
-   if (!dri2_load_driver_swrast(disp))
-  goto cleanup;
-
-   dri2_dpy->loader_extensions = swrast_loader_extensions;
-
-   if (!dri2_create_screen(disp))
-  goto cleanup;
-
-   if (!dri2_setup_extensions(disp))
-  goto cleanup;
-
-   dri2_setup_screen(disp);
-
-   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true))
-  goto cleanup;
-
-   /* Fill vtbl last to prevent accidentally calling virtual function during
-* initialization.
-*/
-   dri2_dpy->vtbl = _x11_swrast_display_vtbl;
-
-   return EGL_TRUE;
-
- cleanup:
-   dri2_display_destroy(disp);
-   return EGL_FALSE;
-}
-
 static void
 dri2_x11_setup_swap_interval(struct dri2_egl_display *dri2_dpy)
 {
@@ -1422,6 +1376,58 @@ static const __DRIextension *dri2_loader_extensions[] = {
NULL,
 };
 
+static EGLBoolean
+dri2_initialize_x11_kms_swrast(_EGLDriver *drv, _EGLDisplay *disp)
+{
+   struct dri2_egl_display *dri2_dpy;
+
+   dri2_dpy = calloc(1, sizeof *dri2_dpy);

[Mesa-dev] [RFC mesa] travis: add couple missing proto+lib

2017-06-21 Thread Eric Engestrom
> configure: error: Package requirements (x11 xext xdamage >= 1.1 xfixes
> x11-xcb xcb xcb-glx >= 1.8.1 xcb-dri2 >= 1.8) were not met:
> No package 'xdamage' found
> No package 'xfixes' found

They each depend on their own *proto.

Signed-off-by: Eric Engestrom 
---
 .travis.yml | 20 
 1 file changed, 20 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 82d21eb539..f73f3c699f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,6 +19,10 @@ env:
 - XCBPROTO_VERSION=xcb-proto-1.11
 - LIBXCB_VERSION=libxcb-1.11
 - LIBXSHMFENCE_VERSION=libxshmfence-1.2
+- FIXESPROTO_VERSION=fixesproto-5.0
+- LIBXFIXES_VERSION=libXfixes-5.0.3
+- DAMAGEPROTO_VERSION=damageproto-1.2.1
+- LIBXDAMAGE_VERSION=libXdamage-1.1.4
 - LIBTXC_DXTN_VERSION=libtxc_dxtn-1.0.1
 - LIBVDPAU_VERSION=libvdpau-1.1
 - LIBVA_VERSION=libva-1.6.2
@@ -317,6 +321,22 @@ install:
   - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
   - (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make 
install)
 
+  - wget 
https://xorg.freedesktop.org/releases/individual/proto/$FIXESPROTO_VERSION.tar.bz2
+  - tar -jxvf $FIXESPROTO_VERSION.tar.bz2
+  - (cd $FIXESPROTO_VERSION && ./configure --prefix=$HOME/prefix && make 
install)
+
+  - wget 
https://xorg.freedesktop.org/releases/individual/lib/$LIBXFIXES_VERSION.tar.bz2
+  - tar -jxvf $LIBXFIXES_VERSION.tar.bz2
+  - (cd $LIBXFIXES_VERSION && ./configure --prefix=$HOME/prefix && make 
install)
+
+  - wget 
https://xorg.freedesktop.org/releases/individual/proto/$DAMAGEPROTO_VERSION.tar.bz2
+  - tar -jxvf $DAMAGEPROTO_VERSION.tar.bz2
+  - (cd $DAMAGEPROTO_VERSION && ./configure --prefix=$HOME/prefix && make 
install)
+
+  - wget 
https://xorg.freedesktop.org/releases/individual/lib/$LIBXDAMAGE_VERSION.tar.bz2
+  - tar -jxvf $LIBXDAMAGE_VERSION.tar.bz2
+  - (cd $LIBXDAMAGE_VERSION && ./configure --prefix=$HOME/prefix && make 
install)
+
   # libtxc-dxtn uses the patented S3 Texture Compression
   # algorithm. Therefore, we don't want to use this library but it is
   # still possible through setting the USE_TXC_DXTN variable to yes in
-- 
Cheers,
  Eric

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: do not reset prog->TransformFeedback.BufferStride

2017-06-21 Thread Timothy Arceri



On 22/06/17 02:41, Juan A. Suarez Romero wrote:

On Wed, 2017-06-21 at 20:24 +1000, Timothy Arceri wrote:

On 21/06/17 18:13, Juan A. Suarez Romero wrote:

link_xfb_stride_layout_qualifiers() can be called multiple times, and
each time we call prog->TransformFeedback.BufferStride is reset to 0.

Thus it is losing the values set in the previous call.

Do not perform such reset.

Fixes:
KHR-GL45.enhanced_layouts.xfb_stride_of_empty_list
KHR-GL45.enhanced_layouts.xfb_stride_of_empty_list_and_api

Signed-off-by: Juan A. Suarez Romero 
---
   src/compiler/glsl/linker.cpp | 4 
   1 file changed, 4 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index adfa3b7b1d..1fe0ccc496 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -1623,10 +1623,6 @@ link_xfb_stride_layout_qualifiers(struct gl_context *ctx,
 struct gl_shader **shader_list,
 unsigned num_shaders)
   {
-   for (unsigned i = 0; i < MAX_FEEDBACK_BUFFERS; i++) {
-  prog->TransformFeedback.BufferStride[i] = 0;
-   }


I think this probably needs to be moved rather than just deleted. I
think the idea is to reset things to 0 in case we are re-linking an
existing program with different shaders attached.



Checking how this was done prior the refactoring in 4d65f68 (mesa/glsl:
move TransformFeedbackBufferStride to gl_shader), actually the reset
should be done if the new shaders provide an explicit xfb_stride.
Otherwise we should keep the old value.



No, that's not how it worked. The related change in 4d65f68 was:

for (unsigned i = 0; i < MAX_FEEDBACK_BUFFERS; i++) {
-  linked_shader->info.TransformFeedback.BufferStride[i] = 0;
+  prog->TransformFeedback.BufferStride[i] = 0;
}

There was no check previously; we just reset everything.

The problem is that previously each stage had its own copy of a 
BufferStride array. There was no need for this as xfb only applies to 
the last stage in the pipeline before the fragment shader.


4d65f68 changed it so that we share a single BufferStride array across 
the program instead, this allowed simplifications elsewhere and reduced 
the amount of data needed for shader cache. I assume the issue is that 
if we have a fragment shader attached then the array would get 
incorrectly reset.


Please see my reply to v2 for a suggested fix. Please let me know if 
that doesn't fix it.




I'm sending a new version.



-
  for (unsigned i = 0; i < num_shaders; i++) {
 struct gl_shader *shader = shader_list[i];
   





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: reset BufferStride with explicit xfb_stride

2017-06-21 Thread Timothy Arceri

On 22/06/17 02:41, Juan A. Suarez Romero wrote:

link_xfb_stride_layout_qualifiers() can be called multiple times, and
each time we call prog->TransformFeedback.BufferStride is reset to 0.

But we should only reset it if the shaders provide an explicit stride.

Fixes:
KHR-GL45.enhanced_layouts.xfb_stride_of_empty_list
KHR-GL45.enhanced_layouts.xfb_stride_of_empty_list_and_api

v2: do reset only if shaders provide an explicit stride

Signed-off-by: Juan A. Suarez Romero 
---
  src/compiler/glsl/linker.cpp | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index adfa3b7b1d..5a261a64f5 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -1623,8 +1623,14 @@ link_xfb_stride_layout_qualifiers(struct gl_context *ctx,
struct gl_shader **shader_list,
unsigned num_shaders)
  {
-   for (unsigned i = 0; i < MAX_FEEDBACK_BUFFERS; i++) {
-  prog->TransformFeedback.BufferStride[i] = 0;
+   for (unsigned s = 0; s < num_shaders; s++) {
+  struct gl_shader *shader = shader_list[s];
+
+  for (unsigned i = 0; i < MAX_FEEDBACK_BUFFERS; i++) {
+ if (shader->TransformFeedbackBufferStride[i]) {
+prog->TransformFeedback.BufferStride[i] = 0;
+ }
+  }
 }
  
 for (unsigned i = 0; i < num_shaders; i++) {




There should be no reason to add this extra loop/check. You will end up 
looping over the buffer in every stage for every shader source attached 
to the program in that stage.


The only stage xfb applies to is the last stage in the pipeline before 
the fragment shader. So you should be able to just do:


   if (linked->Stage != MESA_SHADER_FRAGMENT) {
  link_xfb_stride_layout_qualifiers(ctx, prog, linked, shader_list,
num_shaders);
   }

The stride will get reset for earlier stages but that shouldn't matter 
because the last stage before the frag shader will be the last to set it 
and that should be all we care about.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/miptree: Move isl_surf_get_(hiz|mcs)_surf out of the assert

2017-06-21 Thread Matt Turner
On Wed, Jun 21, 2017 at 3:50 PM, Mike Lothian  wrote:
> Do intel run mesa through any of their test boxes like they do with kernel
> patches?

Don't top quote.

Yes, we have a CI system that we use extensively and has massively
reduced the number of regressions we have. Immense thanks to Mark
Janes for all of the work he's done on it.

In this instance, the CI didn't catch it because it tests debug builds
(as you'd expect), so the assert and the function it called were
executed. In release builds, however, the assert and that necessary
function call were removed.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] GL_OES_required_internalformat

2017-06-21 Thread Eric Anholt
Tapani Pälli  writes:

> On 06/14/2017 01:12 AM, Eric Anholt wrote:
>> Tapani Pälli  writes:
>>
>>> On 06/12/2017 09:52 AM, Tapani Pälli wrote:

 On 05/18/2017 09:39 PM, Eric Anholt wrote:
> Eric Anholt  writes:
>
>> This series came out of fixing dEQP failures on vc4's GLES2 context.
>> Mesa was allowing RGB565 textures, which is only valid with
>> GL_OES_required_internalformat.  Rather than disable RGB565, I decided
>> the extension was easy enough to support.
>>
>> I've sent one piglit test for renderbuffer sizing, and dEQP has tests
>> for whether enums get accepted for TexImage.
>>
>> There's a functional question in patch #2, see the comment there, and
>> there's a question of whether the extension should be dummy_true in
>> patch #5.
>>
>> branch: https://github.com/anholt/mesa/commits/required-internalformat
> I would still love review on this series.
>
 Earlier I took a brief look at series and run through our CI, there were
 many failing tests but it seems I forgot to reply/report .. I'll run it
 again and let you know what's the result.

>>> '36 failures' (many likely duplicates of some same issue), these ones ..
>>>
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.stencil.rgb10.bdwm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels_pbo.bdwm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels_pixelstore.hswm64
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.stencil.rgb10.sklm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels_pbo.sklm64
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.depth.rgb10.bdwm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels_pixelstore.bdwm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels.hswm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels_pixelstore.sklm64
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.color0.rgb10.hswm64
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.depth.rgb10.hswm64
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.stencil.rgb10.hswm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels_pbo.hswm64
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.color0.rgb10.bdwm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels.bdwm64
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.depth.rgb10.sklm64
>>> ES3-CTS.functional.fbo.completeness.renderable.texture.color0.rgb10.sklm64
>>> ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels.sklm64
>>> ES2-CTS.functional.fbo.completeness.renderable.texture.depth.rgb10.bdwm64
>>> ES2-CTS.functional.fbo.completeness.renderable.texture.depth.rgb10.sklm64
>>> ES2-CTS.functional.fbo.completeness.renderable.texture.stencil.rgb10.bdwm64
>>> ES2-CTS.functional.fbo.completeness.renderable.texture.depth.rgb10.hswm64
>>> ES2-CTS.functional.fbo.completeness.renderable.texture.stencil.rgb10.hswm64
>>> ES2-CTS.functional.fbo.completeness.renderable.texture.stencil.rgb10.sklm64
>>>
>>> piglit.spec.oes_texture_float.oes_texture_float half.g965m64
>>> piglit.spec.oes_texture_float.oes_texture_float.g965m64
>>> piglit.spec.oes_texture_float.oes_texture_float half linear.g965m64
>>> piglit.spec.oes_texture_float.oes_texture_float linear.g965m64
>>> piglit.spec.oes_texture_float.oes_texture_float half.g45m64
>>> piglit.spec.oes_texture_float.oes_texture_float half linear.g45m64
>>> piglit.spec.oes_texture_float.oes_texture_float linear.g45m64
>>> piglit.spec.oes_texture_float.oes_texture_float.g45m64
>>> piglit.spec.oes_texture_float.oes_texture_float.ilkm64
>>> piglit.spec.oes_texture_float.oes_texture_float half linear.ilkm64
>>> piglit.spec.oes_texture_float.oes_texture_float half.ilkm64
>>> piglit.spec.oes_texture_float.oes_texture_float linear.ilkm64
>>>
>>> Are you able to reproduce/run these tests on some machine?
>> I have a SKL desktop, so I reproduced the GLES2 rgb10 failure and fixed
>> it, and I think I've fixed the pre-snb failures in piglit.  New branch
>> up at https://github.com/anholt/mesa/commits/required-internalformat
>> which I'll piglit now.
>
>
> OK, seems the packed_pixels ones are still failing. I'll try to debug 
> this a bit to see what's going on.

Have you had a chance to look at this at all?  Or could you give me a
command line for reproducing failure?  I've gone through my VK-GL-CTS
tree and DEQP trees trying various test runners with various manglings
of the names, with no luck.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/miptree: Move isl_surf_get_(hiz|mcs)_surf out of the assert

2017-06-21 Thread Mike Lothian
Do intel run mesa through any of their test boxes like they do with kernel
patches?

On Wed, 21 Jun 2017 at 19:23 Jason Ekstrand  wrote:

> On Wed, Jun 21, 2017 at 11:20 AM, Pohjolainen, Topi <
> topi.pohjolai...@gmail.com> wrote:
>
>> On Wed, Jun 21, 2017 at 11:16:39AM -0700, Jason Ekstrand wrote:
>> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101538
>> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101539
>>
>> I guess also:
>>
>> https://bugs.freedesktop.org/show_bug.cgi?id=101535
>>
>> Thanks for the quick fix!!
>>
>
> No problem.  I've landed it now so peoples desktops can start working
> again.
>
>
>> Reviewed-by: Topi Pohjolainen 
>>
>> > Cc: Topi Pohjolainen 
>> > ---
>> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 ++
>> >  1 file changed, 6 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> > index abc7f98..3b7262f 100644
>> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> > @@ -1672,8 +1672,9 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
>> >  * calculate equivalent MCS surface against it.
>> >  */
>> > intel_miptree_get_isl_surf(brw, mt, _main_surf);
>> > -   assert(isl_surf_get_mcs_surf(>isl_dev, _main_surf,
>> > -_mcs_surf));
>> > +   MAYBE_UNUSED bool ok =
>> > +  isl_surf_get_mcs_surf(>isl_dev, _main_surf,
>> _mcs_surf);
>> > +   assert(ok);
>> >
>> > /* Buffer needs to be initialised requiring the buffer to be
>> immediately
>> >  * mapped to cpu space for writing. Therefore do not use the gpu
>> access
>> > @@ -1832,8 +1833,9 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
>> > struct isl_surf temp_hiz_surf;
>> >
>> > intel_miptree_get_isl_surf(brw, mt, _main_surf);
>> > -   assert(isl_surf_get_hiz_surf(>isl_dev, _main_surf,
>> > -_hiz_surf));
>> > +   MAYBE_UNUSED bool ok =
>> > +  isl_surf_get_hiz_surf(>isl_dev, _main_surf,
>> _hiz_surf);
>> > +   assert(ok);
>> >
>> > const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
>> > mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
>> > --
>> > 2.5.0.400.gff86faf
>> >
>> > ___
>> > mesa-dev mailing list
>> > mesa-dev@lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radv: Rename winsys enums.

2017-06-21 Thread Dave Airlie
Oops missed these,

Reviewed-by: Dave Airlie 

On 11 June 2017 at 08:02, Bas Nieuwenhuizen  wrote:
> Don't rename the enums and constants used for metadata.
>
> Signed-off-by: Bas Nieuwenhuizen 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  |  4 +--
>  src/amd/vulkan/radv_descriptor_set.c  |  2 +-
>  src/amd/vulkan/radv_device.c  | 48 
> +--
>  src/amd/vulkan/radv_image.c   |  2 +-
>  src/amd/vulkan/radv_pipeline.c|  2 +-
>  src/amd/vulkan/radv_pipeline_cache.c  |  2 +-
>  src/amd/vulkan/radv_query.c   |  2 +-
>  src/amd/vulkan/radv_winsys.h  | 28 ++--
>  src/amd/vulkan/si_cmd_buffer.c|  4 +--
>  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 22 ++--
>  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h |  2 +-
>  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 14 
>  12 files changed, 63 insertions(+), 69 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index b57ce9fd1de..0a82bf08ec6 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -258,8 +258,8 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer 
> *cmd_buffer,
>
> bo = device->ws->buffer_create(device->ws,
>new_size, 4096,
> -  RADEON_DOMAIN_GTT,
> -  RADEON_FLAG_CPU_ACCESS);
> +  RADV_DOMAIN_GTT,
> +  RADV_FLAG_CPU_ACCESS);
>
> if (!bo) {
> cmd_buffer->record_fail = true;
> diff --git a/src/amd/vulkan/radv_descriptor_set.c 
> b/src/amd/vulkan/radv_descriptor_set.c
> index 3ea4936bfae..8d2623acd1b 100644
> --- a/src/amd/vulkan/radv_descriptor_set.c
> +++ b/src/amd/vulkan/radv_descriptor_set.c
> @@ -427,7 +427,7 @@ VkResult radv_CreateDescriptorPool(
>
> if (bo_size) {
> pool->bo = device->ws->buffer_create(device->ws, bo_size,
> -   32, 
> RADEON_DOMAIN_VRAM, 0);
> +   32, RADV_DOMAIN_VRAM, 
> 0);
> pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
> }
> pool->size = bo_size;
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 52de47f4bdc..63634e0db3d 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -1126,7 +1126,7 @@ VkResult radv_CreateDevice(
>
> if (getenv("RADV_TRACE_FILE")) {
> device->trace_bo = device->ws->buffer_create(device->ws, 
> 4096, 8,
> -
> RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
> +
> RADV_DOMAIN_VRAM, RADV_FLAG_CPU_ACCESS);
> if (!device->trace_bo)
> goto fail;
>
> @@ -1550,8 +1550,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
> scratch_bo = 
> queue->device->ws->buffer_create(queue->device->ws,
>   scratch_size,
>   4096,
> - 
> RADEON_DOMAIN_VRAM,
> - 
> RADEON_FLAG_NO_CPU_ACCESS);
> + 
> RADV_DOMAIN_VRAM,
> + 
> RADV_FLAG_NO_CPU_ACCESS);
> if (!scratch_bo)
> goto fail;
> } else
> @@ -1561,8 +1561,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
> compute_scratch_bo = 
> queue->device->ws->buffer_create(queue->device->ws,
>   
> compute_scratch_size,
>   4096,
> - 
> RADEON_DOMAIN_VRAM,
> - 
> RADEON_FLAG_NO_CPU_ACCESS);
> + 
> RADV_DOMAIN_VRAM,
> + 
> RADV_FLAG_NO_CPU_ACCESS);
> if (!compute_scratch_bo)
> goto fail;
>
> @@ -1573,8 +1573,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
> esgs_ring_bo = 
> queue->device->ws->buffer_create(queue->device->ws,
> 
> esgs_ring_size,
>   

Re: [Mesa-dev] [PATCH] i915: Fix gl_Fragcoord interpolation

2017-06-21 Thread Ian Romanick
On 06/21/2017 10:38 AM, ville.syrj...@linux.intel.com wrote:
> From: Ville Syrjälä 
> 
> gl_FragCoord contains the window coordinates so it seems to me that
> we should not use perspective correct interpolation for it. At least
> now I get similar output as i965/swrast/llvmpipe produce.
> 
> This fixes dEQP-GLES2.functional.shaders.builtin_variable.fragcoord_w.
> dEQP-GLES2.functional.shaders.builtin_variable.fragcoord_xyz was already
> passing, though I'm not quite sure how it managed to do that.

I suspect all the vertices had the same wrong w value, so the
interpolation just worked out.

> Signed-off-by: Ville Syrjälä 

One tiny comment below, but also

Reviewed-by: Ian Romanick 
Cc: mesa-sta...@lists.freedesktop.org

> ---
>  src/mesa/drivers/dri/i915/i915_context.h  | 13 +++--
>  src/mesa/drivers/dri/i915/i915_fragprog.c |  4 
>  src/mesa/drivers/dri/i915/i915_state.c|  7 ---
>  src/mesa/drivers/dri/i915/i915_vtbl.c |  7 +--
>  src/mesa/drivers/dri/i915/intel_reg.h |  3 ++-
>  5 files changed, 18 insertions(+), 16 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i915/i915_context.h 
> b/src/mesa/drivers/dri/i915/i915_context.h
> index f95b531a413e..4e68d1193ca6 100644
> --- a/src/mesa/drivers/dri/i915/i915_context.h
> +++ b/src/mesa/drivers/dri/i915/i915_context.h
> @@ -79,12 +79,13 @@
>  #define I915_CTXREG_STATE4   0
>  #define I915_CTXREG_LI   1
>  #define I915_CTXREG_LIS2 2
> -#define I915_CTXREG_LIS4 3
> -#define I915_CTXREG_LIS5 4
> -#define I915_CTXREG_LIS6 5
> -#define I915_CTXREG_BF_STENCIL_OPS   6
> -#define I915_CTXREG_BF_STENCIL_MASKS 7
> -#define I915_CTX_SETUP_SIZE  8
> +#define I915_CTXREG_LIS3 3
> +#define I915_CTXREG_LIS4 4
> +#define I915_CTXREG_LIS5 5
> +#define I915_CTXREG_LIS6 6
> +#define I915_CTXREG_BF_STENCIL_OPS   7
> +#define I915_CTXREG_BF_STENCIL_MASKS 8
> +#define I915_CTX_SETUP_SIZE  9
>  
>  #define I915_BLENDREG_IAB0
>  #define I915_BLENDREG_BLENDCOLOR01
> diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c 
> b/src/mesa/drivers/dri/i915/i915_fragprog.c
> index 3657b2d82565..2e0431951217 100644
> --- a/src/mesa/drivers/dri/i915/i915_fragprog.c
> +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
> @@ -1238,6 +1238,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
> const GLbitfield64 inputsRead = p->FragProg.info.inputs_read;
> GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
> GLuint s2 = S2_TEXCOORD_NONE;
> +   GLuint s3 = 0;
> int i, offset = 0;
>  
> /* Important:
> @@ -1301,6 +1302,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
>*/
>   s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
>   s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
> + s3 |= S3_TEXCOORD_PERSPECTIVE_DISABLE(i);
>  
>   intel->wpos_offset = offset;
>   EMIT_PAD(wpos_size);
> @@ -1308,6 +1310,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
> }
>  
> if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
> +   s3 != i915->state.Ctx[I915_CTXREG_LIS3] ||
> s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
>I915_STATECHANGE(i915, I915_UPLOAD_CTX);
>  
> @@ -1326,6 +1329,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
>intel->vertex_size >>= 2;
>  
>i915->state.Ctx[I915_CTXREG_LIS2] = s2;
> +  i915->state.Ctx[I915_CTXREG_LIS3] = s3;
>i915->state.Ctx[I915_CTXREG_LIS4] = s4;
>  
>assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size));
> diff --git a/src/mesa/drivers/dri/i915/i915_state.c 
> b/src/mesa/drivers/dri/i915/i915_state.c
> index 715db1fffa3d..4c4d95c420a1 100644
> --- a/src/mesa/drivers/dri/i915/i915_state.c
> +++ b/src/mesa/drivers/dri/i915/i915_state.c
> @@ -925,11 +925,12 @@ i915_init_packets(struct i915_context *i915)
> * piece changes.
> */
>i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
> - I1_LOAD_S(2) |
> - I1_LOAD_S(4) |
> - I1_LOAD_S(5) | I1_LOAD_S(6) | (3));
> + I1_LOAD_S(2) | I1_LOAD_S(3) |
> + I1_LOAD_S(4) | I1_LOAD_S(5) |
> + I1_LOAD_S(6) | (4));
>i915->state.Ctx[I915_CTXREG_LIS2] = 0;
>i915->state.Ctx[I915_CTXREG_LIS4] = 0;
> +  i915->state.Ctx[I915_CTXREG_LIS3] = 0;
>i915->state.Ctx[I915_CTXREG_LIS5] = 0;
>  
>if (i915->intel.ctx.Visual.rgbBits == 16)
> diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c 
> b/src/mesa/drivers/dri/i915/i915_vtbl.c
> index c41cd37bcc23..6a0a121856d7 100644
> --- 

Re: [Mesa-dev] [PATCH 3/3] glsl: relax loop unrolling restrictions

2017-06-21 Thread Timothy Arceri

On 22/06/17 01:42, Eero Tamminen wrote:

Hi,

On 21.06.2017 13:12, Timothy Arceri wrote:

The main reason these restrictions exist is because the GLSL loop
unrolling pass is super slow with large loops.

be5f27a84d0d fixed things so that expression trees were counted
against the limit, however it left the limit as
max_iterations * 5 which is actually fine for most shaders but
probably over conservative.

This change relaxes the limit to allow more loops to unroll in the
Unigine Superposition benchmark.

Results from Unigine Superposition @ 1920x1080 - High - Fullscreen
On radeonsi (RX480)

Before:

Average: 28.20 Frames Per Second

After:

Average: 28.60 Frames Per Second
---

 The increase is small but it seems to be consistent, I would be
 interested in the results if others were interested in testing it.


Did testing on few different Intel machines with larger set of 
benchmarks (older than Superposition).


Didn't see any statistically significant performance changes in either 
direction (due to large number of tests, can't run many rounds so 
variance is fairly high).


Hi,

I should have mentioned that this won't change anything for drivers that 
use NIR for loop unrolling such as i965.





Another change you could consider for larger loops is partial unrolling. 
  Unrolling several rounds, and then looping that.


The last time I checked with shader-db the nir unrolling pass unrolled 
everything without hitting the equivalent limit, although someone might 
want to check Superposition.


The limit is mainly in place due to the GLSL IR pass being slow, this is 
something that is not an issue with the NIR pass so it could be relaxed 
without much issue if needed.


Thanks,
Tim




 - Eero


 src/compiler/glsl/loop_unroll.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/loop_unroll.cpp 
b/src/compiler/glsl/loop_unroll.cpp

index bc377df..64ebf0f 100644
--- a/src/compiler/glsl/loop_unroll.cpp
+++ b/src/compiler/glsl/loop_unroll.cpp
@@ -350,21 +350,21 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
/* Don't try to unroll loops that have zillions of iterations either.
 */
if (iterations > max_iterations)
   return visit_continue;

/* Don't try to unroll nested loops and loops with a huge body.
 */
loop_unroll_count count(>body_instructions, ls, options);

bool loop_too_large =
-  count.nested_loop || count.nodes * iterations > max_iterations 
* 5;
+  count.nested_loop || count.nodes * iterations > max_iterations 
* 10;


if (loop_too_large && !count.unsupported_variable_indexing &&
!count.array_indexed_by_induction_var_with_exact_iterations)
   return visit_continue;

/* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
 * We'll be removing the limiting terminator before we unroll.
 */
assert(ls->num_loop_jumps > 0);
unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen6: Use isl-based miptree also for stencil rbs

2017-06-21 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Wed, Jun 21, 2017 at 12:35 PM, Topi Pohjolainen <
topi.pohjolai...@gmail.com> wrote:

> Fixes dEQP-EGL.functional.image.render_multiple_contexts.
> gles2_renderbuffer_stencil_stencil_buffer
>
> CC: Mark Janes 
> CC: Jason Ekstrand 
> CC: Kenneth Graunke 
> Signed-off-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 ---
>  1 file changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index abc7f989db..69b02ead78 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -897,7 +897,22 @@ intel_miptree_create_for_bo(struct brw_context *brw,
>  {
> struct intel_mipmap_tree *mt;
> uint32_t tiling, swizzle;
> -   GLenum target;
> +   const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
> +
> +   if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8) {
> +  mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
> +0, 0, width, height, depth, 1, ISL_TILING_W,
> +ISL_SURF_USAGE_STENCIL_BIT |
> +ISL_SURF_USAGE_TEXTURE_BIT,
> +BO_ALLOC_FOR_RENDER, bo);
> +  if (!mt)
> + return NULL;
> +
> +  assert(bo->size >= mt->surf.size);
> +
> +  brw_bo_reference(bo);
> +  return mt;
> +   }
>
> brw_bo_get_tiling(bo, , );
>
> @@ -912,8 +927,6 @@ intel_miptree_create_for_bo(struct brw_context *brw,
>  */
> assert(pitch >= 0);
>
> -   target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
> -
> /* The BO already has a tiling format and we shouldn't confuse the
> lower
>  * layers by making it try to find a tiling format again.
>  */
> --
> 2.11.0
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] egl/android: Rename var in droid_add_configs_for_visuals()

2017-06-21 Thread Eric Engestrom
On Friday, 2017-06-16 19:37:48 -0700, Chad Versace wrote:
> Rename 'config' to 'config_count'. I didn't understand what the variable

I think you mean "Rename 'count' to 'config_count'" :)

Agreed with Emil about making the same change across all platforms (one
of these days I'll dedup those, although I kinda hope someone will beat
me to it :)
Fair warning though, this will conflict with a patch of mine; the
slowest one of us will have to resolve it :P

> did until I untangled the for-loops. Now the next person won't have that
> problem.
> ---
>  src/egl/drivers/dri2/platform_android.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/src/egl/drivers/dri2/platform_android.c 
> b/src/egl/drivers/dri2/platform_android.c
> index f309fcea11f..9dc2d831b49 100644
> --- a/src/egl/drivers/dri2/platform_android.c
> +++ b/src/egl/drivers/dri2/platform_android.c
> @@ -1036,7 +1036,7 @@ droid_add_configs_for_visuals(_EGLDriver *drv, 
> _EGLDisplay *dpy)
> };
>  
> unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 };
> -   int count = 0;
> +   int config_count = 0;
>  
> for (int i = 0; dri2_dpy->driver_configs[i]; i++) {
>const EGLint surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT;
> @@ -1051,11 +1051,11 @@ droid_add_configs_for_visuals(_EGLDriver *drv, 
> _EGLDisplay *dpy)
>   };
>  
>   struct dri2_egl_config *dri2_conf =
> - dri2_add_config(dpy, dri2_dpy->driver_configs[i], count + 1,
> - surface_type, config_attrs,
> + dri2_add_config(dpy, dri2_dpy->driver_configs[i],
> + config_count + 1, surface_type, config_attrs,
>   visuals[j].rgba_masks);
>   if (dri2_conf) {
> -count++;
> +config_count++;
>  format_count[j]++;
>   }
>}
> @@ -1068,7 +1068,7 @@ droid_add_configs_for_visuals(_EGLDriver *drv, 
> _EGLDisplay *dpy)
>}
> }
>  
> -   return (count != 0);
> +   return (config_count != 0);
>  }
>  
>  static int
> -- 
> 2.13.0
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] egl/android: Change order of EGLConfig generation

2017-06-21 Thread Eric Engestrom
On Friday, 2017-06-16 19:37:50 -0700, Chad Versace wrote:
> Many Android apps (such as Google's official NDK GLES2 example app), and
> even portions of the core framework code (such as SystemServiceManager in
> Nougat), incorrectly choose their EGLConfig.  They neglect to match the
> EGLConfig's EGL_NATIVE_VISUAL_ID against the window's native format, and
> instead choose the first EGLConfig whose channel sizes match those of
> the native window format while ignoring the channel *ordering*.
> 
> We can detect such buggy clients in logcat when they call
> eglCreateSurface, by detecting the mismatch between the EGLConfig's
> format and the window's format.
> 
> As a workaround, this patch changes the order of EGLConfig generation
> such that all EGLConfigs for HAL pixel format i precede those for HAL
> pixel format i+1. In my (chadversary) testing on Android Nougat, this
> was good enough to pacify the buggy clients.

I had to double-check with the spec, but this is indeed allowed:
  > EGL_NATIVE_VISUAL_TYPE (the actual sort order is
  > implementation-defined, depending on the meaning of
  > native visual types).

Series is:
Reviewed-by: Eric Engestrom 

> ---
>  src/egl/drivers/dri2/platform_android.c | 32 +---
>  1 file changed, 25 insertions(+), 7 deletions(-)
> 
> diff --git a/src/egl/drivers/dri2/platform_android.c 
> b/src/egl/drivers/dri2/platform_android.c
> index fcf29bce713..c294691f291 100644
> --- a/src/egl/drivers/dri2/platform_android.c
> +++ b/src/egl/drivers/dri2/platform_android.c
> @@ -1039,23 +1039,41 @@ droid_add_configs_for_visuals(_EGLDriver *drv, 
> _EGLDisplay *dpy)
> unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 };
> int config_count = 0;
>  
> -   for (int i = 0; dri2_dpy->driver_configs[i]; i++) {
> -  for (int j = 0; j < ARRAY_SIZE(visuals); j++) {
> +   /* The nesting of loops is significant here. Also significant is the order
> +* of the HAL pixel formats. Many Android apps (such as Google's official
> +* NDK GLES2 example app), and even portions the core framework code (such
> +* as SystemServiceManager in Nougat), incorrectly choose their EGLConfig.
> +* They neglect to match the EGLConfig's EGL_NATIVE_VISUAL_ID against the
> +* window's native format, and instead choose the first EGLConfig whose
> +* channel sizes match those of the native window format while ignoring 
> the
> +* channel *ordering*.
> +*
> +* We can detect such buggy clients in logcat when they call
> +* eglCreateSurface, by detecting the mismatch between the EGLConfig's
> +* format and the window's format.
> +*
> +* As a workaround, we generate EGLConfigs such that all EGLConfigs for 
> HAL
> +* pixel format i precede those for HAL pixel format i+1. In my
> +* (chadversary) testing on Android Nougat, this was good enough to pacify
> +* the buggy clients.
> +*/
> +   for (int i = 0; i < ARRAY_SIZE(visuals); i++) {
> +  for (int j = 0; dri2_dpy->driver_configs[j]; j++) {
>   const EGLint config_attrs[] = {
> -   EGL_NATIVE_VISUAL_ID,   visuals[j].format,
> -   EGL_NATIVE_VISUAL_TYPE, visuals[j].format,
> +   EGL_NATIVE_VISUAL_ID,   visuals[i].format,
> +   EGL_NATIVE_VISUAL_TYPE, visuals[i].format,
> EGL_FRAMEBUFFER_TARGET_ANDROID, EGL_TRUE,
> EGL_RECORDABLE_ANDROID, EGL_TRUE,
> EGL_NONE
>   };
>  
>   struct dri2_egl_config *dri2_conf =
> - dri2_add_config(dpy, dri2_dpy->driver_configs[i],
> + dri2_add_config(dpy, dri2_dpy->driver_configs[j],
>   config_count + 1, surface_type, config_attrs,
> - visuals[j].rgba_masks);
> + visuals[i].rgba_masks);
>   if (dri2_conf) {
>  config_count++;
> -format_count[j]++;
> +format_count[i]++;
>   }
>}
> }
> -- 
> 2.13.0
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/30] i965/miptree: Rework aux enabling

2017-06-21 Thread Chad Versace
On Wed 21 Jun 2017, Jason Ekstrand wrote:
> On Wed, Jun 21, 2017 at 12:33 PM, Chad Versace <[1]chadvers...@chromium.org>
> wrote:
> 
> On Fri 16 Jun 2017, Jason Ekstrand wrote:
> > This commit replaces the complex and confusing set of disable flags with
> > two fairly straightforward fields which describe the intended auxiliary
> > surface usage and whether or not the miptree supports fast clears.
> > Right now, supports_fast_clear can be entirely derived from aux_usage
> > but that will not always be the case.
> >
> > This commit makes functional changes.  One of these changes is that it
> > re-enables multisampled fast-clears which were accidentally disabled in
> > cec30a666930ddb8476a9452a89364a24979ff62 around a year ago.  It should
> > also enable CCS_E for window-system buffers which are Y-tiled.  They
> > will still get a full resolve like CCS_D but we will at least get some
> > of the advantage of compression.
> > ---
> >  src/mesa/drivers/dri/i965/brw_blorp.c         |   4 +-
> >  src/mesa/drivers/dri/i965/intel_fbo.c         |   2 +-
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 190
> +-
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  43 +++---
> >  4 files changed, 120 insertions(+), 119 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri
> /i965/brw_blorp.c
> > index 00092ee..9bd25f0 100644
> > --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> > @@ -762,7 +762,7 @@ do_single_blorp_clear(struct brw_context *brw, 
> struct
> gl_framebuffer *fb,
> >     if (set_write_disables(irb, ctx->Color.ColorMask[buf],
> color_write_disable))
> >        can_fast_clear = false;
> >
> > -   if (irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS ||
> > +   if (!irb->mt->supports_fast_clear ||
> >         !brw_is_color_fast_clear_compatible(brw, irb->mt, >
> Color.ClearColor))
> >        can_fast_clear = false;
> >
> > @@ -785,7 +785,7 @@ do_single_blorp_clear(struct brw_context *brw, 
> struct
> gl_framebuffer *fb,
> >         */
> >        if (!irb->mt->mcs_buf) {
> >           assert(!intel_miptree_is_lossless_compressed(brw, irb->mt));
> > -         if (!intel_miptree_alloc_ccs(brw, irb->mt, false)) {
> > +         if (!intel_miptree_alloc_ccs(brw, irb->mt)) {
> >              /* MCS allocation failed--probably this will only happen in
> >               * out-of-memory conditions.  But in any case, try to
> recover
> >               * by falling back to a non-blorp clear technique.
> > diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
> b/src/mesa/drivers/dri
> /i965/intel_fbo.c
> > index ee4aba9..6a64bcb 100644
> > --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> > +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> > @@ -555,7 +555,7 @@ intel_renderbuffer_update_wrapper(struct brw_context
> *brw,
> >
> >     intel_renderbuffer_set_draw_offset(irb);
> >
> > -   if (intel_miptree_wants_hiz_buffer(brw, mt)) {
> > +   if (mt->aux_usage == ISL_AUX_USAGE_HIZ && !mt->hiz_buf) {
> >        intel_miptree_alloc_hiz(brw, mt);
> >        if (!mt->hiz_buf)
> >        return false;
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/
> drivers/dri/i965/intel_mipmap_tree.c
> > index 0f6d542..101317f 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -64,7 +64,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> >   */
> >  static enum intel_msaa_layout
> >  compute_msaa_layout(struct brw_context *brw, mesa_format format,
> > -                    enum intel_aux_disable aux_disable)
> > +                    uint32_t layout_flags)
> >  {
> >     /* Prior to Gen7, all MSAA surfaces used IMS layout. */
> >     if (brw->gen < 7)
> > @@ -90,7 +90,7 @@ compute_msaa_layout(struct brw_context *brw,
> mesa_format format,
> >         */
> >        if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT)
> {
> >           return INTEL_MSAA_LAYOUT_UMS;
> > -      } else if (aux_disable & INTEL_AUX_DISABLE_MCS) {
> > +      } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
> >           /* We can't use the CMS layout because it uses an aux buffer,
> the MCS
> >            * buffer. So fallback to UMS, which is identical to CMS
> without the
> >            * MCS. */
> > @@ -148,9 +148,6 @@ intel_miptree_supports_ccs(struct brw_context *brw,
> >     if (brw->gen < 7)
> >        return false;
> >
> > -   if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
> > -      return false;
> > -
> >     /* This function applies only to 

[Mesa-dev] [PATCH mesa v2] egl: properly count configs

2017-06-21 Thread Eric Engestrom
dri2_conf represents another config (which shouldn't be counted)
if it doesn't have the requested ID.

Reported-by: Liu Zhiquan 
Signed-off-by: Eric Engestrom 
---
v2: use original `count+1` comparison.
---
 src/egl/drivers/dri2/platform_android.c | 3 ++-
 src/egl/drivers/dri2/platform_drm.c | 3 ++-
 src/egl/drivers/dri2/platform_surfaceless.c | 3 ++-
 src/egl/drivers/dri2/platform_wayland.c | 3 ++-
 src/egl/drivers/dri2/platform_x11.c | 6 --
 5 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_android.c 
b/src/egl/drivers/dri2/platform_android.c
index 5550f580a8..f560d8ab40 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -1061,7 +1061,8 @@ droid_add_configs_for_visuals(_EGLDriver *drv, 
_EGLDisplay *dpy)
  dri2_conf = dri2_add_config(dpy, dri2_dpy->driver_configs[i],
count + 1, surface_type, config_attrs, visuals[j].rgba_masks);
  if (dri2_conf) {
-count++;
+if (dri2_conf->base.ConfigID == count + 1)
+   count++;
 format_count[j]++;
  }
   }
diff --git a/src/egl/drivers/dri2/platform_drm.c 
b/src/egl/drivers/dri2/platform_drm.c
index 8b0562c75d..9731d6ea19 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -631,7 +631,8 @@ drm_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay 
*disp)
  dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i],
count + 1, EGL_WINDOW_BIT, attr_list, NULL);
  if (dri2_conf) {
-count++;
+if (dri2_conf->base.ConfigID == count + 1)
+   count++;
 format_count[j]++;
  }
   }
diff --git a/src/egl/drivers/dri2/platform_surfaceless.c 
b/src/egl/drivers/dri2/platform_surfaceless.c
index 0eb3fb7505..e17002806b 100644
--- a/src/egl/drivers/dri2/platform_surfaceless.c
+++ b/src/egl/drivers/dri2/platform_surfaceless.c
@@ -212,7 +212,8 @@ surfaceless_add_configs_for_visuals(_EGLDriver *drv, 
_EGLDisplay *dpy)
count + 1, EGL_PBUFFER_BIT, NULL, visuals[j].rgba_masks);
 
  if (dri2_conf) {
-count++;
+if (dri2_conf->base.ConfigID == count + 1)
+   count++;
 format_count[j]++;
  }
   }
diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index f746f0bfd1..bb1bb6067d 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1131,7 +1131,8 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, 
_EGLDisplay *disp)
  dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i],
count + 1, EGL_WINDOW_BIT, NULL, visuals[j].rgba_masks);
  if (dri2_conf) {
-count++;
+if (dri2_conf->base.ConfigID == count + 1)
+   count++;
 format_count[j]++;
  }
   }
diff --git a/src/egl/drivers/dri2/platform_x11.c 
b/src/egl/drivers/dri2/platform_x11.c
index 74d3a164b6..a3ff33edeb 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -773,7 +773,8 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display 
*dri2_dpy,
 dri2_conf = dri2_add_config(disp, config, count + 1, surface_type,
 config_attrs, rgba_masks);
 if (dri2_conf)
-   count++;
+   if (dri2_conf->base.ConfigID == count + 1)
+  count++;
 
 /* Allow a 24-bit RGB visual to match a 32-bit RGBA EGLConfig.
  * Otherwise it will only match a 32-bit RGBA visual.  On a
@@ -788,7 +789,8 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display 
*dri2_dpy,
dri2_conf = dri2_add_config(disp, config, count + 1, 
surface_type,
config_attrs, rgba_masks);
if (dri2_conf)
-  count++;
+  if (dri2_conf->base.ConfigID == count + 1)
+ count++;
 }
 }
   }
-- 
Cheers,
  Eric

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] drirc: Add glsl_correct_derivatives_after_discard for The Witcher 2

2017-06-21 Thread Edmondo Tommasina
This fixes the long-standing problem with black transitions in The Witcher 2.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98238
---
This patch depends on Marek's series:   
 
https://patchwork.freedesktop.org/series/26089/

 src/mesa/drivers/dri/common/drirc | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/common/drirc 
b/src/mesa/drivers/dri/common/drirc
index 7d73b1218b..494d768312 100644
--- a/src/mesa/drivers/dri/common/drirc
+++ b/src/mesa/drivers/dri/common/drirc
@@ -155,6 +155,10 @@ TODO: document the other workarounds.
 
 
 
+
+
+
+
 
 
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] Gallium: Passing drirc options to create_screen() and fixing Rocket League

2017-06-21 Thread Rob Clark
On Tue, Jun 20, 2017 at 6:54 PM, Marek Olšák  wrote:
> Hi,
>
> This series updates pipe loaders so that flags such as drirc options
> can be passed to create_screen(). I have compile-tested everything
> except clover.
>
> The first pipe_screen flag is a drirc option to fix incorrect grass
> rendering in Rocket League for radeonsi. Rocket League expects DirectX
> behavior for partial derivative computations after discard/kill, but
> radeonsi implements the more efficient but stricter OpenGL behavior
> and that will remain our default behavior. The new screen flag forces
> radeonsi to use the DX behavior for that game.
>

do we really want this to be a *global* option for the screen?

I'm just thinking, some drivers use lowering passes that internally
generate kills.  I *guess* it would only matter if they also had
ddx/ddy instructions, but I'm not sure.

not really sure if this would actually be a problem or not..

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] etnaviv: add R8G8 texture support

2017-06-21 Thread Christian Gmeiner
Passes texwrap GL_ARB_texture_rg piglit (with faked full texture rg support).

Signed-off-by: Christian Gmeiner 
---
 src/gallium/drivers/etnaviv/etnaviv_format.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.c 
b/src/gallium/drivers/etnaviv/etnaviv_format.c
index 02b8d52..c9a8ce2 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_format.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_format.c
@@ -120,7 +120,7 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
_T(B5G5R5A1_UNORM, A1R5G5B5, SWIZ(X, Y, Z, W), A1R5G5B5),
_T(B5G5R5X1_UNORM, X1R5G5B5, SWIZ(X, Y, Z, W), X1R5G5B5),
 
-   V_(R8G8_UNORM,   UNSIGNED_BYTE,  NONE),
+   VT(R8G8_UNORM,   UNSIGNED_BYTE,  EXT_G8R8 | EXT_FORMAT, SWIZ(X, Y, 0, 1), 
NONE),
V_(R8G8_SNORM,   BYTE,   NONE),
V_(R8G8_UINT,UNSIGNED_BYTE,  NONE),
V_(R8G8_SINT,BYTE,   NONE),
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] etnaviv: add support for extended texture formats

2017-06-21 Thread Christian Gmeiner
Signed-off-by: Christian Gmeiner 
---
 src/gallium/drivers/etnaviv/etnaviv_format.c  | 2 +-
 src/gallium/drivers/etnaviv/etnaviv_format.h  | 1 +
 src/gallium/drivers/etnaviv/etnaviv_screen.c  | 3 +++
 src/gallium/drivers/etnaviv/etnaviv_texture.c | 8 +---
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.c 
b/src/gallium/drivers/etnaviv/etnaviv_format.c
index 7c24386..e9cd104 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_format.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_format.c
@@ -231,7 +231,7 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
 uint32_t
 translate_texture_format(enum pipe_format fmt)
 {
-   /* XXX with TEXTURE_FORMAT_EXT and swizzle on newer chips we can
+   /* XXX with swizzle on newer chips we can
 * support much more */
if (!formats[fmt].present)
   return ETNA_NO_MATCH;
diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.h 
b/src/gallium/drivers/etnaviv/etnaviv_format.h
index 549dfda..1170d79 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_format.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_format.h
@@ -31,6 +31,7 @@
 #include 
 
 #define ETNA_NO_MATCH (~0)
+#define EXT_FORMAT (1 << 31)
 
 uint32_t
 translate_texture_format(enum pipe_format fmt);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 6c0735e..a69aef0 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -467,6 +467,9 @@ gpu_supports_texure_format(struct etna_screen *screen, 
uint32_t fmt)
if (fmt >= TEXTURE_FORMAT_DXT1 && fmt <= TEXTURE_FORMAT_DXT4_DXT5)
   return VIV_FEATURE(screen, chipFeatures, DXT_TEXTURE_COMPRESSION);
 
+   if (fmt & EXT_FORMAT)
+  return VIV_FEATURE(screen, chipMinorFeatures1, HALTI0);
+
return true;
 }
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_texture.c 
b/src/gallium/drivers/etnaviv/etnaviv_texture.c
index df77829..f973bcb 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_texture.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_texture.c
@@ -161,6 +161,8 @@ etna_create_sampler_view(struct pipe_context *pctx, struct 
pipe_resource *prsc,
struct etna_sampler_view *sv = CALLOC_STRUCT(etna_sampler_view);
struct etna_resource *res = etna_resource(prsc);
struct etna_context *ctx = etna_context(pctx);
+   const uint32_t format = translate_texture_format(so->format);
+   const bool ext = !!(format & EXT_FORMAT);
 
if (!sv)
   return NULL;
@@ -191,8 +193,7 @@ etna_create_sampler_view(struct pipe_context *pctx, struct 
pipe_resource *prsc,
sv->base.context = pctx;
 
/* merged with sampler state */
-   sv->TE_SAMPLER_CONFIG0 =
-  
VIVS_TE_SAMPLER_CONFIG0_FORMAT(translate_texture_format(sv->base.format));
+   sv->TE_SAMPLER_CONFIG0 = COND(!ext, VIVS_TE_SAMPLER_CONFIG0_FORMAT(format));
sv->TE_SAMPLER_CONFIG0_MASK = 0xffffffff;
 
switch (sv->base.target) {
@@ -215,7 +216,8 @@ etna_create_sampler_view(struct pipe_context *pctx, struct 
pipe_resource *prsc,
   return NULL;
}
 
-   sv->TE_SAMPLER_CONFIG1 = VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_R(so->swizzle_r) |
+   sv->TE_SAMPLER_CONFIG1 = COND(ext, 
VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(format)) |
+VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_R(so->swizzle_r) |
 VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_G(so->swizzle_g) |
 VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_B(so->swizzle_b) |
 VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A(so->swizzle_a) |
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] etnaviv: add support for swizzled texture formats

2017-06-21 Thread Christian Gmeiner
Passes all ext_texture_swizzle piglits.

Signed-off-by: Christian Gmeiner 
---
 src/gallium/drivers/etnaviv/etnaviv_format.c  | 91 ++-
 src/gallium/drivers/etnaviv/etnaviv_format.h  |  4 ++
 src/gallium/drivers/etnaviv/etnaviv_screen.c  |  2 +-
 src/gallium/drivers/etnaviv/etnaviv_texture.c |  9 ++-
 4 files changed, 71 insertions(+), 35 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.c 
b/src/gallium/drivers/etnaviv/etnaviv_format.c
index e9cd104..02b8d52 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_format.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_format.c
@@ -40,6 +40,7 @@ struct etna_format {
unsigned tex;
unsigned rs;
boolean present;
+   const unsigned char tex_swiz[4];
 };
 
 #define RS_FORMAT_NONE ~0
@@ -51,22 +52,31 @@ struct etna_format {
 #define RS_FORMAT_X8B8G8R8(RS_FORMAT_X8R8G8B8 | RS_FORMAT_RB_SWAP)
 #define RS_FORMAT_A8B8G8R8(RS_FORMAT_A8R8G8B8 | RS_FORMAT_RB_SWAP)
 
+#define SWIZ(x,y,z,w) {\
+   PIPE_SWIZZLE_##x,   \
+   PIPE_SWIZZLE_##y,   \
+   PIPE_SWIZZLE_##z,   \
+   PIPE_SWIZZLE_##w\
+}
+
 /* vertex + texture */
-#define VT(pipe, vtxfmt, texfmt, rsfmt)   \
+#define VT(pipe, vtxfmt, texfmt, texswiz, rsfmt)  \
[PIPE_FORMAT_##pipe] = {   \
   .vtx = VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_##vtxfmt, \
   .tex = TEXTURE_FORMAT_##texfmt, \
   .rs = RS_FORMAT_##rsfmt,\
   .present = 1,   \
+  .tex_swiz = texswiz,\
}
 
 /* texture-only */
-#define _T(pipe, fmt, rsfmt)   \
+#define _T(pipe, fmt, swiz, rsfmt) \
[PIPE_FORMAT_##pipe] = {\
   .vtx = ETNA_NO_MATCH,\
   .tex = TEXTURE_FORMAT_##fmt, \
   .rs = RS_FORMAT_##rsfmt, \
   .present = 1,\
+  .tex_swiz = swiz,\
}
 
 /* vertex-only */
@@ -87,9 +97,9 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
V_(R8_USCALED, UNSIGNED_BYTE, NONE),
V_(R8_SSCALED, BYTE,  NONE),
 
-   _T(A8_UNORM, A8, NONE),
-   _T(L8_UNORM, L8, NONE),
-   _T(I8_UNORM, I8, NONE),
+   _T(A8_UNORM, A8, SWIZ(X, Y, Z, W), NONE),
+   _T(L8_UNORM, L8, SWIZ(X, Y, Z, W), NONE),
+   _T(I8_UNORM, I8, SWIZ(X, Y, Z, W), NONE),
 
/* 16-bit */
V_(R16_UNORM,   UNSIGNED_SHORT, NONE),
@@ -100,15 +110,15 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
V_(R16_SSCALED, SHORT,  NONE),
V_(R16_FLOAT,   HALF_FLOAT, NONE),
 
-   _T(B4G4R4A4_UNORM, A4R4G4B4, A4R4G4B4),
-   _T(B4G4R4X4_UNORM, X4R4G4B4, X4R4G4B4),
+   _T(B4G4R4A4_UNORM, A4R4G4B4, SWIZ(X, Y, Z, W), A4R4G4B4),
+   _T(B4G4R4X4_UNORM, X4R4G4B4, SWIZ(X, Y, Z, W), X4R4G4B4),
 
-   _T(L8A8_UNORM, A8L8, NONE),
+   _T(L8A8_UNORM, A8L8, SWIZ(X, Y, Z, W), NONE),
 
-   _T(Z16_UNORM,  D16,  A4R4G4B4),
-   _T(B5G6R5_UNORM,   R5G6B5,   R5G6B5),
-   _T(B5G5R5A1_UNORM, A1R5G5B5, A1R5G5B5),
-   _T(B5G5R5X1_UNORM, X1R5G5B5, X1R5G5B5),
+   _T(Z16_UNORM,  D16,  SWIZ(X, Y, Z, W), A4R4G4B4),
+   _T(B5G6R5_UNORM,   R5G6B5,   SWIZ(X, Y, Z, W), R5G6B5),
+   _T(B5G5R5A1_UNORM, A1R5G5B5, SWIZ(X, Y, Z, W), A1R5G5B5),
+   _T(B5G5R5X1_UNORM, X1R5G5B5, SWIZ(X, Y, Z, W), X1R5G5B5),
 
V_(R8G8_UNORM,   UNSIGNED_BYTE,  NONE),
V_(R8G8_SNORM,   BYTE,   NONE),
@@ -147,25 +157,25 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
 
V_(R8G8B8A8_UNORM,   UNSIGNED_BYTE, A8B8G8R8),
V_(R8G8B8A8_SNORM,   BYTE,  A8B8G8R8),
-   _T(R8G8B8X8_UNORM,   X8B8G8R8,  X8B8G8R8),
+   _T(R8G8B8X8_UNORM,   X8B8G8R8,  SWIZ(X, Y, Z, W), X8B8G8R8),
V_(R8G8B8A8_UINT,UNSIGNED_BYTE, A8B8G8R8),
V_(R8G8B8A8_SINT,BYTE,  A8B8G8R8),
V_(R8G8B8A8_USCALED, UNSIGNED_BYTE, A8B8G8R8),
V_(R8G8B8A8_SSCALED, BYTE,  A8B8G8R8),
 
-   _T(R8G8B8A8_UNORM, A8B8G8R8, A8B8G8R8),
-   _T(R8G8B8X8_UNORM, X8B8G8R8, X8B8G8R8),
+   _T(R8G8B8A8_UNORM, A8B8G8R8, SWIZ(X, Y, Z, W), A8B8G8R8),
+   _T(R8G8B8X8_UNORM, X8B8G8R8, SWIZ(X, Y, Z, W), X8B8G8R8),
 
-   _T(B8G8R8A8_UNORM, A8R8G8B8, A8R8G8B8),
-   _T(B8G8R8X8_UNORM, X8R8G8B8, X8R8G8B8),
+   _T(B8G8R8A8_UNORM, A8R8G8B8, SWIZ(X, Y, Z, W), A8R8G8B8),
+   _T(B8G8R8X8_UNORM, X8R8G8B8, SWIZ(X, Y, Z, W), X8R8G8B8),
 
V_(R10G10B10A2_UNORM,   UNSIGNED_INT_10_10_10_2, NONE),
V_(R10G10B10A2_SNORM,   INT_10_10_10_2,  NONE),
V_(R10G10B10A2_USCALED, UNSIGNED_INT_10_10_10_2, NONE),
V_(R10G10B10A2_SSCALED, INT_10_10_10_2,  NONE),
 
-   _T(X8Z24_UNORM,   D24S8, A8R8G8B8),
-   _T(S8_UINT_Z24_UNORM, D24S8, A8R8G8B8),
+   _T(X8Z24_UNORM,   D24S8, SWIZ(X, Y, Z, W), A8R8G8B8),
+   _T(S8_UINT_Z24_UNORM, D24S8, SWIZ(X, Y, Z, W), A8R8G8B8),
 
/* 48-bit */
V_(R16G16B16_UNORM,   UNSIGNED_SHORT, NONE),
@@ -215,24 +225,22 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {

[Mesa-dev] [PATCH 4/4] etnaviv: add support for snorm textures

2017-06-21 Thread Christian Gmeiner
Based on a patch from Wladimir J. van der Laan and untested due
to lack of hardware. Binary blob emits those formats if GPU supports
HALTI1 (faked with libvivhook).

Signed-off-by: Christian Gmeiner 
---
 src/gallium/drivers/etnaviv/etnaviv_format.c | 7 ---
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 8 ++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.c 
b/src/gallium/drivers/etnaviv/etnaviv_format.c
index c9a8ce2..47f80ac 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_format.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_format.c
@@ -91,7 +91,7 @@ struct etna_format {
 static struct etna_format formats[PIPE_FORMAT_COUNT] = {
/* 8-bit */
V_(R8_UNORM,   UNSIGNED_BYTE, NONE),
-   V_(R8_SNORM,   BYTE,  NONE),
+   VT(R8_SNORM,   BYTE,  EXT_R8_SNORM | EXT_FORMAT, SWIZ(X, 0, 0, 1), 
NONE),
V_(R8_UINT,UNSIGNED_BYTE, NONE),
V_(R8_SINT,BYTE,  NONE),
V_(R8_USCALED, UNSIGNED_BYTE, NONE),
@@ -121,7 +121,7 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
_T(B5G5R5X1_UNORM, X1R5G5B5, SWIZ(X, Y, Z, W), X1R5G5B5),
 
VT(R8G8_UNORM,   UNSIGNED_BYTE,  EXT_G8R8 | EXT_FORMAT, SWIZ(X, Y, 0, 1), 
NONE),
-   V_(R8G8_SNORM,   BYTE,   NONE),
+   VT(R8G8_SNORM,   BYTE,   EXT_G8R8_SNORM | EXT_FORMAT, SWIZ(X, Y, 0, 
1), NONE),
V_(R8G8_UINT,UNSIGNED_BYTE,  NONE),
V_(R8G8_SINT,BYTE,   NONE),
V_(R8G8_USCALED, UNSIGNED_BYTE,  NONE),
@@ -156,8 +156,9 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
V_(A8B8G8R8_UNORM,   UNSIGNED_BYTE, NONE),
 
V_(R8G8B8A8_UNORM,   UNSIGNED_BYTE, A8B8G8R8),
-   V_(R8G8B8A8_SNORM,   BYTE,  A8B8G8R8),
+   VT(R8G8B8A8_SNORM,   BYTE,  EXT_A8B8G8R8_SNORM | EXT_FORMAT, 
SWIZ(X, Y, Z, 1), NONE),
_T(R8G8B8X8_UNORM,   X8B8G8R8,  SWIZ(X, Y, Z, W), X8B8G8R8),
+   _T(R8G8B8X8_SNORM,   EXT_X8B8G8R8_SNORM | EXT_FORMAT, SWIZ(X, Y, Z, W), 
NONE),
V_(R8G8B8A8_UINT,UNSIGNED_BYTE, A8B8G8R8),
V_(R8G8B8A8_SINT,BYTE,  A8B8G8R8),
V_(R8G8B8A8_USCALED, UNSIGNED_BYTE, A8B8G8R8),
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index e310c16..c7c4c95 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -459,7 +459,8 @@ etna_screen_get_timestamp(struct pipe_screen *pscreen)
 }
 
 static bool
-gpu_supports_texure_format(struct etna_screen *screen, uint32_t fmt)
+gpu_supports_texure_format(struct etna_screen *screen, uint32_t fmt,
+   enum pipe_format format)
 {
if (fmt == TEXTURE_FORMAT_ETC1)
   return VIV_FEATURE(screen, chipFeatures, ETC1_TEXTURE_COMPRESSION);
@@ -470,6 +471,9 @@ gpu_supports_texure_format(struct etna_screen *screen, 
uint32_t fmt)
if (fmt & EXT_FORMAT)
   return VIV_FEATURE(screen, chipMinorFeatures1, HALTI0);
 
+   if (util_format_is_snorm(format))
+  return VIV_FEATURE(screen, chipMinorFeatures2, HALTI1);
+
return true;
 }
 
@@ -514,7 +518,7 @@ etna_screen_is_format_supported(struct pipe_screen *pscreen,
if (usage & PIPE_BIND_SAMPLER_VIEW) {
   uint32_t fmt = translate_texture_format(format);
 
-  if (!gpu_supports_texure_format(screen, fmt))
+  if (!gpu_supports_texure_format(screen, fmt, format))
  fmt = ETNA_NO_MATCH;
 
   if (sample_count < 2 && fmt != ETNA_NO_MATCH)
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] i965: Add a RGBX->RGBA fallback for glEGLImageTextureTarget2D()

2017-06-21 Thread Chad Versace
On Tue 20 Jun 2017, Jason Ekstrand wrote:
> On Wed, Jun 7, 2017 at 4:45 PM, Chad Versace <[1]chadvers...@chromium.org>
> wrote:
> 
> On Tue 06 Jun 2017, Daniel Stone wrote:
> > Hi Chad,
> >
> > On 6 June 2017 at 21:36, Chad Versace <[2]chadvers...@chromium.org>
> wrote:
> > > @@ -254,8 +255,22 @@ create_mt_for_dri_image(struct brw_context *brw,
> > >     struct gl_context *ctx = >ctx;
> > >     struct intel_mipmap_tree *mt;
> > >     uint32_t draw_x, draw_y;
> > > +   mesa_format format = image->format;
> > > +
> > > +   if (!ctx->TextureFormatSupported[format]) {
> > > +      /* The texture storage paths in core Mesa detect if the driver
> does not
> > > +       * support the user-requested format, and then searches for a
> > > +       * fallback format. The DRIimage code bypasses core Mesa,
> though. So we
> > > +       * do the fallbacks here for important formats.
> > > +       *
> > > +       * We must support DRM_FOURCC_XBGR textures because the
> Android
> > > +       * framework produces HAL_PIXEL_FORMAT_RGBX winsys 
> surfaces,
> which
> > > +       * the Chrome OS compositor consumes as dma_buf EGLImages.
> > > +       */
> > > +      format = _mesa_format_fallback_rgbx_to_rgba(format);
> > > +   }
> > >
> > > -   if (!ctx->TextureFormatSupported[image->format])
> > > +   if (!ctx->TextureFormatSupported[format])
> > >        return NULL;
> 
> I dislike what I wrote above. There's a much better way to do the
> fallback, a way that handles more types of fallback than rgbx->rgba and
> that's the same as the fallback used by glTexStorage2D(). The better way
> is to re-use the core Mesa code that the comment refers to, like this:
> 
>     mesa_format format = ctx->Driver.ChooseTextureFormat(ctx,
> GL_TEXTURE_2D,
>                                                          internalFormat,
> GL_NONE, GL_NONE);
> 
> As precedent, that's exactly what intel_renderbuffer_format() does.
> 
> 
> Does this mean we're dropping patch 1?  If not, I sent out a new version which
> I find much easier to comprehend.

We still need patch 1 for the intelCreateBuffer paths, which have no
current context, and therefore no ctx->Driver.ChooseTextureFormat.

> > >
> > >     /* Disable creation of the texture's aux buffers because the 
> driver
> exposes
> > > @@ -263,7 +278,7 @@ create_mt_for_dri_image(struct brw_context *brw,
> > >      * buffer's content to the main buffer nor for invalidating the 
> aux
> buffer's
> > >      * content.
> > >      */
> > > -   mt = intel_miptree_create_for_bo(brw, image->bo, image->format,
> > > +   mt = intel_miptree_create_for_bo(brw, image->bo, format,
> > >                                      0, image->width, image->height, 
> 1,
> > >                                      image->pitch,
> > >                                      MIPTREE_LAYOUT_DISABLE_AUX);
> >
> > I wonder if it wouldn't be better to do this in
> > intel_create_image_from_name. That way it would be more obvious
> > up-front what's happening,
> 
> I agree that the intent would become more obvious if the format fallback
> were done at time of import instead of gl*Storage. But I see two
> arguments against it:
> 
>     1. First, the weaker argument.
> 
>        The chosen fallback format,
>        and even the choice to do a fallback at all, is a property of the
>        image's usage and not a property of the image itself. A single
>        image can have multiple uses during its lifetime, and the driver
>        may need a different fallback or no fallback for each. I'm
>        defining "image usage" here in terms of
>        glEGLImageTargetTexture2DOES, glEGLImageTargetRenderbufferSt
> orageOES, and
>        GL_TEXURE_EXTERNAL_OES vs GL_TEXTURE_2D.
> 
>        Which reminds me... I should have submitted an analgous patch for
>        glEGLImageTargetRenderbufferStorageOES().
> 
>        Since the driver may support a given format for texturing but not
>        rendering, or for rendering but not texturing, we would need to do
> at
>        least two format fallbacks during image import, and cache the
> fallback
>        results in the image struct. This approach is possible, but...
>        onto the next bullet.
> 
> 
> I don't think that argument is all that weak
>  
> 
>     2. A more practical argument.
> 
>        If possible, it's better to do the fallback for
>        glEGLImageTextureTarget2DOES() in the same way as for
>        glTexStorage2D(), as I explained above. But that requires access
>        to a GL context; eglCreateImage may be called without
>        a context. [EGL_EXT_image_dma_buf_import explicitly requires that
>        

Re: [Mesa-dev] [PATCH 05/30] i965/miptree: Rework aux enabling

2017-06-21 Thread Jason Ekstrand
On Wed, Jun 21, 2017 at 12:33 PM, Chad Versace 
wrote:

> On Fri 16 Jun 2017, Jason Ekstrand wrote:
> > This commit replaces the complex and confusing set of disable flags with
> > two fairly straightforward fields which describe the intended auxiliary
> > surface usage and whether or not the miptree supports fast clears.
> > Right now, supports_fast_clear can be entirely derived from aux_usage
> > but that will not always be the case.
> >
> > This commit makes functional changes.  One of these changes is that it
> > re-enables multisampled fast-clears which were accidentally disabled in
> > cec30a666930ddb8476a9452a89364a24979ff62 around a year ago.  It should
> > also enable CCS_E for window-system buffers which are Y-tiled.  They
> > will still get a full resolve like CCS_D but we will at least get some
> > of the advantage of compression.
> > ---
> >  src/mesa/drivers/dri/i965/brw_blorp.c |   4 +-
> >  src/mesa/drivers/dri/i965/intel_fbo.c |   2 +-
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 190
> +-
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  43 +++---
> >  4 files changed, 120 insertions(+), 119 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> > index 00092ee..9bd25f0 100644
> > --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> > @@ -762,7 +762,7 @@ do_single_blorp_clear(struct brw_context *brw,
> struct gl_framebuffer *fb,
> > if (set_write_disables(irb, ctx->Color.ColorMask[buf],
> color_write_disable))
> >can_fast_clear = false;
> >
> > -   if (irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS ||
> > +   if (!irb->mt->supports_fast_clear ||
> > !brw_is_color_fast_clear_compatible(brw, irb->mt,
> >Color.ClearColor))
> >can_fast_clear = false;
> >
> > @@ -785,7 +785,7 @@ do_single_blorp_clear(struct brw_context *brw,
> struct gl_framebuffer *fb,
> > */
> >if (!irb->mt->mcs_buf) {
> >   assert(!intel_miptree_is_lossless_compressed(brw, irb->mt));
> > - if (!intel_miptree_alloc_ccs(brw, irb->mt, false)) {
> > + if (!intel_miptree_alloc_ccs(brw, irb->mt)) {
> >  /* MCS allocation failed--probably this will only happen in
> >   * out-of-memory conditions.  But in any case, try to
> recover
> >   * by falling back to a non-blorp clear technique.
> > diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c
> b/src/mesa/drivers/dri/i965/intel_fbo.c
> > index ee4aba9..6a64bcb 100644
> > --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> > +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> > @@ -555,7 +555,7 @@ intel_renderbuffer_update_wrapper(struct
> brw_context *brw,
> >
> > intel_renderbuffer_set_draw_offset(irb);
> >
> > -   if (intel_miptree_wants_hiz_buffer(brw, mt)) {
> > +   if (mt->aux_usage == ISL_AUX_USAGE_HIZ && !mt->hiz_buf) {
> >intel_miptree_alloc_hiz(brw, mt);
> >if (!mt->hiz_buf)
> >return false;
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index 0f6d542..101317f 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -64,7 +64,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> >   */
> >  static enum intel_msaa_layout
> >  compute_msaa_layout(struct brw_context *brw, mesa_format format,
> > -enum intel_aux_disable aux_disable)
> > +uint32_t layout_flags)
> >  {
> > /* Prior to Gen7, all MSAA surfaces used IMS layout. */
> > if (brw->gen < 7)
> > @@ -90,7 +90,7 @@ compute_msaa_layout(struct brw_context *brw,
> mesa_format format,
> > */
> >if (brw->gen == 7 && _mesa_get_format_datatype(format) ==
> GL_INT) {
> >   return INTEL_MSAA_LAYOUT_UMS;
> > -  } else if (aux_disable & INTEL_AUX_DISABLE_MCS) {
> > +  } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
> >   /* We can't use the CMS layout because it uses an aux buffer,
> the MCS
> >* buffer. So fallback to UMS, which is identical to CMS
> without the
> >* MCS. */
> > @@ -148,9 +148,6 @@ intel_miptree_supports_ccs(struct brw_context *brw,
> > if (brw->gen < 7)
> >return false;
> >
> > -   if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
> > -  return false;
> > -
> > /* This function applies only to non-multisampled render targets. */
> > if (mt->num_samples > 1)
> >return false;
> > @@ -215,6 +212,26 @@ intel_miptree_supports_ccs(struct brw_context *brw,
> >return true;
> >  }
> >
> > +static bool
> > +intel_miptree_supports_hiz(struct brw_context *brw,
> > +   struct intel_mipmap_tree *mt)
> > +{
> > +   if (!brw->has_hiz)
> > +  return false;
> > +
> > +   switch (mt->format) {
> > +   case 

Re: [Mesa-dev] [PATCH 0/5] Gallium: Passing drirc options to create_screen() and fixing Rocket League

2017-06-21 Thread Edmondo Tommasina
Hi Marek

In patch 5 you say the words KILL and WQM and I automatically
think of Witcher 2.

This series, with the drirc option set for Witcher 2, fixes the
longstanding black transition bug.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98238

It's quite probable that VP will not update TW2, so a drirc driven fix like
this would be great to have.

I will send the drirc patch for TW2 on top of your series later, after some
more testing.

Thanks
edmondo



On Wed, Jun 21, 2017 at 12:54 AM, Marek Olšák  wrote:
> Hi,
>
> This series updates pipe loaders so that flags such as drirc options
> can be passed to create_screen(). I have compile-tested everything
> except clover.
>
> The first pipe_screen flag is a drirc option to fix incorrect grass
> rendering in Rocket League for radeonsi. Rocket League expects DirectX
> behavior for partial derivative computations after discard/kill, but
> radeonsi implements the more efficient but stricter OpenGL behavior
> and that will remain our default behavior. The new screen flag forces
> radeonsi to use the DX behavior for that game.
>
> Please review.
>
> Thanks,
> Marek
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/30] i965/miptree: Rework aux enabling

2017-06-21 Thread Chad Versace
On Fri 16 Jun 2017, Jason Ekstrand wrote:
> This commit replaces the complex and confusing set of disable flags with
> two fairly straightforward fields which describe the intended auxiliary
> surface usage and whether or not the miptree supports fast clears.
> Right now, supports_fast_clear can be entirely derived from aux_usage
> but that will not always be the case.
> 
> This commit makes functional changes.  One of these changes is that it
> re-enables multisampled fast-clears which were accidentally disabled in
> cec30a666930ddb8476a9452a89364a24979ff62 around a year ago.  It should
> also enable CCS_E for window-system buffers which are Y-tiled.  They
> will still get a full resolve like CCS_D but we will at least get some
> of the advantage of compression.
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c |   4 +-
>  src/mesa/drivers/dri/i965/intel_fbo.c |   2 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 190 
> +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  43 +++---
>  4 files changed, 120 insertions(+), 119 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 00092ee..9bd25f0 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -762,7 +762,7 @@ do_single_blorp_clear(struct brw_context *brw, struct 
> gl_framebuffer *fb,
> if (set_write_disables(irb, ctx->Color.ColorMask[buf], 
> color_write_disable))
>can_fast_clear = false;
>  
> -   if (irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS ||
> +   if (!irb->mt->supports_fast_clear ||
> !brw_is_color_fast_clear_compatible(brw, irb->mt, 
> &ctx->Color.ClearColor))
>can_fast_clear = false;
>  
> @@ -785,7 +785,7 @@ do_single_blorp_clear(struct brw_context *brw, struct 
> gl_framebuffer *fb,
> */
>if (!irb->mt->mcs_buf) {
>   assert(!intel_miptree_is_lossless_compressed(brw, irb->mt));
> - if (!intel_miptree_alloc_ccs(brw, irb->mt, false)) {
> + if (!intel_miptree_alloc_ccs(brw, irb->mt)) {

The above assert is useless post-patch, because it occurs inside if
(!irb->mt->mcs_buf) and the top of intel_miptree_is_lossless_compressed
looks like this:

/* first check */
if (brw->gen < 9)
return false;

/* second check */
if (!mt->mcs_buf)
return false;

...

Just an observation.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/gen6: Use isl-based miptree also for stencil rbs

2017-06-21 Thread Topi Pohjolainen
Fixes dEQP-EGL.functional.image.render_multiple_contexts.
gles2_renderbuffer_stencil_stencil_buffer

CC: Mark Janes 
CC: Jason Ekstrand 
CC: Kenneth Graunke 
Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index abc7f989db..69b02ead78 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -897,7 +897,22 @@ intel_miptree_create_for_bo(struct brw_context *brw,
 {
struct intel_mipmap_tree *mt;
uint32_t tiling, swizzle;
-   GLenum target;
+   const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
+
+   if (brw->gen == 6 && format == MESA_FORMAT_S_UINT8) {
+  mt = make_surface(brw, target, MESA_FORMAT_S_UINT8,
+0, 0, width, height, depth, 1, ISL_TILING_W,
+ISL_SURF_USAGE_STENCIL_BIT |
+ISL_SURF_USAGE_TEXTURE_BIT,
+BO_ALLOC_FOR_RENDER, bo);
+  if (!mt)
+ return NULL;
+
+  assert(bo->size >= mt->surf.size);
+
+  brw_bo_reference(bo);
+  return mt;
+   }
 
   brw_bo_get_tiling(bo, &tiling, &swizzle);
 
@@ -912,8 +927,6 @@ intel_miptree_create_for_bo(struct brw_context *brw,
 */
assert(pitch >= 0);
 
-   target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
-
/* The BO already has a tiling format and we shouldn't confuse the lower
 * layers by making it try to find a tiling format again.
 */
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/30] i965/miptree: Rework aux enabling

2017-06-21 Thread Chad Versace
On Fri 16 Jun 2017, Jason Ekstrand wrote:
> This commit replaces the complex and confusing set of disable flags with
> two fairly straightforward fields which describe the intended auxiliary
> surface usage and whether or not the miptree supports fast clears.
> Right now, supports_fast_clear can be entirely derived from aux_usage
> but that will not always be the case.
> 
> This commit makes functional changes.  One of these changes is that it
> re-enables multisampled fast-clears which were accidentally disabled in
> cec30a666930ddb8476a9452a89364a24979ff62 around a year ago.  It should
> also enable CCS_E for window-system buffers which are Y-tiled.  They
> will still get a full resolve like CCS_D but we will at least get some
> of the advantage of compression.
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c |   4 +-
>  src/mesa/drivers/dri/i965/intel_fbo.c |   2 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 190 
> +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  43 +++---
>  4 files changed, 120 insertions(+), 119 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 00092ee..9bd25f0 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -762,7 +762,7 @@ do_single_blorp_clear(struct brw_context *brw, struct 
> gl_framebuffer *fb,
> if (set_write_disables(irb, ctx->Color.ColorMask[buf], 
> color_write_disable))
>can_fast_clear = false;
>  
> -   if (irb->mt->aux_disable & INTEL_AUX_DISABLE_CCS ||
> +   if (!irb->mt->supports_fast_clear ||
> !brw_is_color_fast_clear_compatible(brw, irb->mt, 
> &ctx->Color.ClearColor))
>can_fast_clear = false;
>  
> @@ -785,7 +785,7 @@ do_single_blorp_clear(struct brw_context *brw, struct 
> gl_framebuffer *fb,
> */
>if (!irb->mt->mcs_buf) {
>   assert(!intel_miptree_is_lossless_compressed(brw, irb->mt));
> - if (!intel_miptree_alloc_ccs(brw, irb->mt, false)) {
> + if (!intel_miptree_alloc_ccs(brw, irb->mt)) {
>  /* MCS allocation failed--probably this will only happen in
>   * out-of-memory conditions.  But in any case, try to recover
>   * by falling back to a non-blorp clear technique.
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
> b/src/mesa/drivers/dri/i965/intel_fbo.c
> index ee4aba9..6a64bcb 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -555,7 +555,7 @@ intel_renderbuffer_update_wrapper(struct brw_context *brw,
>  
> intel_renderbuffer_set_draw_offset(irb);
>  
> -   if (intel_miptree_wants_hiz_buffer(brw, mt)) {
> +   if (mt->aux_usage == ISL_AUX_USAGE_HIZ && !mt->hiz_buf) {
>intel_miptree_alloc_hiz(brw, mt);
>if (!mt->hiz_buf)
>return false;
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 0f6d542..101317f 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -64,7 +64,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
>   */
>  static enum intel_msaa_layout
>  compute_msaa_layout(struct brw_context *brw, mesa_format format,
> -enum intel_aux_disable aux_disable)
> +uint32_t layout_flags)
>  {
> /* Prior to Gen7, all MSAA surfaces used IMS layout. */
> if (brw->gen < 7)
> @@ -90,7 +90,7 @@ compute_msaa_layout(struct brw_context *brw, mesa_format 
> format,
> */
>if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
>   return INTEL_MSAA_LAYOUT_UMS;
> -  } else if (aux_disable & INTEL_AUX_DISABLE_MCS) {
> +  } else if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) {
>   /* We can't use the CMS layout because it uses an aux buffer, the 
> MCS
>* buffer. So fallback to UMS, which is identical to CMS without the
>* MCS. */
> @@ -148,9 +148,6 @@ intel_miptree_supports_ccs(struct brw_context *brw,
> if (brw->gen < 7)
>return false;
>  
> -   if (mt->aux_disable & INTEL_AUX_DISABLE_MCS)
> -  return false;
> -
> /* This function applies only to non-multisampled render targets. */
> if (mt->num_samples > 1)
>return false;
> @@ -215,6 +212,26 @@ intel_miptree_supports_ccs(struct brw_context *brw,
>return true;
>  }
>  
> +static bool
> +intel_miptree_supports_hiz(struct brw_context *brw,
> +   struct intel_mipmap_tree *mt)
> +{
> +   if (!brw->has_hiz)
> +  return false;
> +
> +   switch (mt->format) {
> +   case MESA_FORMAT_Z_FLOAT32:
> +   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
> +   case MESA_FORMAT_Z24_UNORM_X8_UINT:
> +   case MESA_FORMAT_Z24_UNORM_S8_UINT:
> +   case MESA_FORMAT_Z_UNORM16:
> +  return true;
> +   default:
> +  return false;
> +   }
> +}
> +
> +
>  /* On Gen9 

Re: [Mesa-dev] [PATCH] i965/CFL: Add PCI Ids for Coffee Lake.

2017-06-21 Thread Anuj Phogat
On Wed, Jun 21, 2017 at 11:19 AM, Anusha Srivatsa
 wrote:
> Coffee Lake has a gen9 graphics following KBL.
> From 3D perspective, CFL is a clone of KBL/SKL features.
>
> v2: Change commit message, correct alignment 
> v3: Update IDs.
>
> Cc: Benjamin Widawsky 
> Cc: Anuj Phogat 
> Cc: Rodrigo Vivi 
> Signed-off-by: Anusha Srivatsa 
> ---
>  include/pci_ids/i965_pci_ids.h | 11 +++
>  src/intel/common/gen_device_info.c | 23 +++
>  src/intel/common/gen_device_info.h |  1 +
>  3 files changed, 35 insertions(+)
>
> diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
> index b296359..5e9566c 100644
> --- a/include/pci_ids/i965_pci_ids.h
> +++ b/include/pci_ids/i965_pci_ids.h
> @@ -165,6 +165,17 @@ CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 
> 650 (Kaby Lake GT3)")
>  CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
>  CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)")
>  CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
> +CHIPSET(0x3E90, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 6x1)")
> +CHIPSET(0x3E93, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 4x1)")
(Coffeelake 2x6 GT1). All CFL GT1 have 2 subslices and 6 EU/subslice.
> +CHIPSET(0x3E91, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 4x2)")
> +CHIPSET(0x3E92, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
> +CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
> +CHIPSET(0x3E9B, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
(Coffeelake 3x8 GT2). All CFL GT2 have 3 subslices and 8 EU/subslice.
> +CHIPSET(0x3E94, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
This is GT2 with 24 EUs.
> +CHIPSET(0x3EA6, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
> +CHIPSET(0x3EA7, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
> +CHIPSET(0x3EA8, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
> +CHIPSET(0x3EA5, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
(Coffeelake 3x8 GT3). All CFL GT3 have 3 subslices and 8 EU/subslice.
>  CHIPSET(0x5A49, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
>  CHIPSET(0x5A4A, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
>  CHIPSET(0x5A41, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
> diff --git a/src/intel/common/gen_device_info.c 
> b/src/intel/common/gen_device_info.c
> index 423748e..d49beaa 100644
> --- a/src/intel/common/gen_device_info.c
> +++ b/src/intel/common/gen_device_info.c
> @@ -607,6 +607,29 @@ static const struct gen_device_info 
> gen_device_info_glk_2x6 = {
> .is_geminilake = true,
>  };
>
> +static const struct gen_device_info gen_device_info_cfl_gt1 = {
> +   GEN9_FEATURES,
> +   .is_coffeelake = true,
> +   .gt = 1,
> +
> +   .num_slices = 1,
Also initialize l3_banks variable.
> +};
> +static const struct gen_device_info gen_device_info_cfl_gt2 = {
> +   GEN9_FEATURES,
> +   .is_coffeelake = true,
> +   .gt = 2,
> +
> +   .num_slices = 1,
Here too.
> +};
> +
> +static const struct gen_device_info gen_device_info_cfl_gt3 = {
> +   GEN9_FEATURES,
> +   .is_coffeelake = true,
> +   .gt = 3,
> +
> +   .num_slices = 2,
and here.
> +};
> +
>  #define GEN10_HW_INFO   \
> .gen = 10,   \
> .num_thread_per_eu = 7,  \
> diff --git a/src/intel/common/gen_device_info.h 
> b/src/intel/common/gen_device_info.h
> index cc83857..a83251c 100644
> --- a/src/intel/common/gen_device_info.h
> +++ b/src/intel/common/gen_device_info.h
> @@ -46,6 +46,7 @@ struct gen_device_info
> bool is_broxton;
> bool is_kabylake;
> bool is_geminilake;
> +   bool is_coffeelake;
> bool is_cannonlake;
>
> bool has_hiz_and_separate_stencil;
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 3/3] i965/i915: Add UYVY as the supported format

2017-06-21 Thread Kristian H. Kristensen
Johnson Lin  writes:

> Trigger the correct sampler options for it. Similar with YUYV
> ---
>  src/intel/compiler/brw_compiler.h| 1 +
>  src/intel/compiler/brw_nir.c | 1 +
>  src/mesa/drivers/dri/i915/intel_screen.c | 9 ++---
>  src/mesa/drivers/dri/i965/brw_wm.c   | 7 +++
>  src/mesa/drivers/dri/i965/intel_screen.c | 9 ++---
>  5 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/src/intel/compiler/brw_compiler.h 
> b/src/intel/compiler/brw_compiler.h
> index 78873744ce5f..3f383403883c 100644
> --- a/src/intel/compiler/brw_compiler.h
> +++ b/src/intel/compiler/brw_compiler.h
> @@ -168,6 +168,7 @@ struct brw_sampler_prog_key_data {
> uint32_t y_u_v_image_mask;
> uint32_t y_uv_image_mask;
> uint32_t yx_xuxv_image_mask;
> +   uint32_t xy_uxvx_image_mask;
>  };
>  
>  /**
> diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
> index de8f519b4e10..49d3cf365647 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -770,6 +770,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
> tex_options.lower_y_uv_external = key_tex->y_uv_image_mask;
> tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask;
> tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask;
> +   tex_options.lower_xy_uxvx_external = key_tex->xy_uxvx_image_mask;
>  
> if (nir_lower_tex(nir, &tex_options)) {
>nir_validate_shader(nir);
> diff --git a/src/mesa/drivers/dri/i915/intel_screen.c 
> b/src/mesa/drivers/dri/i915/intel_screen.c
> index cba5434b5e1b..a81c7eb07d6a 100644
> --- a/src/mesa/drivers/dri/i915/intel_screen.c
> +++ b/src/mesa/drivers/dri/i915/intel_screen.c
> @@ -227,17 +227,20 @@ static struct intel_image_format intel_image_formats[] 
> = {
>   { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
> { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
>  
> -   /* For YUYV buffers, we set up two overlapping DRI images and treat
> +   /* For YUYV&UYVY buffers, we set up two overlapping DRI images and treat

Spaces around the '&', but just write out 'and' instead.

>  * them as planar buffers in the compositors.  Plane 0 is GR88 and
>  * samples YU or YV pairs and places Y into the R component, while
> -* plane 1 is ARGB and samples YUYV clusters and places pairs and
> +* plane 1 is ARGB and samples YUYV/UYVY clusters and places pairs and
>  * places U into the G component and V into A.  This lets the
>  * texture sampler interpolate the Y components correctly when
>  * sampling from plane 0, and interpolate U and V correctly when
>  * sampling from plane 1. */
> { __DRI_IMAGE_FOURCC_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2,
>   { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 },
> -   { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } }
> +   { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } },
> +   { __DRI_IMAGE_FOURCC_UYVY, __DRI_IMAGE_COMPONENTS_Y_UXVX, 2,
> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 },
> +   { 0, 1, 0, __DRI_IMAGE_FORMAT_ABGR, 4 } } }
>  };
>  
>  static __DRIimage *
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index 0f075a11f756..a8ec1f5c2368 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -270,6 +270,10 @@ brw_debug_recompile_sampler_key(struct brw_context *brw,
> found |= key_debug(brw, "yx_xuxv image bound",
>old_key->yx_xuxv_image_mask,
>key->yx_xuxv_image_mask);
> +   found |= key_debug(brw, "xy_uxvx image bound",
> +  old_key->xy_uxvx_image_mask,
> +  key->xy_uxvx_image_mask);
> +
>  
> for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
>found |= key_debug(brw, "textureGather workarounds",
> @@ -412,6 +416,9 @@ brw_populate_sampler_prog_key_data(struct gl_context *ctx,
>  case __DRI_IMAGE_COMPONENTS_Y_XUXV:
> key->yx_xuxv_image_mask |= 1 << s;
> break;
> +case __DRI_IMAGE_COMPONENTS_Y_UXVX:
> +   key->xy_uxvx_image_mask |= 1 << s;
> +   break;
>  default:
> break;
>  }
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 83b8a24509a4..4ffedf1cc07f 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -278,17 +278,20 @@ static struct intel_image_format intel_image_formats[] 
> = {
>   { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
> { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
>  
> -   /* For YUYV buffers, we set up two overlapping DRI images and treat
> +   /* For YUYV&UYVY buffers, we set up two overlapping DRI images and treat

Same here.

>  * them as planar buffers in the compositors.  Plane 0 is GR88 and
>  * samples YU or YV pairs and places Y into the R component, while
> -   

Re: [Mesa-dev] [PATCH v3 1/3] dri: Add UYVY as available format

2017-06-21 Thread Kristian H. Kristensen
Johnson Lin  writes:

> UYVY differs from YUYV in byte order.
> YUYV is already declared in dri_interface.h;
> this CL adds the definitions for UYVY.
> Drivers can add UYVY as a supported format.

This series looks good now. There are a few stylistic nits in the
comment formatting below, but with that fixed, this is

Reviewed-by: Kristian H. Kristensen 

> ---
>  include/GL/internal/dri_interface.h | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/include/GL/internal/dri_interface.h 
> b/include/GL/internal/dri_interface.h
> index fc2d4bbe22ef..6992da16d5f8 100644
> --- a/include/GL/internal/dri_interface.h
> +++ b/include/GL/internal/dri_interface.h
> @@ -1211,6 +1211,7 @@ struct __DRIdri2ExtensionRec {
>  #define __DRI_IMAGE_FOURCC_NV12  0x3231564e
>  #define __DRI_IMAGE_FOURCC_NV16  0x3631564e
>  #define __DRI_IMAGE_FOURCC_YUYV  0x56595559
> +#define __DRI_IMAGE_FOURCC_UYVY  0x59565955
>  
>  #define __DRI_IMAGE_FOURCC_YVU4100x39555659
>  #define __DRI_IMAGE_FOURCC_YVU4110x31315659
> @@ -1224,7 +1225,7 @@ struct __DRIdri2ExtensionRec {
>   * RGB and RGBA are may be usable directly as images but its still
>   * recommended to call fromPlanar with plane == 0.
>   *
> - * Y_U_V, Y_UV and Y_XUXV all requires call to fromPlanar to create
> + * Y_U_V, Y_UV,Y_XUXV and Y_UXVX all requires call to fromPlanar to create
  ^
Space after the comma here.

>   * usable sub-images, sampling from images return raw YUV data and
>   * color conversion needs to be done in the shader.
>   *
> @@ -1236,6 +1237,7 @@ struct __DRIdri2ExtensionRec {
>  #define __DRI_IMAGE_COMPONENTS_Y_U_V 0x3003
>  #define __DRI_IMAGE_COMPONENTS_Y_UV  0x3004
>  #define __DRI_IMAGE_COMPONENTS_Y_XUXV0x3005
> +#define __DRI_IMAGE_COMPONENTS_Y_UXVX0x3008
>  #define __DRI_IMAGE_COMPONENTS_R 0x3006
>  #define __DRI_IMAGE_COMPONENTS_RG0x3007
>  
> -- 
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/30] i965: Move the DRIimage -> miptree code to intel_mipmap_tree.c

2017-06-21 Thread Pohjolainen, Topi
On Fri, Jun 16, 2017 at 03:41:28PM -0700, Jason Ekstrand wrote:
> This is mostly a direct port.  The only bit of refactoring that was done
> was to make creating a planar miptree be an early return from the
> non-planar case.  Alternatively, we could have three functions: two
> helpers and a main function to just call the right helper.  Making the
> planar case an early return seemed cleaner.
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 91 +
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  5 ++
>  src/mesa/drivers/dri/i965/intel_tex_image.c   | 97 
> +--
>  3 files changed, 97 insertions(+), 96 deletions(-)

Reviewed-by: Topi Pohjolainen 

> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 101317f..023c6aa 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -27,6 +27,7 @@
>  #include 
>  
>  #include "intel_batchbuffer.h"
> +#include "intel_image.h"
>  #include "intel_mipmap_tree.h"
>  #include "intel_tex.h"
>  #include "intel_blit.h"
> @@ -856,6 +857,96 @@ intel_miptree_create_for_bo(struct brw_context *brw,
> return mt;
>  }
>  
> +static struct intel_mipmap_tree *
> +miptree_create_for_planar_image(struct brw_context *brw,
> +__DRIimage *image, GLenum target)
> +{
> +   struct intel_image_format *f = image->planar_format;
> +   struct intel_mipmap_tree *planar_mt;
> +
> +   for (int i = 0; i < f->nplanes; i++) {
> +  const int index = f->planes[i].buffer_index;
> +  const uint32_t dri_format = f->planes[i].dri_format;
> +  const mesa_format format = driImageFormatToGLFormat(dri_format);
> +  const uint32_t width = image->width >> f->planes[i].width_shift;
> +  const uint32_t height = image->height >> f->planes[i].height_shift;
> +
> +  /* Disable creation of the texture's aux buffers because the driver
> +   * exposes no EGL API to manage them. That is, there is no API for
> +   * resolving the aux buffer's content to the main buffer nor for
> +   * invalidating the aux buffer's content.
> +   */
> +  struct intel_mipmap_tree *mt =
> + intel_miptree_create_for_bo(brw, image->bo, format,
> + image->offsets[index],
> + width, height, 1,
> + image->strides[index],
> + MIPTREE_LAYOUT_DISABLE_AUX);
> +  if (mt == NULL)
> + return NULL;
> +
> +  mt->target = target;
> +  mt->total_width = width;
> +  mt->total_height = height;
> +
> +  if (i == 0)
> + planar_mt = mt;
> +  else
> + planar_mt->plane[i - 1] = mt;
> +   }
> +
> +   return planar_mt;
> +}
> +
> +struct intel_mipmap_tree *
> +intel_miptree_create_for_dri_image(struct brw_context *brw,
> +   __DRIimage *image, GLenum target)
> +{
> +   if (image->planar_format && image->planar_format->nplanes > 0)
> +  return miptree_create_for_planar_image(brw, image, target);
> +
> +   if (!brw->ctx.TextureFormatSupported[image->format])
> +  return NULL;
> +
> +   /* Disable creation of the texture's aux buffers because the driver 
> exposes
> +* no EGL API to manage them. That is, there is no API for resolving the 
> aux
> +* buffer's content to the main buffer nor for invalidating the aux 
> buffer's
> +* content.
> +*/
> +   struct intel_mipmap_tree *mt =
> +  intel_miptree_create_for_bo(brw, image->bo, image->format,
> +  0, image->width, image->height, 1,
> +  image->pitch,
> +  MIPTREE_LAYOUT_DISABLE_AUX);
> +   if (mt == NULL)
> +  return NULL;
> +
> +   mt->target = target;
> +   mt->total_width = image->width;
> +   mt->total_height = image->height;
> +   mt->level[0].slice[0].x_offset = image->tile_x;
> +   mt->level[0].slice[0].y_offset = image->tile_y;
> +
> +   /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
> +* for EGL images from non-tile aligned sufaces in gen4 hw and earlier 
> which has
> +* trouble resolving back to destination image due to alignment issues.
> +*/
> +   if (!brw->has_surface_tile_offset) {
> +  uint32_t draw_x, draw_y;
> +  intel_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y);
> +
> +  if (draw_x != 0 || draw_y != 0) {
> + _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__);
> + intel_miptree_release(&mt);
> + return NULL;
> +  }
> +   }
> +
> +   mt->offset = image->offset;
> +
> +   return mt;
> +}
> +
>  /**
>   * For a singlesample renderbuffer, this simply wraps the given BO with a
>   * miptree.
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
> 

Re: [Mesa-dev] [PATCH] i965: Remove spurious mutex frobbing around call to intel_miptree_blit

2017-06-21 Thread Kristian H. Kristensen
Ian Romanick  writes:

> On 06/20/2017 01:33 PM, Kristian Høgsberg wrote:
>> On Mon, Jun 19, 2017 at 2:33 PM, Ian Romanick  wrote:
>>> From: Ian Romanick 
>>>
>>> These locks were added in 2f28a0dc, but I don't see anything in the
>>> intel_miptree_blit path that should make this necessary.
>> 
>> I doubt it's needed now with the new blorp. If I remember correctly, I
>> had to drop the lock there since intel_miptree_blit() could hit the XY
>> blit path that requires a fast clear resolve. The fast resolve being
>> meta, would then try to lock the texture again.
>
> I figured it was something like that.  If I add that commentary to the
> commit message, can I call that a Reviewed-by?

Certainly.

Kristian

>> Kristian
>> 
>>> Signed-off-by: Ian Romanick 
>>> Cc: Kristian Høgsberg 
>>> ---
>>>  src/mesa/drivers/dri/i965/intel_tex_copy.c | 19 ++-
>>>  1 file changed, 6 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c 
>>> b/src/mesa/drivers/dri/i965/intel_tex_copy.c
>>> index 9c255ae..e0d5cad 100644
>>> --- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
>>> +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
>>> @@ -51,7 +51,6 @@ intel_copy_texsubimage(struct brw_context *brw,
>>> GLint x, GLint y, GLsizei width, GLsizei height)
>>>  {
>>> const GLenum internalFormat = intelImage->base.Base.InternalFormat;
>>> -   bool ret;
>>>
>>> /* No pixel transfer operations (zoom, bias, mapping), just a blit */
>>> if (brw->ctx._ImageTransferState)
>>> @@ -83,19 +82,13 @@ intel_copy_texsubimage(struct brw_context *brw,
>>> int dst_slice = slice + intelImage->base.Base.Face +
>>> intelImage->base.Base.TexObject->MinLayer;
>>>
>>> -   _mesa_unlock_texture(&brw->ctx, intelImage->base.Base.TexObject);
>>> -
>>> /* blit from src buffer to texture */
>>> -   ret = intel_miptree_blit(brw,
>>> -irb->mt, irb->mt_level, irb->mt_layer,
>>> -x, y, irb->Base.Base.Name == 0,
>>> -intelImage->mt, dst_level, dst_slice,
>>> -dstx, dsty, false,
>>> -width, height, GL_COPY);
>>> -
>>> -   _mesa_lock_texture(&brw->ctx, intelImage->base.Base.TexObject);
>>> -
>>> -   return ret;
>>> +   return intel_miptree_blit(brw,
>>> + irb->mt, irb->mt_level, irb->mt_layer,
>>> + x, y, irb->Base.Base.Name == 0,
>>> + intelImage->mt, dst_level, dst_slice,
>>> + dstx, dsty, false,
>>> + width, height, GL_COPY);
>>>  }
>>>
>>>
>>> --
>>> 2.9.4
>>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: Properly fold constants in SPLIT operation

2017-06-21 Thread Ilia Mirkin
Wait, this is actively buggy! How did this ever work :( I guess we
don't split immediates too frequently, and I was testing it with
zeros or something.

Can you figure out the commit where I added this idiotic code and add
a Fixes: tag?

Reviewed-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org

On Mon, Jun 12, 2017 at 4:53 PM, Pierre Moreau  wrote:
> Signed-off-by: Pierre Moreau 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index e032255178..57223d311c 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -975,8 +975,9 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> &imm0, int s)
>bld.setPosition(i, false);
>
>uint8_t size = i->getDef(0)->reg.size;
> -  uint32_t mask = (1ULL << size) - 1;
> -  assert(size <= 32);
> +  uint8_t bitsize = size * 8;
> +  uint32_t mask = (1ULL << bitsize) - 1;
> +  assert(bitsize <= 32);
>
>uint64_t val = imm0.reg.data.u64;
>for (int8_t d = 0; i->defExists(d); ++d) {
> @@ -984,7 +985,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> &imm0, int s)
>   assert(def->reg.size == size);
>
>   newi = bld.mkMov(def, bld.mkImm((uint32_t)(val & mask)), TYPE_U32);

Is that what you want here? Should this be typeOfSize(size) ?

I guess you want to split this into 32-bit values anyways since that's
what everything processes... eventually we might want to play around
with the SIMD opcodes but probably not soon.

> - val >>= size;
> + val >>= bitsize;
>}
>delete_Instruction(prog, i);
>break;
> --
> 2.13.1
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/miptree: Move isl_surf_get_(hiz|mcs)_surf out of the assert

2017-06-21 Thread Jason Ekstrand
On Wed, Jun 21, 2017 at 11:20 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Wed, Jun 21, 2017 at 11:16:39AM -0700, Jason Ekstrand wrote:
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101538
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101539
>
> I guess also:
>
> https://bugs.freedesktop.org/show_bug.cgi?id=101535
>
> Thanks for the quick fix!!
>

No problem.  I've landed it now so people's desktops can start working again.


> Reviewed-by: Topi Pohjolainen 
>
> > Cc: Topi Pohjolainen 
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 ++
> >  1 file changed, 6 insertions(+), 4 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index abc7f98..3b7262f 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -1672,8 +1672,9 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> >  * calculate equivalent MCS surface against it.
> >  */
> > intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
> > -   assert(isl_surf_get_mcs_surf(&brw->isl_dev, &temp_main_surf,
> > -&temp_mcs_surf));
> > +   MAYBE_UNUSED bool ok =
> > +  isl_surf_get_mcs_surf(&brw->isl_dev, &temp_main_surf,
> &temp_mcs_surf);
> > +   assert(ok);
> >
> > /* Buffer needs to be initialised requiring the buffer to be
> immediately
> >  * mapped to cpu space for writing. Therefore do not use the gpu
> access
> > @@ -1832,8 +1833,9 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
> > struct isl_surf temp_hiz_surf;
> >
> > intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
> > -   assert(isl_surf_get_hiz_surf(&brw->isl_dev, &temp_main_surf,
> > -&temp_hiz_surf));
> > +   MAYBE_UNUSED bool ok =
> > +  isl_surf_get_hiz_surf(&brw->isl_dev, &temp_main_surf,
> &temp_hiz_surf);
> > +   assert(ok);
> >
> > const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
> > mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
> > --
> > 2.5.0.400.gff86faf
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/miptree: Move isl_surf_get_(hiz|mcs)_surf out of the assert

2017-06-21 Thread Pohjolainen, Topi
On Wed, Jun 21, 2017 at 11:16:39AM -0700, Jason Ekstrand wrote:
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101538
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101539

I guess also:

https://bugs.freedesktop.org/show_bug.cgi?id=101535

Thanks for the quick fix!!

Reviewed-by: Topi Pohjolainen 

> Cc: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index abc7f98..3b7262f 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1672,8 +1672,9 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
>  * calculate equivalent MCS surface against it.
>  */
> intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
> -   assert(isl_surf_get_mcs_surf(&brw->isl_dev, &temp_main_surf,
> -&temp_mcs_surf));
> +   MAYBE_UNUSED bool ok =
> +  isl_surf_get_mcs_surf(&brw->isl_dev, &temp_main_surf, &temp_mcs_surf);
> +   assert(ok);
>  
> /* Buffer needs to be initialised requiring the buffer to be immediately
>  * mapped to cpu space for writing. Therefore do not use the gpu access
> @@ -1832,8 +1833,9 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
> struct isl_surf temp_hiz_surf;
>  
> intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
> -   assert(isl_surf_get_hiz_surf(&brw->isl_dev, &temp_main_surf,
> -&temp_hiz_surf));
> +   MAYBE_UNUSED bool ok =
> +  isl_surf_get_hiz_surf(&brw->isl_dev, &temp_main_surf, &temp_hiz_surf);
> +   assert(ok);
>  
> const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
> mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/CFL: Add PCI Ids for Coffee Lake.

2017-06-21 Thread Anusha Srivatsa
Coffee Lake has gen9 graphics, following KBL.
From a 3D perspective, CFL is a clone of KBL/SKL features.

v2: Change commit message, correct alignment 
v3: Update IDs.

Cc: Benjamin Widawsky 
Cc: Anuj Phogat 
Cc: Rodrigo Vivi 
Signed-off-by: Anusha Srivatsa 
---
 include/pci_ids/i965_pci_ids.h | 11 +++
 src/intel/common/gen_device_info.c | 23 +++
 src/intel/common/gen_device_info.h |  1 +
 3 files changed, 35 insertions(+)

diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index b296359..5e9566c 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -165,6 +165,17 @@ CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 
(Kaby Lake GT3)")
 CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
 CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)")
 CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
+CHIPSET(0x3E90, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 6x1)")
+CHIPSET(0x3E93, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 4x1)")
+CHIPSET(0x3E91, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 4x2)")
+CHIPSET(0x3E92, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
+CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
+CHIPSET(0x3E9B, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 6x2)")
+CHIPSET(0x3E94, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
+CHIPSET(0x3EA6, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
+CHIPSET(0x3EA7, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
+CHIPSET(0x3EA8, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
+CHIPSET(0x3EA5, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 4x3)")
 CHIPSET(0x5A49, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
 CHIPSET(0x5A4A, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
 CHIPSET(0x5A41, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
diff --git a/src/intel/common/gen_device_info.c 
b/src/intel/common/gen_device_info.c
index 423748e..d49beaa 100644
--- a/src/intel/common/gen_device_info.c
+++ b/src/intel/common/gen_device_info.c
@@ -607,6 +607,29 @@ static const struct gen_device_info 
gen_device_info_glk_2x6 = {
.is_geminilake = true,
 };
 
+static const struct gen_device_info gen_device_info_cfl_gt1 = {
+   GEN9_FEATURES,
+   .is_coffeelake = true,
+   .gt = 1,
+
+   .num_slices = 1,
+};
+static const struct gen_device_info gen_device_info_cfl_gt2 = {
+   GEN9_FEATURES,
+   .is_coffeelake = true,
+   .gt = 2,
+
+   .num_slices = 1,
+};
+
+static const struct gen_device_info gen_device_info_cfl_gt3 = {
+   GEN9_FEATURES,
+   .is_coffeelake = true,
+   .gt = 3,
+
+   .num_slices = 2,
+};
+
 #define GEN10_HW_INFO   \
.gen = 10,   \
.num_thread_per_eu = 7,  \
diff --git a/src/intel/common/gen_device_info.h 
b/src/intel/common/gen_device_info.h
index cc83857..a83251c 100644
--- a/src/intel/common/gen_device_info.h
+++ b/src/intel/common/gen_device_info.h
@@ -46,6 +46,7 @@ struct gen_device_info
bool is_broxton;
bool is_kabylake;
bool is_geminilake;
+   bool is_coffeelake;
bool is_cannonlake;
 
bool has_hiz_and_separate_stencil;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH libdrm 3/3] intel: PCI Ids for U SKU in CFL

2017-06-21 Thread Anusha Srivatsa
Add the PCI IDs for U SKU in CFL by following the spec.

v2: Update IDs

Cc: Rodrigo Vivi 
Signed-off-by: Anusha Srivatsa 
---
 intel/intel_chipset.h | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/intel/intel_chipset.h b/intel/intel_chipset.h
index fed5a0d..891b50f 100644
--- a/intel/intel_chipset.h
+++ b/intel/intel_chipset.h
@@ -228,6 +228,10 @@
 #define PCI_CHIP_COFFEELAKE_S_GT2_3 0x3E96
 #define PCI_CHIP_COFFEELAKE_H_GT2_1 0x3E9B
 #define PCI_CHIP_COFFEELAKE_H_GT2_2 0x3E94
+#define PCI_CHIP_COFFEELAKE_U_GT3_1 0x3EA5
+#define PCI_CHIP_COFFEELAKE_U_GT3_2 0x3EA6
+#define PCI_CHIP_COFFEELAKE_U_GT3_3 0x3EA7
+#define PCI_CHIP_COFFEELAKE_U_GT3_4 0x3EA8
 
 #define IS_MOBILE(devid)   ((devid) == PCI_CHIP_I855_GM || \
 (devid) == PCI_CHIP_I915_GM || \
@@ -469,8 +473,14 @@
 #define IS_CFL_H(devid) ((devid) == PCI_CHIP_COFFEELAKE_H_GT2_1 || \
  (devid) == PCI_CHIP_COFFEELAKE_H_GT2_2)
 
+#define IS_CFL_U(devid) ((devid) == PCI_CHIP_COFFEELAKE_U_GT3_1 || \
+ (devid) == PCI_CHIP_COFFEELAKE_U_GT3_2 || \
+ (devid) == PCI_CHIP_COFFEELAKE_U_GT3_3 || \
+ (devid) == PCI_CHIP_COFFEELAKE_U_GT3_4)
+
 #define IS_COFFEELAKE(devid)   (IS_CFL_S(devid) || \
-   IS_CFL_H(devid))
+   IS_CFL_H(devid) || \
+   IS_CFL_U(devid))
 
 #define IS_GEN9(devid) (IS_SKYLAKE(devid)  || \
 IS_BROXTON(devid)  || \
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH libdrm 2/3] intel: PCI Ids for H SKU in CFL

2017-06-21 Thread Anusha Srivatsa
Add the PCI IDs for H SKU in CFL by following the spec.

v2: Update IDs

Cc: Rodrigo Vivi 
Signed-off-by: Anusha Srivatsa 
---
 intel/intel_chipset.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/intel/intel_chipset.h b/intel/intel_chipset.h
index aeb72ba..fed5a0d 100644
--- a/intel/intel_chipset.h
+++ b/intel/intel_chipset.h
@@ -226,6 +226,8 @@
 #define PCI_CHIP_COFFEELAKE_S_GT2_1 0x3E91
 #define PCI_CHIP_COFFEELAKE_S_GT2_2 0x3E92
 #define PCI_CHIP_COFFEELAKE_S_GT2_3 0x3E96
+#define PCI_CHIP_COFFEELAKE_H_GT2_1 0x3E9B
+#define PCI_CHIP_COFFEELAKE_H_GT2_2 0x3E94
 
 #define IS_MOBILE(devid)   ((devid) == PCI_CHIP_I855_GM || \
 (devid) == PCI_CHIP_I915_GM || \
@@ -464,7 +466,11 @@
  (devid) == PCI_CHIP_COFFEELAKE_S_GT2_2 || \
  (devid) == PCI_CHIP_COFFEELAKE_S_GT2_3)
 
-#define IS_COFFEELAKE(devid)   (IS_CFL_S(devid))
+#define IS_CFL_H(devid) ((devid) == PCI_CHIP_COFFEELAKE_H_GT2_1 || \
+ (devid) == PCI_CHIP_COFFEELAKE_H_GT2_2)
+
+#define IS_COFFEELAKE(devid)   (IS_CFL_S(devid) || \
+   IS_CFL_H(devid))
 
 #define IS_GEN9(devid) (IS_SKYLAKE(devid)  || \
 IS_BROXTON(devid)  || \
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH libdrm 1/3] intel: PCI Ids for S SKU in CFL

2017-06-21 Thread Anusha Srivatsa
Add the PCI IDs for S SKU in CFL by following the spec.

v2: Update IDs.

Cc: Rodrigo Vivi 
Signed-off-by: Anusha Srivatsa 
---
 intel/intel_chipset.h | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/intel/intel_chipset.h b/intel/intel_chipset.h
index 41fc0da..aeb72ba 100644
--- a/intel/intel_chipset.h
+++ b/intel/intel_chipset.h
@@ -221,6 +221,12 @@
 #define PCI_CHIP_GLK   0x3184
 #define PCI_CHIP_GLK_2X6   0x3185
 
+#define PCI_CHIP_COFFEELAKE_S_GT1_1 0x3E90
+#define PCI_CHIP_COFFEELAKE_S_GT1_2 0x3E93
+#define PCI_CHIP_COFFEELAKE_S_GT2_1 0x3E91
+#define PCI_CHIP_COFFEELAKE_S_GT2_2 0x3E92
+#define PCI_CHIP_COFFEELAKE_S_GT2_3 0x3E96
+
 #define IS_MOBILE(devid)   ((devid) == PCI_CHIP_I855_GM || \
 (devid) == PCI_CHIP_I915_GM || \
 (devid) == PCI_CHIP_I945_GM || \
@@ -452,10 +458,19 @@
 #define IS_GEMINILAKE(devid)   ((devid) == PCI_CHIP_GLK || \
 (devid) == PCI_CHIP_GLK_2X6)
 
+#define IS_CFL_S(devid) ((devid) == PCI_CHIP_COFFEELAKE_S_GT1_1 || \
+ (devid) == PCI_CHIP_COFFEELAKE_S_GT1_2 || \
+ (devid) == PCI_CHIP_COFFEELAKE_S_GT2_1 || \
+ (devid) == PCI_CHIP_COFFEELAKE_S_GT2_2 || \
+ (devid) == PCI_CHIP_COFFEELAKE_S_GT2_3)
+
+#define IS_COFFEELAKE(devid)   (IS_CFL_S(devid))
+
 #define IS_GEN9(devid) (IS_SKYLAKE(devid)  || \
 IS_BROXTON(devid)  || \
 IS_KABYLAKE(devid) || \
-IS_GEMINILAKE(devid))
+IS_GEMINILAKE(devid) || \
+IS_COFFEELAKE(devid))
 
 #define IS_9XX(dev)(IS_GEN3(dev) || \
 IS_GEN4(dev) || \
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/miptree: Move isl_surf_get_(hiz|mcs)_surf out of the assert

2017-06-21 Thread Jason Ekstrand
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101538
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101539
Cc: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index abc7f98..3b7262f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1672,8 +1672,9 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
 * calculate equivalent MCS surface against it.
 */
intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
-   assert(isl_surf_get_mcs_surf(&brw->isl_dev, &temp_main_surf,
-&temp_mcs_surf));
+   MAYBE_UNUSED bool ok =
+  isl_surf_get_mcs_surf(&brw->isl_dev, &temp_main_surf, &temp_mcs_surf);
+   assert(ok);
 
/* Buffer needs to be initialised requiring the buffer to be immediately
 * mapped to cpu space for writing. Therefore do not use the gpu access
@@ -1832,8 +1833,9 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
struct isl_surf temp_hiz_surf;
 
intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
-   assert(isl_surf_get_hiz_surf(&brw->isl_dev, &temp_main_surf,
-&temp_hiz_surf));
+   MAYBE_UNUSED bool ok =
+  isl_surf_get_hiz_surf(&brw->isl_dev, &temp_main_surf, &temp_hiz_surf);
+   assert(ok);
 
const uint32_t alloc_flags = BO_ALLOC_FOR_RENDER;
mt->hiz_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 05/12] i965: Convert CC state on gen4-5 to genxml.

2017-06-21 Thread Rafael Antognolli
Use set_blend_entry_bits and set_depth_stencil_bits to fill most of the
color calc struct, and then manually update the rest.

v2:
   - Always check for depth_irb (Ken)
   - Always set Backface Stencil Ref (Ken)
   - Always set alpha reference value (Ken)

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_cc.c| 174 --
 src/mesa/drivers/dri/i965/brw_state.h |   1 -
 src/mesa/drivers/dri/i965/brw_structs.h   |  92 --
 src/mesa/drivers/dri/i965/brw_util.h  |   1 -
 src/mesa/drivers/dri/i965/genX_state_upload.c |  84 ++---
 5 files changed, 68 insertions(+), 284 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cc.c 
b/src/mesa/drivers/dri/i965/brw_cc.c
index cdaa696..503ec83 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -39,180 +39,6 @@
 #include "main/stencil.h"
 #include "intel_batchbuffer.h"
 
-/**
- * Modify blend function to force destination alpha to 1.0
- *
- * If \c function specifies a blend function that uses destination alpha,
- * replace it with a function that hard-wires destination alpha to 1.0.  This
- * is used when rendering to xRGB targets.
- */
-GLenum
-brw_fix_xRGB_alpha(GLenum function)
-{
-   switch (function) {
-   case GL_DST_ALPHA:
-  return GL_ONE;
-
-   case GL_ONE_MINUS_DST_ALPHA:
-   case GL_SRC_ALPHA_SATURATE:
-  return GL_ZERO;
-   }
-
-   return function;
-}
-
-/**
- * Creates a CC unit packet from the current blend state.
- */
-static void upload_cc_unit(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   struct brw_cc_unit_state *cc;
-
-   cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset);
-   memset(cc, 0, sizeof(*cc));
-
-   /* _NEW_STENCIL | _NEW_BUFFERS */
-   if (ctx->Stencil._Enabled) {
-  const unsigned back = ctx->Stencil._BackFace;
-
-  cc->cc0.stencil_enable = 1;
-  cc->cc0.stencil_func =
-intel_translate_compare_func(ctx->Stencil.Function[0]);
-  cc->cc0.stencil_fail_op =
-intel_translate_stencil_op(ctx->Stencil.FailFunc[0]);
-  cc->cc0.stencil_pass_depth_fail_op =
-intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
-  cc->cc0.stencil_pass_depth_pass_op =
-intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
-  cc->cc1.stencil_ref = _mesa_get_stencil_ref(ctx, 0);
-  cc->cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
-  cc->cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];
-
-  if (ctx->Stencil._TestTwoSide) {
-cc->cc0.bf_stencil_enable = 1;
-cc->cc0.bf_stencil_func =
-   intel_translate_compare_func(ctx->Stencil.Function[back]);
-cc->cc0.bf_stencil_fail_op =
-   intel_translate_stencil_op(ctx->Stencil.FailFunc[back]);
-cc->cc0.bf_stencil_pass_depth_fail_op =
-   intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
-cc->cc0.bf_stencil_pass_depth_pass_op =
-   intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
-cc->cc1.bf_stencil_ref = _mesa_get_stencil_ref(ctx, back);
-cc->cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
-cc->cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
-  }
-
-  /* Not really sure about this:
-   */
-  if (ctx->Stencil.WriteMask[0] ||
- (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back]))
-cc->cc0.stencil_write_enable = 1;
-   }
-
-   /* _NEW_COLOR */
-   if (ctx->Color.ColorLogicOpEnabled && ctx->Color.LogicOp != GL_COPY) {
-  cc->cc2.logicop_enable = 1;
-  cc->cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp);
-   } else if (ctx->Color.BlendEnabled && !ctx->Color._AdvancedBlendMode) {
-  GLenum eqRGB = ctx->Color.Blend[0].EquationRGB;
-  GLenum eqA = ctx->Color.Blend[0].EquationA;
-  GLenum srcRGB = ctx->Color.Blend[0].SrcRGB;
-  GLenum dstRGB = ctx->Color.Blend[0].DstRGB;
-  GLenum srcA = ctx->Color.Blend[0].SrcA;
-  GLenum dstA = ctx->Color.Blend[0].DstA;
-
-  if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
-srcRGB = dstRGB = GL_ONE;
-  }
-
-  if (eqA == GL_MIN || eqA == GL_MAX) {
-srcA = dstA = GL_ONE;
-  }
-
-  /* If the renderbuffer is XRGB, we have to frob the blend function to
-   * force the destination alpha to 1.0.  This means replacing GL_DST_ALPHA
-   * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
-   */
-  const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
-  if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
-   GL_TEXTURE_ALPHA_TYPE)) {
-srcRGB = brw_fix_xRGB_alpha(srcRGB);
-srcA   = brw_fix_xRGB_alpha(srcA);
-dstRGB = brw_fix_xRGB_alpha(dstRGB);
-dstA   = brw_fix_xRGB_alpha(dstA);
-  }
-
-  cc->cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
-  

[Mesa-dev] [PATCH v2 10/12] i965: Convert CLIP_STATE to genxml.

2017-06-21 Thread Rafael Antognolli
The code doesn't get exactly a lot simpler but at least it is in a
single place, and we delete more than we add.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/Makefile.sources|   1 -
 src/mesa/drivers/dri/i965/brw_clip_state.c| 147 ---
 src/mesa/drivers/dri/i965/brw_structs.h   |  65 --
 src/mesa/drivers/dri/i965/genX_state_upload.c | 164 +++---
 4 files changed, 119 insertions(+), 258 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_clip_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index a06a8c1..89be92e 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -6,7 +6,6 @@ i965_FILES = \
brw_bufmgr.h \
brw_clear.c \
brw_clip.c \
-   brw_clip_state.c \
brw_compute.c \
brw_conditional_render.c \
brw_context.c \
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c 
b/src/mesa/drivers/dri/i965/brw_clip_state.c
deleted file mode 100644
index 8f22c0f..000
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **/
- /*
-  * Authors:
-  *   Keith Whitwell 
-  */
-
-#include "intel_batchbuffer.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "main/framebuffer.h"
-
-static void
-brw_upload_clip_unit(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   struct brw_clip_unit_state *clip;
-
-   clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
-   memset(clip, 0, sizeof(*clip));
-
-   /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_CLIP_PROG_DATA */
-   clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
-16 - 1);
-   clip->thread0.kernel_start_pointer =
-  brw_program_reloc(brw,
-   brw->clip.state_offset +
-   offsetof(struct brw_clip_unit_state, thread0),
-   brw->clip.prog_offset +
-   (clip->thread0.grf_reg_count << 1)) >> 6;
-
-   clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-   clip->thread1.single_program_flow = 1;
-
-   clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
-   clip->thread3.const_urb_entry_read_length =
-  brw->clip.prog_data->curb_read_length;
-
-   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
-   clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
-   clip->thread3.dispatch_grf_start_reg = 1;
-   clip->thread3.urb_entry_read_offset = 0;
-
-   /* BRW_NEW_URB_FENCE */
-   clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
-   clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
-   /* If we have enough clip URB entries to run two threads, do so.
-*/
-   if (brw->urb.nr_clip_entries >= 10) {
-  /* Half of the URB entries go to each thread, and it has to be an
-   * even number.
-   */
-  assert(brw->urb.nr_clip_entries % 2 == 0);
-
-  /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
-   * only 2 threads can output VUEs at a time.
-   */
-  if (brw->gen == 5)
- clip->thread4.max_threads = 16 - 1;
-  else
- clip->thread4.max_threads = 2 - 1;
-   } else {
-  assert(brw->urb.nr_clip_entries >= 5);
-  clip->thread4.max_threads = 1 - 1;
-   }
-
-   /* _NEW_TRANSFORM */
-   if (brw->gen == 5 || brw->is_g4x)
-  clip->clip5.userclip_enable_flags = ctx->Transform.ClipPlanesEnabled;
-   else
-  /* Up to 6 actual clip flags, plus the 

[Mesa-dev] [PATCH v2 11/12] i965: Convert WM_STATE to genxml on gen4-5.

2017-06-21 Thread Rafael Antognolli
The code doesn't get exactly a lot simpler but at least it is in a single
place, and we delete more than we add.

Another good point is that you get rid of struct brw_wm_unit_state
which was a third mechanism for encoding GEN state. We used to have
GENXML, manual packing and these bitfield structs. Now we're down to
just GENXML and some manual packing. (Khristian)

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/Makefile.sources|   1 -
 src/mesa/drivers/dri/i965/brw_state.h |   1 -
 src/mesa/drivers/dri/i965/brw_structs.h   | 121 
 src/mesa/drivers/dri/i965/brw_wm.h|   2 -
 src/mesa/drivers/dri/i965/brw_wm_state.c  | 274 --
 src/mesa/drivers/dri/i965/genX_state_upload.c | 191 ++
 6 files changed, 153 insertions(+), 437 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_wm_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 89be92e..c15b3ef 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -61,7 +61,6 @@ i965_FILES = \
brw_vs_surface_state.c \
brw_wm.c \
brw_wm.h \
-   brw_wm_state.c \
brw_wm_surface_state.c \
gen4_blorp_exec.h \
gen6_clip_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 8f3bd7f..9588a51 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -89,7 +89,6 @@ extern const struct brw_tracked_state brw_wm_image_surfaces;
 extern const struct brw_tracked_state brw_cs_ubo_surfaces;
 extern const struct brw_tracked_state brw_cs_abo_surfaces;
 extern const struct brw_tracked_state brw_cs_image_surfaces;
-extern const struct brw_tracked_state brw_wm_unit;
 
 extern const struct brw_tracked_state brw_psp_urb_cbs;
 
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h 
b/src/mesa/drivers/dri/i965/brw_structs.h
index 5a0d91d..fb592be 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -65,127 +65,6 @@ struct brw_urb_fence
} bits1;
 };
 
-/* State structs for the various fixed function units:
- */
-
-
-struct thread0
-{
-   unsigned pad0:1;
-   unsigned grf_reg_count:3;
-   unsigned pad1:2;
-   unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
-};
-
-struct thread1
-{
-   unsigned ext_halt_exception_enable:1;
-   unsigned sw_exception_enable:1;
-   unsigned mask_stack_exception_enable:1;
-   unsigned timeout_exception_enable:1;
-   unsigned illegal_op_exception_enable:1;
-   unsigned pad0:3;
-   unsigned depth_coef_urb_read_offset:6;  /* WM only */
-   unsigned pad1:2;
-   unsigned floating_point_mode:1;
-   unsigned thread_priority:1;
-   unsigned binding_table_entry_count:8;
-   unsigned pad3:5;
-   unsigned single_program_flow:1;
-};
-
-struct thread2
-{
-   unsigned per_thread_scratch_space:4;
-   unsigned pad0:6;
-   unsigned scratch_space_base_pointer:22;
-};
-
-
-struct thread3
-{
-   unsigned dispatch_grf_start_reg:4;
-   unsigned urb_entry_read_offset:6;
-   unsigned pad0:1;
-   unsigned urb_entry_read_length:6;
-   unsigned pad1:1;
-   unsigned const_urb_entry_read_offset:6;
-   unsigned pad2:1;
-   unsigned const_urb_entry_read_length:6;
-   unsigned pad3:1;
-};
-
-struct brw_wm_unit_state
-{
-   struct thread0 thread0;
-   struct thread1 thread1;
-   struct thread2 thread2;
-   struct thread3 thread3;
-
-   struct {
-  unsigned stats_enable:1;
-  unsigned depth_buffer_clear:1;
-  unsigned sampler_count:3;
-  unsigned sampler_state_pointer:27;
-   } wm4;
-
-   struct
-   {
-  unsigned enable_8_pix:1;
-  unsigned enable_16_pix:1;
-  unsigned enable_32_pix:1;
-  unsigned enable_con_32_pix:1;
-  unsigned enable_con_64_pix:1;
-  unsigned pad0:1;
-
-  /* These next four bits are for Ironlake+ */
-  unsigned fast_span_coverage_enable:1;
-  unsigned depth_buffer_clear:1;
-  unsigned depth_buffer_resolve_enable:1;
-  unsigned hierarchical_depth_buffer_resolve_enable:1;
-
-  unsigned legacy_global_depth_bias:1;
-  unsigned line_stipple:1;
-  unsigned depth_offset:1;
-  unsigned polygon_stipple:1;
-  unsigned line_aa_region_width:2;
-  unsigned line_endcap_aa_region_width:2;
-  unsigned early_depth_test:1;
-  unsigned thread_dispatch_enable:1;
-  unsigned program_uses_depth:1;
-  unsigned program_computes_depth:1;
-  unsigned program_uses_killpixel:1;
-  unsigned legacy_line_rast: 1;
-  unsigned transposed_urb_read_enable:1;
-  unsigned max_threads:7;
-   } wm5;
-
-   float global_depth_offset_constant;
-   float global_depth_offset_scale;
-
-   /* for Ironlake only */
-   struct {
-  unsigned pad0:1;
-  unsigned grf_reg_count_1:3;
-  unsigned pad1:2;
-  unsigned 

[Mesa-dev] [PATCH v2 07/12] i965: Remove upload_gs_state_for_tf.

2017-06-21 Thread Rafael Antognolli
This function only emits a particular case of 3DSTATE_GS. Instead, we can do
that inside genX(upload_gs_state), and later reuse part of that code for
emitting gen4-5 state.

There's the additional benefit of allowing us to remove gen6_gs_state.c, which
was only left because of this function.

Signed-off-by: Rafael Antognolli 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/Makefile.sources|  1 -
 src/mesa/drivers/dri/i965/brw_state.h |  2 -
 src/mesa/drivers/dri/i965/gen6_gs_state.c | 56 ---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 17 +++-
 4 files changed, 16 insertions(+), 60 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/gen6_gs_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index b2edba9..8af9a7c 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -69,7 +69,6 @@ i965_FILES = \
gen6_clip_state.c \
gen6_constant_state.c \
gen6_depth_state.c \
-   gen6_gs_state.c \
gen6_multisample_state.c \
gen6_queryobj.c \
gen6_sampler_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index ead0078..af70464 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -361,8 +361,6 @@ void gen8_init_atoms(struct brw_context *brw);
 void gen9_init_atoms(struct brw_context *brw);
 void gen10_init_atoms(struct brw_context *brw);
 
-void upload_gs_state_for_tf(struct brw_context *brw);
-
 /* Memory Object Control State:
  * Specifying zero for L3 means "uncached in L3", at least on Haswell
  * and Baytrail, since there are no PTE flags for setting L3 cacheability.
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c 
b/src/mesa/drivers/dri/i965/gen6_gs_state.c
deleted file mode 100644
index 6450c76..000
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *Eric Anholt 
- *
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "intel_batchbuffer.h"
-#include "main/shaderapi.h"
-
-void
-upload_gs_state_for_tf(struct brw_context *brw)
-{
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-
-   BEGIN_BATCH(7);
-   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
-   OUT_BATCH(brw->ff_gs.prog_offset);
-   OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
-   OUT_BATCH(0); /* no scratch space */
-   OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
- (brw->ff_gs.prog_data->urb_read_length << 
GEN6_GS_URB_READ_LENGTH_SHIFT));
-   OUT_BATCH(((devinfo->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
- GEN6_GS_STATISTICS_ENABLE |
- GEN6_GS_SO_STATISTICS_ENABLE |
- GEN6_GS_RENDERING_ENABLE);
-   OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
- GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
- (brw->ff_gs.prog_data->svbi_postincrement_value <<
-  GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
- GEN6_GS_ENABLE);
-   ADVANCE_BATCH();
-}
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 6899801..06733bc 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2472,7 +2472,22 @@ genX(upload_gs_state)(struct brw_context *brw)
   /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS
* program. This function provides the needed 3DSTATE_GS for this.
*/
-  upload_gs_state_for_tf(brw);
+  brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+ 

[Mesa-dev] [PATCH v2 01/12] i965: Make a helper function for depth/stencil related state.

2017-06-21 Thread Rafael Antognolli
From: Kenneth Graunke 

Gen4-5 basically glue DEPTH_STENCIL_STATE, COLOR_CALC_STATE, and
BLEND_STATE together into a single COLOR_CALC_STATE structure.

By making a helper function, we'll be able to reuse it when filling
out Gen4-5 COLOR_CALC_STATE without replicating any actual logic.

We use generation-defined typedef to handle the polymorphism.

Reviewed-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 113 +++---
 1 file changed, 65 insertions(+), 48 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 064880b..43f5b36 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1153,9 +1153,16 @@ genX(calculate_attr_overrides)(const struct brw_context 
*brw,
 
 /* -- */
 
-#if GEN_GEN >= 6
-static void
-genX(upload_depth_stencil_state)(struct brw_context *brw)
+#if GEN_GEN >= 8
+typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML;
+#elif GEN_GEN >= 6
+typedef struct GENX(DEPTH_STENCIL_STATE)  DEPTH_STENCIL_GENXML;
+#else
+typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML;
+#endif
+
+static inline void
+set_depth_stencil_bits(struct brw_context *brw, DEPTH_STENCIL_GENXML *ds)
 {
 struct gl_context *ctx = &brw->ctx;
 
@@ -1170,66 +1177,76 @@ genX(upload_depth_stencil_state)(struct brw_context 
*brw)
 struct gl_stencil_attrib *stencil = &ctx->Stencil;
const int b = stencil->_BackFace;
 
-#if GEN_GEN >= 8
-   brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
-#else
-   uint32_t ds_offset;
-   brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, wmds) {
-#endif
-  if (depth->Test && depth_irb) {
- wmds.DepthTestEnable = true;
- wmds.DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
- wmds.DepthTestFunction = intel_translate_compare_func(depth->Func);
-  }
+   if (depth->Test && depth_irb) {
+  ds->DepthTestEnable = true;
+  ds->DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
+  ds->DepthTestFunction = intel_translate_compare_func(depth->Func);
+   }
 
-  if (stencil->_Enabled) {
- wmds.StencilTestEnable = true;
- wmds.StencilWriteMask = stencil->WriteMask[0] & 0xff;
- wmds.StencilTestMask = stencil->ValueMask[0] & 0xff;
-
- wmds.StencilTestFunction =
-intel_translate_compare_func(stencil->Function[0]);
- wmds.StencilFailOp =
-intel_translate_stencil_op(stencil->FailFunc[0]);
- wmds.StencilPassDepthPassOp =
-intel_translate_stencil_op(stencil->ZPassFunc[0]);
- wmds.StencilPassDepthFailOp =
-intel_translate_stencil_op(stencil->ZFailFunc[0]);
-
- wmds.StencilBufferWriteEnable = stencil->_WriteEnabled;
-
- if (stencil->_TestTwoSide) {
-wmds.DoubleSidedStencilEnable = true;
-wmds.BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
-wmds.BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;
-
-wmds.BackfaceStencilTestFunction =
-   intel_translate_compare_func(stencil->Function[b]);
-wmds.BackfaceStencilFailOp =
-   intel_translate_stencil_op(stencil->FailFunc[b]);
-wmds.BackfaceStencilPassDepthPassOp =
-   intel_translate_stencil_op(stencil->ZPassFunc[b]);
-wmds.BackfaceStencilPassDepthFailOp =
-   intel_translate_stencil_op(stencil->ZFailFunc[b]);
- }
+   if (stencil->_Enabled) {
+  ds->StencilTestEnable = true;
+  ds->StencilWriteMask = stencil->WriteMask[0] & 0xff;
+  ds->StencilTestMask = stencil->ValueMask[0] & 0xff;
+
+  ds->StencilTestFunction =
+ intel_translate_compare_func(stencil->Function[0]);
+  ds->StencilFailOp =
+ intel_translate_stencil_op(stencil->FailFunc[0]);
+  ds->StencilPassDepthPassOp =
+ intel_translate_stencil_op(stencil->ZPassFunc[0]);
+  ds->StencilPassDepthFailOp =
+ intel_translate_stencil_op(stencil->ZFailFunc[0]);
+
+  ds->StencilBufferWriteEnable = stencil->_WriteEnabled;
+
+  if (stencil->_TestTwoSide) {
+ ds->DoubleSidedStencilEnable = true;
+ ds->BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
+ ds->BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;
+
+ ds->BackfaceStencilTestFunction =
+intel_translate_compare_func(stencil->Function[b]);
+ ds->BackfaceStencilFailOp =
+intel_translate_stencil_op(stencil->FailFunc[b]);
+ ds->BackfaceStencilPassDepthPassOp =
+intel_translate_stencil_op(stencil->ZPassFunc[b]);
+ ds->BackfaceStencilPassDepthFailOp =
+intel_translate_stencil_op(stencil->ZFailFunc[b]);
+  }
 
 #if GEN_GEN >= 9
-   

[Mesa-dev] [PATCH v2 02/12] i965: Make a helper function for blend entry related state.

2017-06-21 Thread Rafael Antognolli
Add a helper function to reuse code that fills blend entry related
state, and make genX(upload_blend_state) use it. This function can later
be used by gen4-5 color calc state to set the blend related bits.

Signed-off-by: Rafael Antognolli 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 182 ++
 1 file changed, 101 insertions(+), 81 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 43f5b36..e94a167 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2528,6 +2528,104 @@ fix_dual_blend_alpha_to_one(GLenum function)
 #define blend_eqn(x) brw_translate_blend_equation(x)
 
 #if GEN_GEN >= 6
+typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML;
+#else
+typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML;
+#endif
+
+UNUSED static bool
+set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i,
+ bool alpha_to_one)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   /* _NEW_BUFFERS */
+   const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+
+   bool independent_alpha_blend = false;
+
+   /* Used for implementing the following bit of GL_EXT_texture_integer:
+* "Per-fragment operations that require floating-point color
+*  components, including multisample alpha operations, alpha test,
+*  blending, and dithering, have no effect when the corresponding
+*  colors are written to an integer color buffer."
+*/
+   const bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i);
+
+   /* _NEW_COLOR */
+   if (ctx->Color.ColorLogicOpEnabled) {
+  GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format)
+ : GL_UNSIGNED_NORMALIZED;
+  WARN_ONCE(ctx->Color.LogicOp != GL_COPY &&
+rb_type != GL_UNSIGNED_NORMALIZED &&
+rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
+"renderbuffer\n",
+_mesa_enum_to_string(ctx->Color.LogicOp),
+_mesa_enum_to_string(rb_type));
+  if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
+ entry->LogicOpEnable = true;
+ entry->LogicOpFunction =
+intel_translate_logic_op(ctx->Color.LogicOp);
+  }
+   } else if (ctx->Color.BlendEnabled & (1 << i) && !integer &&
+  !ctx->Color._AdvancedBlendMode) {
+  GLenum eqRGB = ctx->Color.Blend[i].EquationRGB;
+  GLenum eqA = ctx->Color.Blend[i].EquationA;
+  GLenum srcRGB = ctx->Color.Blend[i].SrcRGB;
+  GLenum dstRGB = ctx->Color.Blend[i].DstRGB;
+  GLenum srcA = ctx->Color.Blend[i].SrcA;
+  GLenum dstA = ctx->Color.Blend[i].DstA;
+
+  if (eqRGB == GL_MIN || eqRGB == GL_MAX)
+ srcRGB = dstRGB = GL_ONE;
+
+  if (eqA == GL_MIN || eqA == GL_MAX)
+ srcA = dstA = GL_ONE;
+
+  /* Due to hardware limitations, the destination may have information
+   * in an alpha channel even when the format specifies no alpha
+   * channel. In order to avoid getting any incorrect blending due to
+   * that alpha channel, coerce the blend factors to values that will
+   * not read the alpha channel, but will instead use the correct
+   * implicit value for alpha.
+   */
+  if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
+   GL_TEXTURE_ALPHA_TYPE)) {
+ srcRGB = brw_fix_xRGB_alpha(srcRGB);
+ srcA = brw_fix_xRGB_alpha(srcA);
+ dstRGB = brw_fix_xRGB_alpha(dstRGB);
+ dstA = brw_fix_xRGB_alpha(dstA);
+  }
+
+  /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
+   * "If Dual Source Blending is enabled, this bit must be disabled."
+   *
+   * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO,
+   * and leave it enabled anyway.
+   */
+  if (GEN_GEN >= 6 && ctx->Color.Blend[i]._UsesDualSrc && alpha_to_one) {
+ srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
+ srcA = fix_dual_blend_alpha_to_one(srcA);
+ dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
+ dstA = fix_dual_blend_alpha_to_one(dstA);
+  }
+
+  entry->ColorBufferBlendEnable = true;
+  entry->DestinationBlendFactor = blend_factor(dstRGB);
+  entry->SourceBlendFactor = blend_factor(srcRGB);
+  entry->DestinationAlphaBlendFactor = blend_factor(dstA);
+  entry->SourceAlphaBlendFactor = blend_factor(srcA);
+  entry->ColorBlendFunction = blend_eqn(eqRGB);
+  entry->AlphaBlendFunction = blend_eqn(eqA);
+
+  if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
+ independent_alpha_blend = true;
+   }
+
+   return independent_alpha_blend;
+}
+
+#if GEN_GEN >= 6
 static void
 genX(upload_blend_state)(struct brw_context *brw)
 {
@@ -2594,87 +2692,9 @@ 

[Mesa-dev] [PATCH v2 12/12] i965: Remove a lot of constants from brw_defines.h.

2017-06-21 Thread Rafael Antognolli
These were originally used to submit state changes using manual packing
of instructions, but we are now using genxml for that. So it should be
safe to just remove them.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_defines.h | 729 +---
 1 file changed, 1 insertion(+), 728 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 312ddda..ce5381d 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -126,11 +126,6 @@
 #define BRW_COVERAGE_PIXELS_2 2
 #define BRW_COVERAGE_PIXELS_4 3
 
-#define BRW_CULLMODE_BOTH 0
-#define BRW_CULLMODE_NONE 1
-#define BRW_CULLMODE_FRONT   2
-#define BRW_CULLMODE_BACK 3
-
 #define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM  0
 #define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT  1
 
@@ -140,18 +135,6 @@
 #define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */
 #define BRW_DEPTHFORMAT_D16_UNORM 5
 
-#define BRW_FLOATING_POINT_IEEE_754 0
-#define BRW_FLOATING_POINT_NON_IEEE_754 1
-
-#define BRW_FRONTWINDING_CW  0
-#define BRW_FRONTWINDING_CCW 1
-
-#define BRW_CUT_INDEX_ENABLE (1 << 10)
-
-#define BRW_INDEX_BYTE 0
-#define BRW_INDEX_WORD 1
-#define BRW_INDEX_DWORD 2
-
 #define BRW_LOGICOPFUNCTION_CLEAR 0
 #define BRW_LOGICOPFUNCTION_NOR  1
 #define BRW_LOGICOPFUNCTION_AND_INVERTED 2
@@ -539,22 +522,6 @@ enum brw_wrap_mode {
 # define GEN6_URB_GS_ENTRIES_SHIFT 8
 # define GEN6_URB_GS_SIZE_SHIFT 0
 
-#define _3DSTATE_VF 0x780c /* GEN7.5+ */
-#define HSW_CUT_INDEX_ENABLE (1 << 8)
-
-#define _3DSTATE_VF_INSTANCING  0x7849 /* GEN8+ */
-# define GEN8_VF_INSTANCING_ENABLE  (1 << 8)
-
-#define _3DSTATE_VF_SGVS 0x784a /* GEN8+ */
-# define GEN8_SGVS_ENABLE_INSTANCE_ID   (1 << 31)
-# define GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT  29
-# define GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT 16
-# define GEN8_SGVS_ENABLE_VERTEX_ID (1 << 15)
-# define GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT 13
-# define GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT   0
-
-#define _3DSTATE_VF_TOPOLOGY 0x784b /* GEN8+ */
-
 #define _3DSTATE_WM_CHROMAKEY  0x784c /* GEN8+ */
 
 #define _3DSTATE_URB_VS 0x7830 /* GEN7+ */
@@ -582,376 +549,10 @@ enum brw_wrap_mode {
 
 #define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
 
-#define _3DSTATE_VS 0x7810 /* GEN6+ */
-/* DW2 */
-# define GEN6_VS_SPF_MODE  (1 << 31)
-# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
-# define GEN6_VS_SAMPLER_COUNT_SHIFT   27
-# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT   18
-# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754  (0 << 16)
-# define GEN6_VS_FLOATING_POINT_MODE_ALT   (1 << 16)
-# define HSW_VS_UAV_ACCESS_ENABLE   (1 << 12)
-/* DW4 */
-# define GEN6_VS_DISPATCH_START_GRF_SHIFT  20
-# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
-# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT   4
-/* DW5 */
-# define GEN6_VS_MAX_THREADS_SHIFT 25
-# define HSW_VS_MAX_THREADS_SHIFT  23
-# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
-# define GEN6_VS_CACHE_DISABLE (1 << 1)
-# define GEN6_VS_ENABLE (1 << 0)
-/* Gen8+ DW7 */
-# define GEN8_VS_SIMD8_ENABLE   (1 << 2)
-/* Gen8+ DW8 */
-# define GEN8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT  21
-# define GEN8_VS_URB_OUTPUT_LENGTH_SHIFT16
-# define GEN8_VS_USER_CLIP_DISTANCE_SHIFT   8
-
-#define _3DSTATE_GS 0x7811 /* GEN6+ */
-/* DW2 */
-# define GEN6_GS_SPF_MODE  (1 << 31)
-# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
-# define GEN6_GS_SAMPLER_COUNT_SHIFT   27
-# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT   18
-# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754  (0 << 16)
-# define GEN6_GS_FLOATING_POINT_MODE_ALT   (1 << 16)
-# define HSW_GS_UAV_ACCESS_ENABLE  (1 << 12)
-/* DW4 */
-# define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT  23
-# define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17
-# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
-# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10)
-# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT   4
-# define GEN6_GS_DISPATCH_START_GRF_SHIFT  0
-/* DW5 */
-# define 

[Mesa-dev] [PATCH v2 03/12] i965: Check for alpha channel just like in gen6+.

2017-06-21 Thread Rafael Antognolli
gen6+ uses _mesa_base_format_has_channel() to check for the alpha
channel, while gen4-5 use ctx->DrawBuffer->Visual.alphaBits. By using
_mesa_base_format_has_channel() here we keep the same behavior across
all generations.

While initially both ways of checking the alpha channel seemed correct
to me, this change also seems to fix fbo-blending-formats piglit test on
gen4.

Signed-off-by: Rafael Antognolli 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_cc.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cc.c 
b/src/mesa/drivers/dri/i965/brw_cc.c
index 78d3bc8..339bff5 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -34,6 +34,7 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_util.h"
+#include "main/glformats.h"
 #include "main/macros.h"
 #include "main/stencil.h"
 #include "intel_batchbuffer.h"
@@ -126,7 +127,9 @@ static void upload_cc_unit(struct brw_context *brw)
* force the destination alpha to 1.0.  This means replacing GL_DST_ALPHA
* with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
*/
-  if (ctx->DrawBuffer->Visual.alphaBits == 0) {
+  const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
+  if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
+   GL_TEXTURE_ALPHA_TYPE)) {
 srcRGB = brw_fix_xRGB_alpha(srcRGB);
 srcA   = brw_fix_xRGB_alpha(srcA);
 dstRGB = brw_fix_xRGB_alpha(dstRGB);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 08/12] i965: Prepare gs_state emitting code to include gen4-5.

2017-06-21 Thread Rafael Antognolli
Since we always call brw_batch_emit anyways, we can hopefully make things
simpler by calling it only once, and then branching inside its body. This
can be helpful when bringing the gen4-5 code into this function.

Additionally, check for GEN_GEN == 6 instead of < 7 in cases that won't apply
to lower gens.

Signed-off-by: Rafael Antognolli 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 06733bc..12df5c6 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2354,7 +2354,7 @@ genX(upload_gs_state)(struct brw_context *brw)
   brw_gs_prog_data(stage_prog_data);
 #endif
 
-#if GEN_GEN < 7
+#if GEN_GEN == 6
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) {
   if (active && stage_state->push_const_size != 0) {
  cgs.Buffer0Valid = true;
@@ -2381,8 +2381,8 @@ genX(upload_gs_state)(struct brw_context *brw)
   gen7_emit_cs_stall_flush(brw);
 #endif
 
-   if (active) {
-  brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+   brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+  if (active) {
  INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
 
 #if GEN_GEN >= 7
@@ -2466,13 +2466,12 @@ genX(upload_gs_state)(struct brw_context *brw)
  gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
  gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
 #endif
-  }
 #if GEN_GEN < 7
-   } else if (brw->ff_gs.prog_active)  {
-  /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS
-   * program. This function provides the needed 3DSTATE_GS for this.
-   */
-  brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+  } else if (brw->ff_gs.prog_active) {
+ /* In gen6, transform feedback for the VS stage is done with an
+  * ad-hoc GS program. This function provides the needed 3DSTATE_GS
+  * for this.
+  */
  gs.KernelStartPointer = KSP(brw, brw->ff_gs.prog_offset);
  gs.SingleProgramFlow = true;
  gs.VectorMaskEnable = true;
@@ -2487,10 +2486,8 @@ genX(upload_gs_state)(struct brw_context *brw)
  gs.SVBIPostIncrementValue =
 brw->ff_gs.prog_data->svbi_postincrement_value;
  gs.Enable = true;
-  }
 #endif
-   } else {
-  brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+  } else {
  gs.StatisticsEnable = true;
 #if GEN_GEN < 7
  gs.RenderingEnabled = true;
@@ -2504,7 +2501,8 @@ genX(upload_gs_state)(struct brw_context *brw)
 #endif
   }
}
-#if GEN_GEN < 7
+
+#if GEN_GEN == 6
brw->gs.enabled = active;
 #endif
 }
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 09/12] i965: Convert GS_STATE to genxml.

2017-06-21 Thread Rafael Antognolli
Merge the code with gen6+ 3DSTATE_GS, and delete brw_gs_state.c,
together with brw_gs_unit_state.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/Makefile.sources|   1 -
 src/mesa/drivers/dri/i965/brw_gs_state.c  | 101 --
 src/mesa/drivers/dri/i965/brw_state.h |   1 -
 src/mesa/drivers/dri/i965/brw_structs.h   |  44 ---
 src/mesa/drivers/dri/i965/genX_state_upload.c |  80 +---
 5 files changed, 55 insertions(+), 172 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_gs_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 8af9a7c..a06a8c1 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -24,7 +24,6 @@ i965_FILES = \
brw_formatquery.c \
brw_gs.c \
brw_gs.h \
-   brw_gs_state.c \
brw_gs_surface_state.c \
brw_link.cpp \
brw_meta_util.c \
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c 
b/src/mesa/drivers/dri/i965/brw_gs_state.c
deleted file mode 100644
index bc3d2e5..000
--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **/
- /*
-  * Authors:
-  *   Keith Whitwell 
-  */
-
-
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "intel_batchbuffer.h"
-
-static void
-brw_upload_gs_unit(struct brw_context *brw)
-{
-   struct brw_gs_unit_state *gs;
-
-   gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->ff_gs.state_offset);
-
-   memset(gs, 0, sizeof(*gs));
-
-   /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_GS_PROG_DATA */
-   if (brw->ff_gs.prog_active) {
-  gs->thread0.grf_reg_count = (ALIGN(brw->ff_gs.prog_data->total_grf, 16) /
-  16 - 1);
-
-  gs->thread0.kernel_start_pointer =
-brw_program_reloc(brw,
-  brw->ff_gs.state_offset +
-  offsetof(struct brw_gs_unit_state, thread0),
-  brw->ff_gs.prog_offset +
-  (gs->thread0.grf_reg_count << 1)) >> 6;
-
-  gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-  gs->thread1.single_program_flow = 1;
-
-  gs->thread3.dispatch_grf_start_reg = 1;
-  gs->thread3.const_urb_entry_read_offset = 0;
-  gs->thread3.const_urb_entry_read_length = 0;
-  gs->thread3.urb_entry_read_offset = 0;
-  gs->thread3.urb_entry_read_length =
- brw->ff_gs.prog_data->urb_read_length;
-
-  /* BRW_NEW_URB_FENCE */
-  gs->thread4.nr_urb_entries = brw->urb.nr_gs_entries;
-  gs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
-
-  if (brw->urb.nr_gs_entries >= 8)
-gs->thread4.max_threads = 1;
-  else
-gs->thread4.max_threads = 0;
-   }
-
-   if (brw->gen == 5)
-  gs->thread4.rendering_enable = 1;
-
-   /* BRW_NEW_VIEWPORT_COUNT */
-   gs->gs6.max_vp_index = brw->clip.viewport_count - 1;
-
-   brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
-}
-
-const struct brw_tracked_state brw_gs_unit = {
-   .dirty = {
-  .mesa  = 0,
-  .brw   = BRW_NEW_BATCH |
-   BRW_NEW_BLORP |
-   BRW_NEW_PUSH_CONSTANT_ALLOCATION |
-   BRW_NEW_FF_GS_PROG_DATA |
-   BRW_NEW_PROGRAM_CACHE |
-   BRW_NEW_URB_FENCE |
-   BRW_NEW_VIEWPORT_COUNT,
-   },
-   .emit = brw_upload_gs_unit,
-};
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index af70464..8f3bd7f 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ 

[Mesa-dev] [PATCH v2 06/12] i965: Convert BLEND_CONSTANT_COLOR state to genxml.

2017-06-21 Thread Rafael Antognolli
It's a very simple conversion, and it allows us to delete brw_cc.c.

Signed-off-by: Rafael Antognolli 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/Makefile.sources|  1 -
 src/mesa/drivers/dri/i965/brw_cc.c| 62 ---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 28 +++-
 3 files changed, 27 insertions(+), 64 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_cc.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 8bac803..b2edba9 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -4,7 +4,6 @@ i965_FILES = \
brw_blorp.h \
brw_bufmgr.c \
brw_bufmgr.h \
-   brw_cc.c \
brw_clear.c \
brw_clip.c \
brw_clip_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c 
b/src/mesa/drivers/dri/i965/brw_cc.c
deleted file mode 100644
index 503ec83..000
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **/
- /*
-  * Authors:
-  *   Keith Whitwell 
-  */
-
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_util.h"
-#include "main/glformats.h"
-#include "main/macros.h"
-#include "main/stencil.h"
-#include "intel_batchbuffer.h"
-
-static void upload_blend_constant_color(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2));
-   OUT_BATCH_F(ctx->Color.BlendColorUnclamped[0]);
-   OUT_BATCH_F(ctx->Color.BlendColorUnclamped[1]);
-   OUT_BATCH_F(ctx->Color.BlendColorUnclamped[2]);
-   OUT_BATCH_F(ctx->Color.BlendColorUnclamped[3]);
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state brw_blend_constant_color = {
-   .dirty = {
-  .mesa = _NEW_COLOR,
-  .brw = BRW_NEW_CONTEXT |
- BRW_NEW_BLORP,
-   },
-   .emit = upload_blend_constant_color
-};
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 60cf1e6..6899801 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4306,6 +4306,32 @@ genX(emit_mi_report_perf_count)(struct brw_context *brw,
 
 /* -- */
 
+#if GEN_GEN <= 5
+
+static void genX(upload_blend_constant_color)(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) {
+  blend_cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
+  blend_cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
+  blend_cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
+  blend_cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
+   }
+}
+
+static const struct brw_tracked_state genX(blend_constant_color) = {
+   .dirty = {
+  .mesa = _NEW_COLOR,
+  .brw = BRW_NEW_CONTEXT |
+ BRW_NEW_BLORP,
+   },
+   .emit = genX(upload_blend_constant_color)
+};
+#endif
+
+/* -- */
+
 void
 genX(init_atoms)(struct brw_context *brw)
 {
@@ -4349,7 +4375,7 @@ genX(init_atoms)(struct brw_context *brw)
   &brw_invariant_state,
 
   &brw_binding_table_pointers,
-  &brw_blend_constant_color,
+  &genX(blend_constant_color),
 
   &brw_depthbuffer,
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH v2 04/12] i965: Move color calc code around a bit.

2017-06-21 Thread Rafael Antognolli
This makes the code more consistent across generations.

Signed-off-by: Rafael Antognolli 
Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_cc.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cc.c 
b/src/mesa/drivers/dri/i965/brw_cc.c
index 339bff5..cdaa696 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -123,6 +123,14 @@ static void upload_cc_unit(struct brw_context *brw)
   GLenum srcA = ctx->Color.Blend[0].SrcA;
   GLenum dstA = ctx->Color.Blend[0].DstA;
 
+  if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+srcRGB = dstRGB = GL_ONE;
+  }
+
+  if (eqA == GL_MIN || eqA == GL_MAX) {
+srcA = dstA = GL_ONE;
+  }
+
   /* If the renderbuffer is XRGB, we have to frob the blend function to
* force the destination alpha to 1.0.  This means replacing GL_DST_ALPHA
* with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
@@ -136,14 +144,6 @@ static void upload_cc_unit(struct brw_context *brw)
 dstA   = brw_fix_xRGB_alpha(dstA);
   }
 
-  if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
-srcRGB = dstRGB = GL_ONE;
-  }
-
-  if (eqA == GL_MIN || eqA == GL_MAX) {
-srcA = dstA = GL_ONE;
-  }
-
   cc->cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
   cc->cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
   cc->cc6.blend_function = brw_translate_blend_equation(eqRGB);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Remove spurious mutex frobbing around call to intel_miptree_blit

2017-06-21 Thread Ian Romanick
On 06/20/2017 01:33 PM, Kristian Høgsberg wrote:
> On Mon, Jun 19, 2017 at 2:33 PM, Ian Romanick  wrote:
>> From: Ian Romanick 
>>
>> These locks were added in 2f28a0dc, but I don't see anything in the
>> intel_miptree_blit path that should make this necessary.
> 
> I doubt it's needed now with the new blorp. If I remember correctly, I
> had to drop the lock there since intel_miptree_blit() could hit the XY
> blit path that requires a fast clear resolve. The fast resolve being
> meta, would then try to lock the texture again.

I figured it was something like that.  If I add that commentary to the
commit message, can I call that a Reviewed-by?

> Kristian
> 
>> Signed-off-by: Ian Romanick 
>> Cc: Kristian Høgsberg 
>> ---
>>  src/mesa/drivers/dri/i965/intel_tex_copy.c | 19 ++-
>>  1 file changed, 6 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c 
>> b/src/mesa/drivers/dri/i965/intel_tex_copy.c
>> index 9c255ae..e0d5cad 100644
>> --- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
>> +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
>> @@ -51,7 +51,6 @@ intel_copy_texsubimage(struct brw_context *brw,
>> GLint x, GLint y, GLsizei width, GLsizei height)
>>  {
>> const GLenum internalFormat = intelImage->base.Base.InternalFormat;
>> -   bool ret;
>>
>> /* No pixel transfer operations (zoom, bias, mapping), just a blit */
>> if (brw->ctx._ImageTransferState)
>> @@ -83,19 +82,13 @@ intel_copy_texsubimage(struct brw_context *brw,
>> int dst_slice = slice + intelImage->base.Base.Face +
>> intelImage->base.Base.TexObject->MinLayer;
>>
>> -   _mesa_unlock_texture(>ctx, intelImage->base.Base.TexObject);
>> -
>> /* blit from src buffer to texture */
>> -   ret = intel_miptree_blit(brw,
>> -irb->mt, irb->mt_level, irb->mt_layer,
>> -x, y, irb->Base.Base.Name == 0,
>> -intelImage->mt, dst_level, dst_slice,
>> -dstx, dsty, false,
>> -width, height, GL_COPY);
>> -
>> -   _mesa_lock_texture(>ctx, intelImage->base.Base.TexObject);
>> -
>> -   return ret;
>> +   return intel_miptree_blit(brw,
>> + irb->mt, irb->mt_level, irb->mt_layer,
>> + x, y, irb->Base.Base.Name == 0,
>> + intelImage->mt, dst_level, dst_slice,
>> + dstx, dsty, false,
>> + width, height, GL_COPY);
>>  }
>>
>>
>> --
>> 2.9.4
>>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] ac/nir: Make intrinsic_name buffer long enough

2017-06-21 Thread Grazvydas Ignotas
On Wed, Jun 21, 2017 at 6:59 PM, James Legg  wrote:
> When using cmpswap on an image, it was being truncated to
> llvm.amdgcn.image.atomic.cmpswa, with the coords type missing entirely.

You could add an assert to catch this more easily in the future, like

MAYBE_UNUSED int ret;
...
ret = snprintf(intrinsic_name, ...
assert(ret < sizeof(intrinsic_name));

In either case
Reviewed-by: Grazvydas Ignotas 

> ---
>  src/amd/common/ac_nir_to_llvm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 49117d2..3a26668 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3366,7 +3366,7 @@ static LLVMValueRef visit_image_atomic(struct 
> nir_to_llvm_context *ctx,
> const char *base_name = "llvm.amdgcn.image.atomic";
> const char *atomic_name;
> LLVMValueRef coords;
> -   char intrinsic_name[32], coords_type[8];
> +   char intrinsic_name[41], coords_type[8];
> const struct glsl_type *type = glsl_without_array(var->type);
>
> if (ctx->stage == MESA_SHADER_FRAGMENT)
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: FORMAT_FEATURE_TRANSFER_SRC/DST_BIT_KHR not used with VkFormatProperties.bufferFeatures

2017-06-21 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Wed, Jun 14, 2017 at 9:55 AM, Andres Gomez  wrote:

> VK_FORMAT_FEATURE_TRANSFER_[SRC|DST]_BIT_KHR is a flag value of the
> VkFormatFeatureFlagBits enum that can only be held and checked against
> the linearTilingFeatures or optimalTilingFeatures members of the
> VkFormatProperties struct but not the bufferFeatures member.
>
> From the Vulkan® 1.0.51, with the VK_KHR_maintenance1 extension,
> section 32.3.2 docs for VkFormatProperties:
>
>"* linearTilingFeatures is a bitmask of VkFormatFeatureFlagBits
>   specifying features supported by images created with a tiling
>   parameter of VK_IMAGE_TILING_LINEAR.
>
> * optimalTilingFeatures is a bitmask of VkFormatFeatureFlagBits
>   specifying features supported by images created with a tiling
>   parameter of VK_IMAGE_TILING_OPTIMAL.
>
> * bufferFeatures is a bitmask of VkFormatFeatureFlagBits
>   specifying features supported by buffers."
>
> ...
>
> Bits which can be set in the VkFormatProperties features
> linearTilingFeatures, optimalTilingFeatures, and bufferFeatures
> are:
>
> typedef enum VkFormatFeatureFlagBits {
>
> ...
>
>   VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = 0x4000,
>   VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = 0x8000,
>
> ...
>
> } VkFormatFeatureFlagBits;
>
> ...
>
> The following bits may be set in linearTilingFeatures and
> optimalTilingFeatures, specifying that the features are supported
> by images or image views created with the queried
> vkGetPhysicalDeviceFormatProperties::format:
>
> ...
>
> * VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR specifies that an image
>   can be used as a source image for copy commands.
>
> * VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR specifies that an image
>   can be used as a destination image for copy commands and clear
>   commands."
>
> Cc: Jason Ekstrand 
> Cc: Iago Toral Quiroga 
> Cc: Lionel Landwerlin 
> Signed-off-by: Andres Gomez 
> ---
>  src/intel/vulkan/anv_formats.c | 5 -
>  1 file changed, 5 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_
> formats.c
> index 104d4f7a5fe..0bc81d12dab 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -378,11 +378,6 @@ get_buffer_format_properties(const struct
> gen_device_info *devinfo,
> if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT)
>flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
>
> -   if (flags) {
> -  flags |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
> -   VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
> -   }
> -
> return flags;
>  }
>
> --
> 2.11.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >